[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251019061631.2235405-14-xiyou.wangcong@gmail.com>
Date: Sat, 18 Oct 2025 23:16:27 -0700
From: Cong Wang <xiyou.wangcong@...il.com>
To: linux-kernel@...r.kernel.org
Cc: jiri@...nulli.us,
stefanha@...hat.com,
multikernel@...ts.linux.dev,
pasha.tatashin@...een.com,
Cong Wang <cwang@...tikernel.io>,
Andrew Morton <akpm@...ux-foundation.org>,
Baoquan He <bhe@...hat.com>,
Alexander Graf <graf@...zon.com>,
Mike Rapoport <rppt@...nel.org>,
Changyuan Lyu <changyuanl@...gle.com>,
kexec@...ts.infradead.org,
linux-mm@...ck.org
Subject: [RFC Patch v2 13/16] kernel: Introduce generic multikernel IPI communication framework
From: Cong Wang <cwang@...tikernel.io>
This patch implements a comprehensive IPI-based communication system
for multikernel environments, enabling data exchange between different
kernel instances running on separate CPUs.
Key features include:
- Generic IPI handler registration and callback mechanism allowing
modules to register for multikernel communication events
- Shared memory infrastructure on top of the general per-instance memory
allocation infrastructure
- Per-instance data buffers in shared memory for efficient IPI payload
transfer up to 256 bytes per message
- IRQ work integration for safe callback execution in interrupt context
- PFN-based flexible shared memory APIs for page-level data sharing
- Resource tracking integration for /proc/iomem visibility
It provides two key APIs: multikernel_send_ipi_data() for sending
typed data to a target kernel instance, and multikernel_register_handler()
for registering an IPI handler. Shared memory is established on top of the
per-instance memory allocation infrastructure.
This infrastructure enables multikernel instances to coordinate and
share data while maintaining isolation on their respective CPU cores.
(Note, as a proof-of-concept, we have only implemented the x86 part.)
Signed-off-by: Cong Wang <cwang@...tikernel.io>
---
arch/x86/kernel/smp.c | 3 +
include/linux/multikernel.h | 66 +++++
kernel/multikernel/Makefile | 2 +-
kernel/multikernel/ipi.c | 471 ++++++++++++++++++++++++++++++++++++
4 files changed, 541 insertions(+), 1 deletion(-)
create mode 100644 kernel/multikernel/ipi.c
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index e2eba09da7fc..2be7c1a777ef 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -273,10 +273,13 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_call_function_single)
}
#ifdef CONFIG_MULTIKERNEL
+void generic_multikernel_interrupt(void);
+
DEFINE_IDTENTRY_SYSVEC(sysvec_multikernel)
{
apic_eoi();
inc_irq_stat(irq_call_count);
+ generic_multikernel_interrupt(); /* EOI was already issued above; hand the IPI to the generic multikernel handler */
}
#endif /* CONFIG_MULTIKERNEL */
diff --git a/include/linux/multikernel.h b/include/linux/multikernel.h
index 79611923649e..ee96bd2332b6 100644
--- a/include/linux/multikernel.h
+++ b/include/linux/multikernel.h
@@ -14,6 +14,72 @@
#include <linux/cpumask.h>
#include <linux/genalloc.h>
+/**
+ * Multikernel IPI interface
+ */
+
+/* Maximum data size that can be transferred via IPI */
+#define MK_MAX_DATA_SIZE 256
+
+/*
+ * Data structure for passing parameters via IPI.
+ * One of these exists per CPU inside struct mk_shared_data
+ * (kernel/multikernel/ipi.c); the sender fills it in and the receiving
+ * kernel reads it from its IPI handler.
+ */
+struct mk_ipi_data {
+ int sender_cpu; /* Which CPU sent this IPI (sender stores arch_cpu_physical_id()) */
+ unsigned int type; /* User-defined type identifier, matched against a handler's ipi_type */
+ size_t data_size; /* Number of valid bytes in buffer, at most MK_MAX_DATA_SIZE */
+ char buffer[MK_MAX_DATA_SIZE]; /* Actual data buffer */
+};
+
+/*
+ * Function pointer type for IPI callbacks.
+ * @data: message slot that was recorded when the IPI arrived
+ * @ctx:  context pointer supplied at registration time
+ * Callbacks are run from an irq_work item rather than directly from the
+ * IPI vector handler.
+ */
+typedef void (*mk_ipi_callback_t)(struct mk_ipi_data *data, void *ctx);
+
+struct mk_ipi_handler { /* One registered IPI callback, allocated by multikernel_register_handler() */
+ mk_ipi_callback_t callback; /* Function run when an IPI of ipi_type arrives */
+ void *context; /* Opaque pointer handed back to callback */
+ unsigned int ipi_type; /* IPI type this handler is registered for */
+ struct mk_ipi_handler *next; /* Next entry in the singly linked handler list */
+ struct mk_ipi_data *saved_data; /* Message slot recorded by the IPI handler for the deferred callback */
+ struct irq_work work; /* Deferred work item that invokes callback */
+};
+
+/**
+ * multikernel_register_handler - Register a callback for multikernel IPI
+ * @callback: Function to call when IPI is received; must not be NULL
+ * @ctx: Context pointer passed to the callback
+ * @ipi_type: IPI type this handler should process
+ *
+ * The handler object is allocated with GFP_KERNEL, so the caller must be
+ * in a context that is allowed to sleep.
+ *
+ * Returns pointer to handler on success, NULL on failure
+ */
+struct mk_ipi_handler *multikernel_register_handler(mk_ipi_callback_t callback, void *ctx, unsigned int ipi_type);
+
+/**
+ * multikernel_unregister_handler - Unregister a multikernel IPI callback
+ * @handler: Handler pointer returned from multikernel_register_handler;
+ *           NULL is ignored
+ *
+ * Unlinks the handler from the handler list, waits for its pending
+ * irq_work to finish and frees it. The pointer must not be used afterwards.
+ */
+void multikernel_unregister_handler(struct mk_ipi_handler *handler);
+
+/**
+ * multikernel_send_ipi_data - Send data to another kernel instance via IPI
+ * @instance_id: Target multikernel instance ID
+ * @data: Pointer to data to send
+ * @data_size: Size of data in bytes, at most MK_MAX_DATA_SIZE
+ * @type: User-defined type identifier (stored in the unsigned int type
+ *        field of struct mk_ipi_data)
+ *
+ * This function copies the data into the target instance's shared-memory
+ * message slot and sends an IPI to the first CPU of that instance.
+ *
+ * Returns 0 on success, negative error code on failure
+ */
+int multikernel_send_ipi_data(int instance_id, void *data, size_t data_size, unsigned long type);
+
+void generic_multikernel_interrupt(void); /* Entry point called from the multikernel IPI vector handler */
+
+/*
+ * Flexible shared memory APIs (PFN-based)
+ *
+ * mk_send_pfn() sends a page frame number to another instance,
+ * mk_receive_pfn() extracts it from a received message, and
+ * mk_receive_map_page() maps the referenced page with memremap().
+ * A mapping obtained from mk_receive_map_page() is released with
+ * mk_receive_unmap_page().
+ */
+int mk_send_pfn(int instance_id, unsigned long pfn);
+int mk_receive_pfn(struct mk_ipi_data *data, unsigned long *out_pfn);
+void *mk_receive_map_page(struct mk_ipi_data *data);
+
+#define mk_receive_unmap_page(p) memunmap(p)
+
struct resource;
extern phys_addr_t multikernel_alloc(size_t size);
diff --git a/kernel/multikernel/Makefile b/kernel/multikernel/Makefile
index d004c577f13d..b539acc656c6 100644
--- a/kernel/multikernel/Makefile
+++ b/kernel/multikernel/Makefile
@@ -3,7 +3,7 @@
# Makefile for multikernel support
#
-obj-y += core.o mem.o kernfs.o dts.o
+obj-y += core.o mem.o kernfs.o dts.o ipi.o
# Add libfdt include path for device tree parsing
CFLAGS_dts.o = -I $(srctree)/scripts/dtc/libfdt
diff --git a/kernel/multikernel/ipi.c b/kernel/multikernel/ipi.c
new file mode 100644
index 000000000000..b5c4a06747a2
--- /dev/null
+++ b/kernel/multikernel/ipi.c
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Multikernel Technologies, Inc. All rights reserved
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+#include <linux/multikernel.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <asm/apic.h>
+#include <asm/pgtable_types.h>
+
+/*
+ * Per-instance IPI data - no more global variables.
+ * One of these is allocated per multikernel instance and stored in
+ * mk_instance_ipi_map[], indexed by instance id.
+ */
+struct mk_instance_ipi_data {
+ void *instance_pool; /* Instance pool handle (copied from the instance) */
+ struct mk_shared_data *shared_mem; /* IPI shared memory for this instance (memremap() mapping) */
+ size_t shared_mem_size; /* Size of shared memory in bytes */
+};
+
+/*
+ * Shared memory structures - per-instance design.
+ * The same structure is mapped by a spawned kernel from the address given
+ * by the mk_ipi_shared early parameter.
+ * NOTE(review): the size depends on NR_CPUS, so presumably both kernels
+ * must be built with the same NR_CPUS for the layouts to agree - confirm.
+ */
+struct mk_shared_data {
+ struct mk_ipi_data cpu_data[NR_CPUS]; /* Data area for each CPU */
+};
+
+#define MK_MAX_INSTANCES 256 /* Exclusive upper bound on instance ids tracked in the map */
+static struct mk_instance_ipi_data *mk_instance_ipi_map[MK_MAX_INSTANCES]; /* Indexed by instance id; NULL when no IPI data exists */
+static DEFINE_SPINLOCK(mk_ipi_map_lock); /* Protects mk_instance_ipi_map[] */
+
+static struct mk_shared_data *mk_this_kernel_ipi_data; /* This kernel's own IPI area; mapped at init when mk_ipi_shared was given */
+static phys_addr_t mk_ipi_shared_phys_addr; /* Physical address parsed from the mk_ipi_shared early parameter */
+
+/* Callback management */
+static struct mk_ipi_handler *mk_handlers; /* Head of the registered handler list */
+static raw_spinlock_t mk_handlers_lock = __RAW_SPIN_LOCK_UNLOCKED(mk_handlers_lock); /* Protects mk_handlers; also taken from the IPI handler */
+
+static void *multikernel_alloc_ipi_buffer(void *pool_handle, size_t buffer_size);
+static void multikernel_free_ipi_buffer(void *pool_handle, void *virt_addr, size_t buffer_size);
+
+/* irq_work callback: run the registered callback on the message slot saved by the IPI handler. */
+static void handler_work(struct irq_work *work)
+{
+	struct mk_ipi_handler *h = container_of(work, struct mk_ipi_handler, work);
+
+	if (!h->callback)
+		return;
+
+	h->callback(h->saved_data, h->context);
+}
+
+/**
+ * mk_instance_ipi_create() - Set up the IPI message area of an instance
+ * @instance: The multikernel instance
+ *
+ * Allocates the bookkeeping structure, takes the shared message area from
+ * the instance's own memory pool, zeroes it and publishes the result in
+ * mk_instance_ipi_map[].
+ *
+ * Return: 0 on success; -EINVAL for a NULL instance or an out-of-range id,
+ * -ENOMEM on allocation failure, -ENODEV if the instance has no memory
+ * pool, -EEXIST if IPI data is already registered for the instance.
+ */
+static int mk_instance_ipi_create(struct mk_instance *instance)
+{
+	struct mk_instance_ipi_data *entry;
+	unsigned long irqflags;
+	int err;
+
+	if (!instance)
+		return -EINVAL;
+	if (instance->id < 0 || instance->id >= MK_MAX_INSTANCES)
+		return -EINVAL;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	/* The message area is carved out of the instance's own pool */
+	entry->instance_pool = instance->instance_pool;
+	if (!entry->instance_pool) {
+		pr_err("Instance %d has no memory pool for IPI allocation\n", instance->id);
+		err = -ENODEV;
+		goto free_entry;
+	}
+
+	entry->shared_mem_size = sizeof(struct mk_shared_data);
+	entry->shared_mem = multikernel_alloc_ipi_buffer(entry->instance_pool,
+							 entry->shared_mem_size);
+	if (!entry->shared_mem) {
+		pr_err("Failed to allocate IPI shared memory for instance %d\n", instance->id);
+		err = -ENOMEM;
+		goto free_entry;
+	}
+
+	/* Start from an all-zero message area */
+	memset(entry->shared_mem, 0, entry->shared_mem_size);
+
+	/* Publish the entry unless the slot is already taken */
+	spin_lock_irqsave(&mk_ipi_map_lock, irqflags);
+	if (mk_instance_ipi_map[instance->id]) {
+		pr_err("IPI data already exists for instance %d\n", instance->id);
+		err = -EEXIST;
+	} else {
+		mk_instance_ipi_map[instance->id] = entry;
+		err = 0;
+	}
+	spin_unlock_irqrestore(&mk_ipi_map_lock, irqflags);
+
+	if (err)
+		goto free_buffer;
+
+	pr_info("Created IPI data for instance %d (%s): virt=%px, size=%zu bytes\n",
+		instance->id, instance->name, entry->shared_mem, entry->shared_mem_size);
+	return 0;
+
+free_buffer:
+	multikernel_free_ipi_buffer(entry->instance_pool, entry->shared_mem,
+				    entry->shared_mem_size);
+free_entry:
+	kfree(entry);
+	return err;
+}
+
+/**
+ * mk_instance_ipi_destroy() - Tear down the IPI message area of an instance
+ * @instance_id: The instance ID
+ *
+ * Removes the instance's entry from mk_instance_ipi_map[] and releases the
+ * shared buffer and the bookkeeping structure. Out-of-range ids and
+ * instances without IPI data are ignored.
+ */
+static void mk_instance_ipi_destroy(int instance_id)
+{
+	struct mk_instance_ipi_data *entry;
+	unsigned long irqflags;
+
+	if (instance_id < 0 || instance_id >= MK_MAX_INSTANCES)
+		return;
+
+	/* Detach the entry under the lock; release it afterwards */
+	spin_lock_irqsave(&mk_ipi_map_lock, irqflags);
+	entry = mk_instance_ipi_map[instance_id];
+	mk_instance_ipi_map[instance_id] = NULL;
+	spin_unlock_irqrestore(&mk_ipi_map_lock, irqflags);
+
+	if (entry) {
+		pr_debug("Destroying IPI data for instance %d\n", instance_id);
+
+		/* multikernel_free_ipi_buffer() itself ignores a NULL mapping */
+		multikernel_free_ipi_buffer(entry->instance_pool, entry->shared_mem,
+					    entry->shared_mem_size);
+		kfree(entry);
+	}
+}
+
+/**
+ * mk_instance_ipi_get() - Look up the IPI data of a multikernel instance
+ * @instance_id: The instance ID
+ *
+ * The map is read under mk_ipi_map_lock, but no reference is taken: the
+ * pointer is handed back after the lock has been dropped.
+ *
+ * Returns the IPI data for the given instance, or NULL if the id is out of
+ * range or nothing is registered for it.
+ */
+static struct mk_instance_ipi_data *mk_instance_ipi_get(int instance_id)
+{
+	struct mk_instance_ipi_data *entry = NULL;
+	unsigned long irqflags;
+
+	if (instance_id >= 0 && instance_id < MK_MAX_INSTANCES) {
+		spin_lock_irqsave(&mk_ipi_map_lock, irqflags);
+		entry = mk_instance_ipi_map[instance_id];
+		spin_unlock_irqrestore(&mk_ipi_map_lock, irqflags);
+	}
+
+	return entry;
+}
+
+/**
+ * multikernel_register_handler - Register a callback for multikernel IPI
+ * @callback: Function to call when IPI is received; must not be NULL
+ * @ctx: Context pointer passed to the callback
+ * @ipi_type: IPI type this handler should process
+ *
+ * Allocates a handler with GFP_KERNEL and links it at the head of the
+ * handler list.
+ *
+ * Returns pointer to handler on success, NULL on failure
+ */
+struct mk_ipi_handler *multikernel_register_handler(mk_ipi_callback_t callback, void *ctx, unsigned int ipi_type)
+{
+	struct mk_ipi_handler *new_handler;
+	unsigned long irqflags;
+
+	/* A missing callback and a failed allocation both end in NULL */
+	new_handler = callback ? kzalloc(sizeof(*new_handler), GFP_KERNEL) : NULL;
+	if (!new_handler)
+		return NULL;
+
+	init_irq_work(&new_handler->work, handler_work);
+	new_handler->ipi_type = ipi_type;
+	new_handler->context = ctx;
+	new_handler->callback = callback;
+
+	/* Push onto the head of the handler list */
+	raw_spin_lock_irqsave(&mk_handlers_lock, irqflags);
+	new_handler->next = mk_handlers;
+	mk_handlers = new_handler;
+	raw_spin_unlock_irqrestore(&mk_handlers_lock, irqflags);
+
+	return new_handler;
+}
+EXPORT_SYMBOL(multikernel_register_handler);
+
+/**
+ * multikernel_unregister_handler - Unregister a multikernel IPI callback
+ * @handler: Handler pointer returned from multikernel_register_handler;
+ *           NULL is ignored
+ *
+ * Unlinks @handler from the handler list, waits for its irq_work to finish
+ * and frees it. If @handler is not on the list, nothing is freed.
+ */
+void multikernel_unregister_handler(struct mk_ipi_handler *handler)
+{
+	struct mk_ipi_handler **link;
+	struct mk_ipi_handler *removed = NULL;
+	unsigned long irqflags;
+
+	if (!handler)
+		return;
+
+	raw_spin_lock_irqsave(&mk_handlers_lock, irqflags);
+	for (link = &mk_handlers; *link; link = &(*link)->next) {
+		if (*link != handler)
+			continue;
+
+		/* Found it: splice the entry out of the list */
+		removed = *link;
+		*link = removed->next;
+		break;
+	}
+	raw_spin_unlock_irqrestore(&mk_handlers_lock, irqflags);
+
+	/* Let any queued or running callback finish before freeing */
+	irq_work_sync(&handler->work);
+	kfree(removed);
+}
+EXPORT_SYMBOL(multikernel_unregister_handler);
+
+/**
+ * multikernel_send_ipi_data - Send data to another kernel instance via IPI
+ * @instance_id: Target multikernel instance ID
+ * @data: Pointer to data to send
+ * @data_size: Size of data in bytes, at most MK_MAX_DATA_SIZE
+ * @type: User-defined type identifier; must fit in unsigned int
+ *
+ * Copies the payload into the message slot of the first CPU in the target
+ * instance's CPU mask and sends MULTIKERNEL_VECTOR to that CPU.
+ *
+ * The slot is written without locking, so concurrent senders to the same
+ * instance can overwrite each other's message. The receiving side also
+ * ignores slots whose data_size is 0, so an empty message does not reach
+ * any handler.
+ *
+ * Returns 0 on success, negative error code on failure: -EINVAL for an
+ * unknown instance, an oversized payload or a @type that does not fit in
+ * unsigned int; -ENODEV if the instance has no CPU or no IPI shared memory.
+ */
+int multikernel_send_ipi_data(int instance_id, void *data, size_t data_size, unsigned long type)
+{
+	struct mk_instance_ipi_data *ipi_data;
+	struct mk_ipi_data *target;
+	struct mk_instance *instance = mk_instance_find(instance_id);
+	unsigned int cpu;
+
+	if (!instance)
+		return -EINVAL;
+	if (data_size > MK_MAX_DATA_SIZE)
+		return -EINVAL;
+	/* mk_ipi_data.type is unsigned int: refuse values that would be truncated */
+	if (type != (unsigned int)type)
+		return -EINVAL;
+
+	/*
+	 * cpumask_first() returns a value >= nr_cpu_ids for an empty mask.
+	 * Using it would index past cpu_data[] and target a bogus CPU.
+	 */
+	cpu = cpumask_first(instance->cpus);
+	if (cpu >= nr_cpu_ids)
+		return -ENODEV;
+
+	/* Get the IPI data for the target instance */
+	ipi_data = mk_instance_ipi_get(instance_id);
+	if (!ipi_data || !ipi_data->shared_mem) {
+		pr_debug("Multikernel IPI shared memory not available for instance %d\n", instance_id);
+		return -ENODEV;
+	}
+
+	/* Get target CPU's data area from shared memory */
+	target = &ipi_data->shared_mem->cpu_data[cpu];
+
+	/* Clear the slot first so no stale payload bytes are left behind */
+	memset(target, 0, sizeof(*target));
+
+	/* Set header information */
+	target->data_size = data_size;
+	target->sender_cpu = arch_cpu_physical_id(smp_processor_id());
+	target->type = type;
+
+	/* Copy the actual data into the buffer */
+	if (data && data_size > 0)
+		memcpy(target->buffer, data, data_size);
+
+	/* Notify the target CPU */
+	__apic_send_IPI(cpu, MULTIKERNEL_VECTOR);
+
+	return 0;
+}
+
+/**
+ * multikernel_interrupt_handler - Handle the multikernel IPI
+ *
+ * This function is called when a multikernel IPI is received. It looks up
+ * the message slot of the current CPU (indexed by its physical id) in this
+ * kernel's IPI area and queues the irq_work of every registered handler
+ * whose ipi_type matches the message type. The callbacks themselves run
+ * later from handler_work().
+ *
+ * Handlers are given a pointer to the shared slot itself, not a copy.
+ *
+ * Nothing is done when no IPI area is mapped (mk_this_kernel_ipi_data is
+ * only set up when the mk_ipi_shared boot parameter was given), when the
+ * physical id has no slot, or when the slot holds no valid payload size.
+ */
+static void multikernel_interrupt_handler(void)
+{
+	struct mk_ipi_data *data;
+	struct mk_ipi_handler *handler;
+	int current_cpu = smp_processor_id();
+	int current_physical_id = arch_cpu_physical_id(current_cpu);
+
+	if (!mk_this_kernel_ipi_data)
+		return;
+
+	/* cpu_data[] has NR_CPUS slots; a physical id outside that range has none */
+	if (current_physical_id < 0 || current_physical_id >= NR_CPUS) {
+		pr_debug("Multikernel IPI received on CPU %d but physical id %d has no data slot\n",
+			 current_cpu, current_physical_id);
+		return;
+	}
+
+	data = &mk_this_kernel_ipi_data->cpu_data[current_physical_id];
+
+	if (data->data_size == 0 || data->data_size > MK_MAX_DATA_SIZE) {
+		pr_debug("Multikernel IPI received on CPU %d but no valid data\n", current_cpu);
+		return;
+	}
+
+	pr_info("Multikernel IPI received on CPU %d (physical id %d) from CPU %d type=%u\n",
+		current_cpu, current_physical_id, data->sender_cpu, data->type);
+
+	/* Defer the callbacks: only queue work while holding the list lock */
+	raw_spin_lock(&mk_handlers_lock);
+	for (handler = mk_handlers; handler; handler = handler->next) {
+		if (handler->ipi_type == data->type) {
+			handler->saved_data = data;
+			irq_work_queue(&handler->work);
+		}
+	}
+	raw_spin_unlock(&mk_handlers_lock);
+}
+
+/**
+ * generic_multikernel_interrupt - Entry point for the multikernel IPI vector
+ *
+ * This is the function that gets called by the IPI vector handler
+ * (sysvec_multikernel on x86). It forwards to multikernel_interrupt_handler().
+ */
+void generic_multikernel_interrupt(void)
+{
+ multikernel_interrupt_handler();
+}
+
+/**
+ * multikernel_alloc_ipi_buffer() - Allocate and map an IPI buffer
+ * @pool_handle: Instance pool handle
+ * @buffer_size: Size of IPI buffer needed
+ *
+ * Takes a PAGE_SIZE-aligned range from the instance pool and maps it
+ * write-back with memremap(). If the mapping fails, the range is returned
+ * to the pool.
+ *
+ * Returns virtual address of mapped buffer, or NULL on failure.
+ */
+static void *multikernel_alloc_ipi_buffer(void *pool_handle, size_t buffer_size)
+{
+	phys_addr_t pa = multikernel_instance_alloc(pool_handle, buffer_size, PAGE_SIZE);
+	void *va;
+
+	if (!pa) {
+		pr_err("Failed to allocate %zu bytes for IPI buffer\n", buffer_size);
+		return NULL;
+	}
+
+	va = memremap(pa, buffer_size, MEMREMAP_WB);
+	if (va) {
+		pr_debug("Allocated IPI buffer: phys=0x%llx, virt=%px, size=%zu\n",
+			 (unsigned long long)pa, va, buffer_size);
+		return va;
+	}
+
+	/* Mapping failed: give the range back to the pool */
+	pr_err("Failed to map IPI buffer at 0x%llx\n", (unsigned long long)pa);
+	multikernel_instance_free(pool_handle, pa, buffer_size);
+	return NULL;
+}
+
+/**
+ * multikernel_free_ipi_buffer() - Free IPI communication buffer
+ * @pool_handle: Instance pool handle
+ * @virt_addr: Virtual address returned by multikernel_alloc_ipi_buffer();
+ *             NULL is ignored
+ * @buffer_size: Size of the buffer
+ *
+ * Unmaps and frees an IPI buffer back to the instance pool.
+ */
+static void multikernel_free_ipi_buffer(void *pool_handle, void *virt_addr, size_t buffer_size)
+{
+	phys_addr_t phys_addr;
+
+	if (!virt_addr)
+		return;
+
+	/*
+	 * The buffer was mapped with memremap(), which may return either a
+	 * direct-map address or an ioremap-style mapping. virt_to_phys() is
+	 * only valid for the former, so resolve the address through the page
+	 * tables instead. This must happen before memunmap() removes the
+	 * mapping.
+	 */
+	phys_addr = slow_virt_to_phys(virt_addr);
+
+	/* Unmap virtual address */
+	memunmap(virt_addr);
+
+	/* Free back to instance pool */
+	multikernel_instance_free(pool_handle, phys_addr, buffer_size);
+
+	pr_debug("Freed IPI buffer: phys=0x%llx, virt=%px, size=%zu\n",
+		 (unsigned long long)phys_addr, virt_addr, buffer_size);
+}
+
+/*
+ * Parse the mk_ipi_shared early parameter: the physical address of this
+ * kernel's IPI shared memory. A missing or zero value is rejected with
+ * -EINVAL.
+ */
+static int __init mk_ipi_shared_setup(char *str)
+{
+	phys_addr_t addr;
+
+	if (!str)
+		return -EINVAL;
+
+	addr = memparse(str, NULL);
+	mk_ipi_shared_phys_addr = addr;
+	if (!addr) {
+		pr_err("Invalid multikernel IPI shared memory address: %s\n", str);
+		return -EINVAL;
+	}
+
+	pr_info("Multikernel IPI shared memory address: 0x%llx\n",
+		(unsigned long long)addr);
+	return 0;
+}
+early_param("mk_ipi_shared", mk_ipi_shared_setup);
+
+/**
+ * multikernel_ipi_init - Initialize multikernel IPI subsystem
+ *
+ * When an IPI shared memory address was supplied via the mk_ipi_shared
+ * boot parameter (spawned kernel), map that memory so incoming IPIs can be
+ * decoded. Without the parameter there is nothing to set up.
+ *
+ * Returns 0 on success, -ENOMEM if the shared memory cannot be mapped
+ */
+static int __init multikernel_ipi_init(void)
+{
+	const phys_addr_t pa = mk_ipi_shared_phys_addr;
+
+	/* No address on the command line: nothing to map */
+	if (!pa)
+		return 0;
+
+	/* Spawned kernel: map the shared IPI memory */
+	mk_this_kernel_ipi_data = memremap(pa, sizeof(struct mk_shared_data), MEMREMAP_WB);
+	if (!mk_this_kernel_ipi_data) {
+		pr_err("Failed to map multikernel IPI shared memory at 0x%llx\n",
+		       (unsigned long long)pa);
+		return -ENOMEM;
+	}
+
+	pr_info("Multikernel IPI subsystem initialized (spawned kernel): virt=%px, phys=0x%llx\n",
+		mk_this_kernel_ipi_data, (unsigned long long)pa);
+	return 0;
+}
+subsys_initcall(multikernel_ipi_init);
+
+/* ---- Flexible shared memory APIs (PFN-based) ---- */
+#define MK_PFN_IPI_TYPE 0x80000001U
+
+/* Send a page frame number to another kernel instance as an MK_PFN_IPI_TYPE message. */
+int mk_send_pfn(int instance_id, unsigned long pfn)
+{
+	unsigned long payload = pfn;
+
+	return multikernel_send_ipi_data(instance_id, &payload, sizeof(payload), MK_PFN_IPI_TYPE);
+}
+
+/*
+ * Extract a page frame number from a received message.
+ * Returns 0 and stores the PFN in *out_pfn, or -EINVAL if an argument is
+ * NULL or the message is not an MK_PFN_IPI_TYPE message of exactly
+ * sizeof(unsigned long) bytes.
+ */
+int mk_receive_pfn(struct mk_ipi_data *data, unsigned long *out_pfn)
+{
+	unsigned long pfn;
+
+	if (!data || !out_pfn)
+		return -EINVAL;
+	if (data->type != MK_PFN_IPI_TYPE)
+		return -EINVAL;
+	if (data->data_size != sizeof(pfn))
+		return -EINVAL;
+
+	memcpy(&pfn, data->buffer, sizeof(pfn));
+	*out_pfn = pfn;
+	return 0;
+}
+
+/*
+ * Map the page whose PFN was received in @data.
+ * Returns a write-back mapping of PAGE_SIZE bytes obtained from memremap(),
+ * or NULL if @data does not carry a valid PFN message or the mapping fails.
+ * Release the mapping with mk_receive_unmap_page().
+ */
+void *mk_receive_map_page(struct mk_ipi_data *data)
+{
+	unsigned long pfn;
+
+	if (mk_receive_pfn(data, &pfn) < 0)
+		return NULL;
+
+	/*
+	 * Widen before shifting: where phys_addr_t is wider than unsigned long,
+	 * shifting the PFN in unsigned long would drop the high address bits.
+	 */
+	return memremap((phys_addr_t)pfn << PAGE_SHIFT, PAGE_SIZE, MEMREMAP_WB);
+}
--
2.34.1
Powered by blists - more mailing lists