[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250918222607.186488-5-xiyou.wangcong@gmail.com>
Date: Thu, 18 Sep 2025 15:26:03 -0700
From: Cong Wang <xiyou.wangcong@...il.com>
To: linux-kernel@...r.kernel.org
Cc: pasha.tatashin@...een.com,
Cong Wang <cwang@...tikernel.io>,
Andrew Morton <akpm@...ux-foundation.org>,
Baoquan He <bhe@...hat.com>,
Alexander Graf <graf@...zon.com>,
Mike Rapoport <rppt@...nel.org>,
Changyuan Lyu <changyuanl@...gle.com>,
kexec@...ts.infradead.org,
linux-mm@...ck.org
Subject: [RFC Patch 4/7] kernel: Introduce generic multikernel IPI communication framework
From: Cong Wang <cwang@...tikernel.io>
This patch implements a comprehensive IPI-based communication system
for multikernel environments, enabling data exchange between different
kernel instances running on separate CPUs.
Key features include:
- Generic IPI handler registration and callback mechanism allowing
modules to register for multikernel communication events
- Shared memory infrastructure using either boot parameter-specified
or dynamically allocated physical memory regions
- Per-CPU data buffers in shared memory for efficient IPI payload
transfer up to 256 bytes per message
- IRQ work integration for safe callback execution in interrupt context
- PFN-based flexible shared memory APIs for page-level data sharing
- Resource tracking integration for /proc/iomem visibility
The implementation provides multikernel_send_ipi_data() for sending
typed data to target CPUs and multikernel_register_handler() for
receiving notifications. Shared memory is established during early
boot and mapped using memremap() for cache-coherent access.
This infrastructure enables heterogeneous computing scenarios where
multikernel instances can coordinate and share data while maintaining
isolation on their respective CPU cores.
Note, as a proof-of-concept, we have only implemented the x86 part.
Signed-off-by: Cong Wang <cwang@...tikernel.io>
---
arch/x86/kernel/smp.c | 5 +-
include/linux/multikernel.h | 81 ++++++++++
init/main.c | 2 +
kernel/Makefile | 2 +-
kernel/multikernel.c | 313 ++++++++++++++++++++++++++++++++++++
5 files changed, 398 insertions(+), 5 deletions(-)
create mode 100644 include/linux/multikernel.h
create mode 100644 kernel/multikernel.c
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 028cc423a772..3ee515e32383 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -272,10 +272,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_call_function_single)
trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
}
-static void generic_multikernel_interrupt(void)
-{
- pr_info("Multikernel interrupt\n");
-}
+void generic_multikernel_interrupt(void);
DEFINE_IDTENTRY_SYSVEC(sysvec_multikernel)
{
diff --git a/include/linux/multikernel.h b/include/linux/multikernel.h
new file mode 100644
index 000000000000..12ed5e03f92e
--- /dev/null
+++ b/include/linux/multikernel.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2025 Multikernel Technologies, Inc. All rights reserved
+ */
+#ifndef _LINUX_MULTIKERNEL_H
+#define _LINUX_MULTIKERNEL_H
+
+#include <linux/types.h>
+#include <linux/irq_work.h>
+
+/**
+ * DOC: Multikernel IPI interface
+ *
+ * This header provides declarations for the multikernel IPI interface,
+ * allowing modules to register callbacks for IPI events and pass data
+ * between CPUs.
+ */
+
+/* Maximum data size that can be transferred via IPI */
+#define MK_MAX_DATA_SIZE 256
+
+/*
+ * Data structure for passing parameters via IPI.
+ *
+ * One instance per CPU lives in the shared memory region (struct
+ * mk_shared_data in kernel/multikernel.c). The sender fills in the target
+ * CPU's instance before raising the IPI.
+ *
+ * NOTE(review): presumably every kernel instance mapping the region must
+ * agree on this layout - confirm before changing field order or sizes.
+ */
+struct mk_ipi_data {
+ int sender_cpu; /* Which CPU sent this IPI */
+ unsigned int type; /* User-defined type identifier */
+ size_t data_size; /* Number of valid bytes in buffer */
+ char buffer[MK_MAX_DATA_SIZE]; /* Actual data buffer */
+};
+
+/*
+ * Function pointer type for IPI callbacks.
+ * @data: the mk_ipi_data slot recorded for the handler when the IPI arrived
+ * @ctx: the context pointer given to multikernel_register_handler()
+ */
+typedef void (*mk_ipi_callback_t)(struct mk_ipi_data *data, void *ctx);
+
+/* One registered callback; instances are chained into a singly linked list. */
+struct mk_ipi_handler {
+ mk_ipi_callback_t callback; /* function run from the irq_work below */
+ void *context; /* passed back to callback as its ctx argument */
+ struct mk_ipi_handler *next; /* next registered handler, or NULL */
+ struct mk_ipi_data *saved_data; /* slot stored by the IPI handler before queueing work */
+ struct irq_work work; /* defers the callback out of the IPI handler */
+};
+
+/**
+ * multikernel_register_handler - Register a callback for multikernel IPI
+ * @callback: Function to call when IPI is received
+ * @ctx: Context pointer passed to the callback
+ *
+ * Returns pointer to handler on success, NULL on failure
+ */
+struct mk_ipi_handler *multikernel_register_handler(mk_ipi_callback_t callback, void *ctx);
+
+/**
+ * multikernel_unregister_handler - Unregister a multikernel IPI callback
+ * @handler: Handler pointer returned from multikernel_register_handler
+ */
+void multikernel_unregister_handler(struct mk_ipi_handler *handler);
+
+/**
+ * multikernel_send_ipi_data - Send data to another CPU via IPI
+ * @cpu: Target CPU
+ * @data: Pointer to data to send
+ * @data_size: Size of data
+ * @type: User-defined type identifier
+ *
+ * This function copies the data to per-CPU storage and sends an IPI
+ * to the target CPU.
+ *
+ * Returns 0 on success, negative error code on failure
+ */
+int multikernel_send_ipi_data(int cpu, void *data, size_t data_size, unsigned long type);
+
+/* Entry point invoked for the multikernel IPI vector. */
+void generic_multikernel_interrupt(void);
+
+/* Set up the shared memory region; called from start_kernel(). */
+int __init multikernel_init(void);
+
+/* Flexible shared memory APIs (PFN-based) */
+/* Send @pfn to @target_cpu as a PFN-typed IPI message. */
+int mk_send_pfn(int target_cpu, unsigned long pfn);
+/* Copy the PFN carried by @data into @out_pfn; -EINVAL if @data is not a PFN message. */
+int mk_receive_pfn(struct mk_ipi_data *data, unsigned long *out_pfn);
+/* memremap() the page named by a PFN message; NULL if @data is not a PFN message or mapping fails. */
+void *mk_receive_map_page(struct mk_ipi_data *data);
+
+/* Release a mapping obtained from mk_receive_map_page(). */
+#define mk_receive_unmap_page(p) memunmap(p)
+
+#endif /* _LINUX_MULTIKERNEL_H */
diff --git a/init/main.c b/init/main.c
index 5753e9539ae6..46a199bcb389 100644
--- a/init/main.c
+++ b/init/main.c
@@ -103,6 +103,7 @@
#include <linux/randomize_kstack.h>
#include <linux/pidfs.h>
#include <linux/ptdump.h>
+#include <linux/multikernel.h>
#include <net/net_namespace.h>
#include <asm/io.h>
@@ -955,6 +956,7 @@ void start_kernel(void)
vfs_caches_init_early();
sort_main_extable();
trap_init();
+ multikernel_init();
mm_core_init();
maple_tree_init();
poking_init();
diff --git a/kernel/Makefile b/kernel/Makefile
index c60623448235..e5216610a4e7 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o \
extable.o params.o \
kthread.o sys_ni.o nsproxy.o \
notifier.o ksysfs.o cred.o reboot.o \
- async.o range.o smpboot.o ucount.o regset.o ksyms_common.o
+ async.o range.o smpboot.o ucount.o regset.o ksyms_common.o multikernel.o
obj-$(CONFIG_MULTIUSER) += groups.o
obj-$(CONFIG_VHOST_TASK) += vhost_task.o
diff --git a/kernel/multikernel.c b/kernel/multikernel.c
new file mode 100644
index 000000000000..74e2f84b7914
--- /dev/null
+++ b/kernel/multikernel.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Multikernel Technologies, Inc. All rights reserved
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/pfn.h>
+#include <linux/multikernel.h>
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/memblock.h>
+#include <asm/apic.h>
+
+/* Memory parameters for shared region */
+/* Bytes for one mk_ipi_data slot per CPU; not referenced elsewhere in this patch */
+#define MK_IPI_DATA_SIZE (sizeof(struct mk_ipi_data) * NR_CPUS)
+/* Size of the shared data structure itself */
+#define MK_MEM_BASE_SIZE (sizeof(struct mk_shared_data))
+/* Size that is allocated and mapped: the structure plus one extra page */
+#define MK_MEM_SIZE (MK_MEM_BASE_SIZE + PAGE_SIZE)
+
+/* Boot parameter for physical address */
+static unsigned long mk_phys_addr_param;
+
+/*
+ * Parse the "mk_shared_memory=" early parameter: the physical address of the
+ * shared region. Base 0 lets kstrtoul() auto-detect hex/octal/decimal.
+ *
+ * Returns 0 on success or a negative errno on malformed input.
+ */
+static int __init multikernel_phys_addr_setup(char *str)
+{
+	/* A bare "mk_shared_memory" with no "=value" is delivered as NULL. */
+	if (!str)
+		return -EINVAL;
+
+	return kstrtoul(str, 0, &mk_phys_addr_param);
+}
+early_param("mk_shared_memory", multikernel_phys_addr_setup);
+
+/* Allocated/assigned physical address for shared memory */
+static phys_addr_t mk_phys_addr_base;
+
+/*
+ * Resource structure for tracking the memory in /proc/iomem.
+ *
+ * Deliberately not __ro_after_init: once the resource is linked into the
+ * iomem tree, its parent/sibling/child pointers can be rewritten whenever a
+ * neighbouring resource is inserted or released, which may happen after the
+ * init sections have been made read-only.
+ */
+static struct resource mk_mem_res = {
+	.name = "Multikernel Shared Memory",
+	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
+};
+
+/*
+ * Shared memory structures.
+ *
+ * Layout of the shared region: one message slot per CPU. A sender writes the
+ * slot indexed by the target CPU number, and the receiving CPU reads the slot
+ * indexed by its own CPU number.
+ */
+struct mk_shared_data {
+ struct mk_ipi_data cpu_data[NR_CPUS]; /* Data area for each CPU */
+};
+
+/* Virtual address of the remapped shared region; NULL until it has been mapped */
+static struct mk_shared_data *mk_shared_mem;
+
+/* Registered IPI callbacks: the lock and the list head it protects */
+static DEFINE_RAW_SPINLOCK(mk_handlers_lock);
+static struct mk_ipi_handler *mk_handlers;
+
+/* irq_work callback: run the owning handler's callback on its saved data slot. */
+static void handler_work(struct irq_work *work)
+{
+	struct mk_ipi_handler *h = container_of(work, struct mk_ipi_handler, work);
+
+	if (!h->callback)
+		return;
+
+	h->callback(h->saved_data, h->context);
+}
+
+/**
+ * multikernel_register_handler - Register a callback for multikernel IPI
+ * @callback: Function to call when IPI is received
+ * @ctx: Context pointer passed to the callback
+ *
+ * Allocates a handler with GFP_KERNEL and pushes it onto the head of the
+ * global handler list.
+ *
+ * Returns pointer to handler on success, NULL if @callback is NULL or the
+ * allocation fails
+ */
+struct mk_ipi_handler *multikernel_register_handler(mk_ipi_callback_t callback, void *ctx)
+{
+	struct mk_ipi_handler *node;
+	unsigned long irqflags;
+
+	if (!callback)
+		return NULL;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return NULL;
+
+	init_irq_work(&node->work, handler_work);
+	node->context = ctx;
+	node->callback = callback;
+
+	/* Publish the fully initialised node at the head of the list. */
+	raw_spin_lock_irqsave(&mk_handlers_lock, irqflags);
+	node->next = mk_handlers;
+	mk_handlers = node;
+	raw_spin_unlock_irqrestore(&mk_handlers_lock, irqflags);
+
+	return node;
+}
+EXPORT_SYMBOL(multikernel_register_handler);
+
+/**
+ * multikernel_unregister_handler - Unregister a multikernel IPI callback
+ * @handler: Handler pointer returned from multikernel_register_handler
+ *
+ * Unlinks @handler from the handler list, waits for its irq_work to finish
+ * and frees it. A NULL @handler is ignored. A @handler that is not on the
+ * list (for example, one that was already unregistered) triggers a one-time
+ * warning and is left untouched, because it may already have been freed.
+ */
+void multikernel_unregister_handler(struct mk_ipi_handler *handler)
+{
+	struct mk_ipi_handler **link;
+	unsigned long flags;
+	bool found = false;
+
+	if (!handler)
+		return;
+
+	raw_spin_lock_irqsave(&mk_handlers_lock, flags);
+	for (link = &mk_handlers; *link; link = &(*link)->next) {
+		if (*link == handler) {
+			*link = handler->next;
+			found = true;
+			break;
+		}
+	}
+	raw_spin_unlock_irqrestore(&mk_handlers_lock, flags);
+
+	/* Never sync or free something this list does not own. */
+	if (WARN_ON_ONCE(!found))
+		return;
+
+	/* Wait for pending work to complete */
+	irq_work_sync(&handler->work);
+	kfree(handler);
+}
+EXPORT_SYMBOL(multikernel_unregister_handler);
+
+/**
+ * multikernel_send_ipi_data - Send data to another CPU via IPI
+ * @cpu: Target CPU
+ * @data: Pointer to data to send; may be NULL only when @data_size is 0
+ * @data_size: Size of data, at most MK_MAX_DATA_SIZE
+ * @type: User-defined type identifier; must fit in an unsigned int
+ *
+ * This function copies the data to the target CPU's slot in shared memory
+ * and sends an IPI to the target CPU.
+ *
+ * Returns 0 on success, -EINVAL on a bad argument, -ENOMEM if the shared
+ * memory region has not been set up
+ */
+int multikernel_send_ipi_data(int cpu, void *data, size_t data_size, unsigned long type)
+{
+	struct mk_ipi_data *target;
+
+	if (cpu < 0 || cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	if (data_size > MK_MAX_DATA_SIZE)
+		return -EINVAL; /* Data too large for buffer */
+
+	/* A non-empty payload without a source would publish stale bytes. */
+	if (data_size && !data)
+		return -EINVAL;
+
+	/* mk_ipi_data.type is an unsigned int; refuse to truncate silently. */
+	if (type != (unsigned int)type)
+		return -EINVAL;
+
+	/* Ensure shared memory is initialized */
+	if (!mk_shared_mem)
+		return -ENOMEM;
+
+	/* Get target CPU's data area from shared memory */
+	target = &mk_shared_mem->cpu_data[cpu];
+
+	/*
+	 * NOTE(review): nothing here serialises concurrent senders to the
+	 * same target, and the slot can be overwritten before the receiver
+	 * has consumed it - confirm whether callers are expected to
+	 * serialise.
+	 */
+
+	/* Set header information */
+	target->data_size = data_size;
+	target->sender_cpu = smp_processor_id();
+	target->type = type;
+
+	/* Copy the actual data into the buffer */
+	if (data_size)
+		memcpy(target->buffer, data, data_size);
+
+	/* Send IPI to target CPU */
+	__apic_send_IPI(cpu, MULTIKERNEL_VECTOR);
+
+	return 0;
+}
+EXPORT_SYMBOL(multikernel_send_ipi_data);
+
+/**
+ * multikernel_interrupt_handler - Handle the multikernel IPI
+ *
+ * Looks up the receiving CPU's slot in shared memory and, for every
+ * registered handler, records that slot and queues the handler's irq_work.
+ * The callbacks themselves run later from handler_work().
+ *
+ * NOTE(review): each handler has a single saved_data/irq_work pair and the
+ * return value of irq_work_queue() is ignored - confirm what should happen
+ * when a handler's work is still pending from an earlier IPI.
+ */
+static void multikernel_interrupt_handler(void)
+{
+	int this_cpu = smp_processor_id();
+	struct mk_ipi_handler *h;
+	struct mk_ipi_data *slot;
+
+	/* Nothing to deliver until the shared region has been mapped. */
+	if (!mk_shared_mem) {
+		pr_err("Multikernel IPI received but shared memory not initialized\n");
+		return;
+	}
+
+	/* The sender wrote into the slot indexed by this CPU's number. */
+	slot = &mk_shared_mem->cpu_data[this_cpu];
+
+	pr_debug("Multikernel IPI received on CPU %d from CPU %d, type=%u\n",
+		 this_cpu, slot->sender_cpu, slot->type);
+
+	raw_spin_lock(&mk_handlers_lock);
+	h = mk_handlers;
+	while (h) {
+		h->saved_data = slot;
+		irq_work_queue(&h->work);
+		h = h->next;
+	}
+	raw_spin_unlock(&mk_handlers_lock);
+}
+
+/**
+ * generic_multikernel_interrupt - entry point called by the IPI vector
+ *
+ * This is the function that gets called by the IPI vector handler. It only
+ * forwards to multikernel_interrupt_handler().
+ */
+void generic_multikernel_interrupt(void)
+{
+ multikernel_interrupt_handler();
+}
+
+/**
+ * setup_shared_memory - Initialize shared memory for inter-kernel communication
+ *
+ * Picks the physical base of the shared region (the "mk_shared_memory="
+ * address if one was given, otherwise a fresh memblock allocation), maps
+ * MK_MEM_SIZE bytes of it write-back with memremap() and zeroes the
+ * mk_shared_data structure at its start.
+ *
+ * NOTE(review): a boot-parameter address is used as-is and is not reserved
+ * here, and the region is zeroed unconditionally - confirm that both are
+ * intended when another kernel instance may already be using the region.
+ *
+ * Returns 0 on success, -ENOMEM if allocation or mapping fails
+ */
+static int __init setup_shared_memory(void)
+{
+	/* Check if a fixed physical address was provided via parameter */
+	if (mk_phys_addr_param) {
+		/* Use the provided physical address */
+		mk_phys_addr_base = (phys_addr_t)mk_phys_addr_param;
+		pr_info("Using specified physical address 0x%llx for multikernel shared memory\n",
+			(unsigned long long)mk_phys_addr_base);
+	} else {
+		/* Dynamically allocate contiguous physical memory using memblock */
+		mk_phys_addr_base = memblock_phys_alloc(MK_MEM_SIZE, PAGE_SIZE);
+		if (!mk_phys_addr_base) {
+			pr_err("Failed to allocate physical memory for multikernel IPI data\n");
+			return -ENOMEM;
+		}
+	}
+
+	/* Map the physical memory region to virtual address space */
+	mk_shared_mem = memremap(mk_phys_addr_base, MK_MEM_SIZE, MEMREMAP_WB);
+	if (!mk_shared_mem) {
+		pr_err("Failed to map shared memory at 0x%llx for multikernel IPI data\n",
+		       (unsigned long long)mk_phys_addr_base);
+
+		/* Only free the memory if we allocated it dynamically */
+		if (!mk_phys_addr_param)
+			memblock_phys_free(mk_phys_addr_base, MK_MEM_SIZE);
+		return -ENOMEM;
+	}
+
+	/* Initialize the memory to zero */
+	memset(mk_shared_mem, 0, sizeof(struct mk_shared_data));
+
+	/* %p, not %px: do not expose the raw kernel virtual address in the log. */
+	pr_info("Allocated and mapped multikernel shared memory: phys=0x%llx, virt=%p, size=%lu bytes\n",
+		(unsigned long long)mk_phys_addr_base, mk_shared_mem, MK_MEM_SIZE);
+
+	return 0;
+}
+
+/*
+ * Early-boot entry point: set up the shared memory region.
+ *
+ * Returns 0 on success or the negative error from setup_shared_memory().
+ */
+int __init multikernel_init(void)
+{
+	int err = setup_shared_memory();
+
+	if (err < 0)
+		return err;
+
+	pr_info("Multikernel IPI support initialized\n");
+	return 0;
+}
+
+/*
+ * Register the shared region in the iomem resource tree so that it shows up
+ * in /proc/iomem. Runs as a core_initcall, after the early-boot setup.
+ *
+ * Returns 0 on success or when there is nothing to register, otherwise the
+ * error from insert_resource().
+ */
+static int __init init_shared_memory(void)
+{
+	int ret;
+
+	/*
+	 * If early setup failed there is no region; do not publish a bogus
+	 * resource at whatever mk_phys_addr_base happens to hold.
+	 */
+	if (!mk_shared_mem)
+		return 0;
+
+	/* Set up resource structure for /proc/iomem visibility */
+	mk_mem_res.start = mk_phys_addr_base;
+	mk_mem_res.end = mk_phys_addr_base + MK_MEM_SIZE - 1;
+
+	/* Register the resource in the global resource tree */
+	ret = insert_resource(&iomem_resource, &mk_mem_res);
+	if (ret) {
+		pr_warn("Could not register multikernel shared memory region in resource tracking\n");
+		/* Not fatal for IPI delivery; report the real error code. */
+		return ret;
+	}
+
+	pr_info("Registered multikernel shared memory in resource tree: 0x%llx-0x%llx\n",
+		(unsigned long long)mk_mem_res.start, (unsigned long long)mk_mem_res.end);
+	return 0;
+}
+core_initcall(init_shared_memory);
+
+/* ---- Flexible shared memory APIs (PFN-based) ---- */
+/* Message type tag marking a payload that is a single PFN */
+#define MK_PFN_IPI_TYPE 0x80000001U
+
+/*
+ * Send a PFN to another kernel via mk_ipi_data.
+ *
+ * The PFN travels as the whole payload of an MK_PFN_IPI_TYPE message.
+ * Returns whatever multikernel_send_ipi_data() returns.
+ */
+int mk_send_pfn(int target_cpu, unsigned long pfn)
+{
+	unsigned long payload = pfn;
+
+	return multikernel_send_ipi_data(target_cpu, &payload, sizeof(payload),
+					 MK_PFN_IPI_TYPE);
+}
+
+/*
+ * Receive a PFN from mk_ipi_data.
+ *
+ * Succeeds only for a message tagged MK_PFN_IPI_TYPE whose payload is exactly
+ * one unsigned long. Returns 0 and stores the PFN in @out_pfn, or -EINVAL
+ * for a NULL argument or any other message.
+ */
+int mk_receive_pfn(struct mk_ipi_data *data, unsigned long *out_pfn)
+{
+	if (!data || !out_pfn)
+		return -EINVAL;
+
+	if (data->type != MK_PFN_IPI_TYPE)
+		return -EINVAL;
+
+	if (data->data_size != sizeof(*out_pfn))
+		return -EINVAL;
+
+	memcpy(out_pfn, data->buffer, sizeof(*out_pfn));
+	return 0;
+}
+
+/*
+ * Map the page named by a PFN message.
+ *
+ * Extracts the PFN with mk_receive_pfn() and maps that single page
+ * write-back with memremap(). Returns the mapping, or NULL if @data is not a
+ * valid PFN message or the mapping fails.
+ *
+ * NOTE(review): the PFN is taken from shared memory without any range
+ * check - confirm whether it should be validated before mapping.
+ */
+void *mk_receive_map_page(struct mk_ipi_data *data)
+{
+	unsigned long pfn;
+	int ret;
+
+	ret = mk_receive_pfn(data, &pfn);
+	if (ret < 0)
+		return NULL;
+
+	/*
+	 * PFN_PHYS() widens to phys_addr_t before shifting, so the address
+	 * is not truncated where phys_addr_t is wider than unsigned long.
+	 */
+	return memremap(PFN_PHYS(pfn), PAGE_SIZE, MEMREMAP_WB);
+}
--
2.34.1
Powered by blists - more mailing lists