lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <20251019061631.2235405-15-xiyou.wangcong@gmail.com>
Date: Sat, 18 Oct 2025 23:16:28 -0700
From: Cong Wang <xiyou.wangcong@...il.com>
To: linux-kernel@...r.kernel.org
Cc: jiri@...nulli.us,
	stefanha@...hat.com,
	multikernel@...ts.linux.dev,
	pasha.tatashin@...een.com,
	Cong Wang <cwang@...tikernel.io>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Baoquan He <bhe@...hat.com>,
	Alexander Graf <graf@...zon.com>,
	Mike Rapoport <rppt@...nel.org>,
	Changyuan Lyu <changyuanl@...gle.com>,
	kexec@...ts.infradead.org,
	linux-mm@...ck.org
Subject: [RFC Patch v2 14/16] multikernel: Add messaging layer for inter-kernel communication

From: Cong Wang <cwang@...tikernel.io>

Introduce a structured messaging system built on top of the existing
multikernel IPI infrastructure to enable reliable communication between
kernel instances running on different CPUs.

The messaging layer provides:

* Simple message format with type/subtype hierarchy for extensibility
* Support for I/O interrupt forwarding between kernel instances
* Resource management messages for CPU and memory hotplug operations
* Type-safe payload structures with validation
* Handler registration system for message processing
* Convenient inline functions for common operations

Message types include:
- MK_MSG_IO: I/O interrupt forwarding and load balancing
- MK_MSG_RESOURCE: CPU/memory add/remove operations
- MK_MSG_SYSTEM: System-level coordination messages
- MK_MSG_USER: User-defined message types

The implementation relies on intra-machine IPIs being reliable, so it adds
no acknowledgement or retransmission logic, which keeps it simple and fast.
Messages are limited to the existing 256-byte IPI buffer size, with larger
data transfers handled via the existing PFN-based shared memory mechanism.

This messaging foundation enables sophisticated multikernel coordination
scenarios including dynamic resource allocation, interrupt load
balancing, and system-wide state management.

Signed-off-by: Cong Wang <cwang@...tikernel.io>
---
 include/linux/multikernel.h    | 200 ++++++++++++++++++++++++
 kernel/multikernel/Makefile    |   2 +-
 kernel/multikernel/core.c      |   7 +
 kernel/multikernel/messaging.c | 278 +++++++++++++++++++++++++++++++++
 4 files changed, 486 insertions(+), 1 deletion(-)
 create mode 100644 kernel/multikernel/messaging.c

diff --git a/include/linux/multikernel.h b/include/linux/multikernel.h
index ee96bd2332b6..3bc07361145b 100644
--- a/include/linux/multikernel.h
+++ b/include/linux/multikernel.h
@@ -80,6 +80,206 @@ void *mk_receive_map_page(struct mk_ipi_data *data);
 
 #define mk_receive_unmap_page(p) memunmap(p)
 
+/*
+ * Multikernel Messaging System
+ */
+
+/**
+ * Message type definitions - organized by category
+ */
+
+/*
+ * Top-level message categories. Every subtype below is defined as its
+ * category base plus a small index.
+ */
+#define MK_MSG_IO           0x1000
+#define MK_MSG_RESOURCE     0x2000
+#define MK_MSG_SYSTEM       0x3000
+#define MK_MSG_USER         0x4000
+
+/* I/O interrupt forwarding subtypes (MK_MSG_IO category) */
+#define MK_IO_IRQ_FORWARD   (MK_MSG_IO + 1)
+#define MK_IO_IRQ_BALANCE   (MK_MSG_IO + 2)
+#define MK_IO_IRQ_MASK      (MK_MSG_IO + 3)
+#define MK_IO_IRQ_UNMASK    (MK_MSG_IO + 4)
+
+/* Resource management subtypes (MK_MSG_RESOURCE category) */
+#define MK_RES_CPU_ADD      (MK_MSG_RESOURCE + 1)
+#define MK_RES_CPU_REMOVE   (MK_MSG_RESOURCE + 2)
+#define MK_RES_MEM_ADD      (MK_MSG_RESOURCE + 3)
+#define MK_RES_MEM_REMOVE   (MK_MSG_RESOURCE + 4)
+#define MK_RES_QUERY        (MK_MSG_RESOURCE + 5)
+
+/* System management subtypes (MK_MSG_SYSTEM category) */
+#define MK_SYS_HEARTBEAT    (MK_MSG_SYSTEM + 1)
+#define MK_SYS_SHUTDOWN     (MK_MSG_SYSTEM + 2)
+
+/*
+ * Core message structure: a fixed header immediately followed by
+ * payload_len bytes of payload.
+ *
+ * NOTE(review): the layout contains no explicit padding. On ABIs where u64
+ * is 8-byte aligned, sizeof(struct mk_message) is 24 while payload[] starts
+ * at offset 20, so size calculations based on sizeof() count 4 bytes more
+ * than the header fields occupy - confirm this is intended for a structure
+ * exchanged between kernel instances.
+ */
+struct mk_message {
+	u32 msg_type;           /* Message type identifier */
+	u32 msg_subtype;        /* Subtype for specific operations */
+	u64 msg_id;             /* Optional message ID for correlation */
+	u32 payload_len;        /* Length of payload data */
+	u8 payload[];           /* Variable payload (up to remaining IPI buffer) */
+};
+
+/**
+ * Payload structures for specific message types
+ */
+
+/*
+ * I/O interrupt forwarding payload, as built by mk_send_irq_forward()
+ * for MK_IO_IRQ_FORWARD messages.
+ */
+struct mk_io_irq_payload {
+	u32 irq_number;         /* Hardware IRQ number */
+	u32 vector;             /* Interrupt vector */
+	u32 device_id;          /* Device identifier (optional) */
+	u32 flags;              /* Control flags (priority, etc.) */
+};
+
+/* IRQ control flags (presumably OR-ed into mk_io_irq_payload.flags) */
+#define MK_IRQ_HIGH_PRIORITY    0x01
+#define MK_IRQ_LOW_LATENCY      0x02
+#define MK_IRQ_EDGE_TRIGGERED   0x04
+#define MK_IRQ_LEVEL_TRIGGERED  0x08
+
+/*
+ * CPU resource operations payload, as built by mk_send_cpu_add() and
+ * mk_send_cpu_remove(). The remove helper fills in only cpu_id and leaves
+ * the other fields zero.
+ */
+struct mk_cpu_resource_payload {
+	u32 cpu_id;             /* Physical CPU ID */
+	u32 numa_node;          /* NUMA node (optional) */
+	u32 flags;              /* CPU capabilities/attributes */
+};
+
+/* CPU capability flags (presumably OR-ed into mk_cpu_resource_payload.flags) */
+#define MK_CPU_HAS_AVX512       0x01
+#define MK_CPU_HAS_TSX          0x02
+#define MK_CPU_HYPERTHREAD      0x04
+
+/*
+ * Memory resource operations payload, as built by mk_send_mem_add() and
+ * mk_send_mem_remove(). The remove helper fills in only start_pfn and
+ * nr_pages and leaves the other fields zero.
+ */
+struct mk_mem_resource_payload {
+	u64 start_pfn;          /* Starting page frame number */
+	u64 nr_pages;           /* Number of pages */
+	u32 numa_node;          /* NUMA node */
+	u32 mem_type;           /* Memory type (normal/DMA/etc.) */
+};
+
+/* Memory types (values for mk_mem_resource_payload.mem_type) */
+#define MK_MEM_NORMAL           0x01
+#define MK_MEM_DMA              0x02
+#define MK_MEM_DMA32            0x04
+#define MK_MEM_HIGHMEM          0x08
+
+/**
+ * mk_msg_handler_t - Message handler callback type
+ * @msg_type: Type field taken from the received message header
+ * @subtype: Subtype field taken from the received message header
+ * @payload: Payload bytes, or NULL when @payload_len is 0
+ * @payload_len: Payload length in bytes
+ * @ctx: Context pointer that was passed to mk_register_msg_handler()
+ *
+ * Called from the IPI callback that the messaging layer registers for the
+ * message type.
+ * NOTE(review): presumably this runs in IPI/interrupt context and must not
+ * sleep - confirm against the IPI layer.
+ */
+typedef void (*mk_msg_handler_t)(u32 msg_type, u32 subtype,
+				 void *payload, u32 payload_len, void *ctx);
+
+/*
+ * Message API functions
+ */
+
+/**
+ * mk_send_message - Send a message to another multikernel instance
+ * @instance_id: Target multikernel instance ID
+ * @msg_type: Message type identifier
+ * @subtype: Message subtype
+ * @payload: Pointer to payload data (can be NULL)
+ * @payload_len: Length of payload data
+ *
+ * The message header plus payload must fit in MK_MAX_DATA_SIZE bytes.
+ *
+ * Returns 0 on success, negative error code on failure (for example
+ * -EMSGSIZE when the message does not fit, -ENOMEM when the temporary
+ * message buffer cannot be allocated).
+ */
+int mk_send_message(int instance_id, u32 msg_type, u32 subtype,
+		    void *payload, u32 payload_len);
+
+/**
+ * mk_register_msg_handler - Register handler for specific message type
+ * @msg_type: Message type to handle
+ * @handler: Handler function
+ * @ctx: Context pointer passed to handler
+ *
+ * Only one handler can be registered per message type.
+ *
+ * Returns 0 on success, negative error code on failure (-EINVAL for a NULL
+ * @handler, -EEXIST when @msg_type already has a handler).
+ */
+int mk_register_msg_handler(u32 msg_type, mk_msg_handler_t handler, void *ctx);
+
+/**
+ * mk_unregister_msg_handler - Unregister message handler
+ * @msg_type: Message type to unregister
+ * @handler: Handler function to remove
+ *
+ * Returns 0 on success, negative error code on failure (-EINVAL for a NULL
+ * @handler, -ENOENT when no registration matches both @msg_type and
+ * @handler).
+ */
+int mk_unregister_msg_handler(u32 msg_type, mk_msg_handler_t handler);
+
+/**
+ * Convenience functions for common message types
+ */
+
+/* Send an MK_IO_IRQ_FORWARD message describing one interrupt */
+static inline int mk_send_irq_forward(int instance_id, u32 irq_number,
+				      u32 vector, u32 device_id, u32 flags)
+{
+	struct mk_io_irq_payload irq = { 0 };
+
+	irq.irq_number = irq_number;
+	irq.vector = vector;
+	irq.device_id = device_id;
+	irq.flags = flags;
+
+	return mk_send_message(instance_id, MK_MSG_IO, MK_IO_IRQ_FORWARD,
+			       &irq, sizeof(irq));
+}
+
+/* CPU resource management: send an MK_RES_CPU_ADD message */
+static inline int mk_send_cpu_add(int instance_id, u32 cpu_id,
+				  u32 numa_node, u32 flags)
+{
+	struct mk_cpu_resource_payload cpu = { 0 };
+
+	cpu.cpu_id = cpu_id;
+	cpu.numa_node = numa_node;
+	cpu.flags = flags;
+
+	return mk_send_message(instance_id, MK_MSG_RESOURCE, MK_RES_CPU_ADD,
+			       &cpu, sizeof(cpu));
+}
+
+/* Send an MK_RES_CPU_REMOVE message; only cpu_id is filled in */
+static inline int mk_send_cpu_remove(int instance_id, u32 cpu_id)
+{
+	/* Members not named in the initializer (numa_node, flags) are zero */
+	struct mk_cpu_resource_payload cpu = { .cpu_id = cpu_id };
+
+	return mk_send_message(instance_id, MK_MSG_RESOURCE, MK_RES_CPU_REMOVE,
+			       &cpu, sizeof(cpu));
+}
+
+/* Memory resource management: send an MK_RES_MEM_ADD message */
+static inline int mk_send_mem_add(int instance_id, u64 start_pfn, u64 nr_pages,
+				  u32 numa_node, u32 mem_type)
+{
+	struct mk_mem_resource_payload mem = { 0 };
+
+	mem.start_pfn = start_pfn;
+	mem.nr_pages = nr_pages;
+	mem.numa_node = numa_node;
+	mem.mem_type = mem_type;
+
+	return mk_send_message(instance_id, MK_MSG_RESOURCE, MK_RES_MEM_ADD,
+			       &mem, sizeof(mem));
+}
+
+/* Send an MK_RES_MEM_REMOVE message; only the PFN range is filled in */
+static inline int mk_send_mem_remove(int instance_id, u64 start_pfn, u64 nr_pages)
+{
+	/* Members not named in the initializer (numa_node, mem_type) are zero */
+	struct mk_mem_resource_payload mem = {
+		.start_pfn = start_pfn,
+		.nr_pages = nr_pages,
+	};
+
+	return mk_send_message(instance_id, MK_MSG_RESOURCE, MK_RES_MEM_REMOVE,
+			       &mem, sizeof(mem));
+}
+
+/* Messaging system functions */
+int __init mk_messaging_init(void);
+void mk_messaging_cleanup(void);
+
 struct resource;
 
 extern phys_addr_t multikernel_alloc(size_t size);
diff --git a/kernel/multikernel/Makefile b/kernel/multikernel/Makefile
index b539acc656c6..f133e1eaf534 100644
--- a/kernel/multikernel/Makefile
+++ b/kernel/multikernel/Makefile
@@ -3,7 +3,7 @@
 # Makefile for multikernel support
 #
 
-obj-y += core.o mem.o kernfs.o dts.o ipi.o
+obj-y += core.o mem.o kernfs.o dts.o ipi.o messaging.o
 
 # Add libfdt include path for device tree parsing
 CFLAGS_dts.o = -I $(srctree)/scripts/dtc/libfdt
diff --git a/kernel/multikernel/core.c b/kernel/multikernel/core.c
index ee7a21327ea5..37dbf0cf4be6 100644
--- a/kernel/multikernel/core.c
+++ b/kernel/multikernel/core.c
@@ -505,9 +505,16 @@ static int __init multikernel_init(void)
 {
 	int ret;
 
+	ret = mk_messaging_init();
+	if (ret < 0) {
+		pr_err("Failed to initialize multikernel messaging: %d\n", ret);
+		return ret;
+	}
+
 	ret = mk_kernfs_init();
 	if (ret < 0) {
 		pr_err("Failed to initialize multikernel sysfs interface: %d\n", ret);
+		mk_messaging_cleanup();
 		return ret;
 	}
 
diff --git a/kernel/multikernel/messaging.c b/kernel/multikernel/messaging.c
new file mode 100644
index 000000000000..be1fba8778ec
--- /dev/null
+++ b/kernel/multikernel/messaging.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Multikernel Messaging System
+ * Copyright (C) 2025 Multikernel Technologies, Inc. All rights reserved
+ *
+ * Simple messaging layer on top of multikernel IPI infrastructure
+ */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/multikernel.h>
+
+/*
+ * One registry entry per message type: ties the user-supplied message
+ * handler and its context to the IPI handler registered for that type.
+ * Entries form a singly linked list through @next.
+ */
+struct mk_msg_type_handler {
+	u32 msg_type;
+	struct mk_ipi_handler *ipi_handler;
+	mk_msg_handler_t msg_handler;
+	void *context;
+	struct mk_msg_type_handler *next;
+};
+
+/* Head of the registry list; protected by mk_msg_type_handlers_lock */
+static struct mk_msg_type_handler *mk_msg_type_handlers;
+static DEFINE_RAW_SPINLOCK(mk_msg_type_handlers_lock);
+
+/**
+ * mk_message_type_ipi_callback - IPI callback to handle incoming messages for a specific type
+ * @data: IPI data containing the message
+ * @ctx: The struct mk_msg_type_handler registered for this message type
+ *
+ * Checks that the IPI type matches the registration and that the buffer
+ * holds a complete header and payload, then passes the payload to the
+ * registered message handler. Malformed messages are logged and dropped.
+ */
+static void mk_message_type_ipi_callback(struct mk_ipi_data *data, void *ctx)
+{
+	struct mk_msg_type_handler *type_handler = ctx;
+	struct mk_message *msg;
+	u32 msg_type, msg_subtype;
+	void *payload;
+	u32 payload_len;
+
+	if (!type_handler || !type_handler->msg_handler) {
+		pr_warn("Multikernel message received but no handler registered\n");
+		return;
+	}
+
+	/* Verify this matches our expected message type */
+	if (data->type != type_handler->msg_type) {
+		pr_warn("Multikernel message type mismatch: expected 0x%x, got 0x%x\n",
+			type_handler->msg_type, data->type);
+		return;
+	}
+
+	/* Ensure we have at least a message header */
+	if (data->data_size < sizeof(struct mk_message)) {
+		pr_warn("Multikernel message too small: %zu bytes\n", data->data_size);
+		return;
+	}
+
+	msg = (struct mk_message *)data->buffer;
+
+	/*
+	 * Read every header field exactly once so that the length validated
+	 * below is the same value later handed to the handler. The buffer is
+	 * presumably written by another kernel instance, so a second read
+	 * could observe a different value.
+	 */
+	msg_type = READ_ONCE(msg->msg_type);
+	msg_subtype = READ_ONCE(msg->msg_subtype);
+	payload_len = READ_ONCE(msg->payload_len);
+
+	/* The payload must lie entirely within the received data */
+	if (payload_len > (data->data_size - sizeof(struct mk_message))) {
+		pr_warn("Multikernel message payload length invalid: %u > %zu\n",
+			payload_len, data->data_size - sizeof(struct mk_message));
+		return;
+	}
+
+	/* Handlers see a NULL payload for empty messages */
+	payload = payload_len > 0 ? msg->payload : NULL;
+
+	pr_debug("Multikernel message received: type=0x%x, subtype=0x%x, len=%u from CPU %d\n",
+		 msg_type, msg_subtype, payload_len, data->sender_cpu);
+
+	/* Call the registered handler for this message type */
+	type_handler->msg_handler(msg_type, msg_subtype, payload, payload_len,
+				  type_handler->context);
+}
+
+/**
+ * mk_send_message - Send a message to another multikernel instance
+ * @instance_id: Target multikernel instance ID
+ * @msg_type: Message type identifier, also used as the IPI type
+ * @subtype: Message subtype
+ * @payload: Pointer to payload data (may be NULL only if @payload_len is 0)
+ * @payload_len: Length of payload data in bytes
+ *
+ * Builds a struct mk_message header followed by a copy of @payload in a
+ * temporary buffer and passes it to multikernel_send_ipi_data().
+ *
+ * Returns 0 on success, -EINVAL for a NULL @payload with a non-zero
+ * @payload_len, -EMSGSIZE if the message exceeds MK_MAX_DATA_SIZE, -ENOMEM
+ * if the temporary buffer cannot be allocated, or the negative error code
+ * returned by multikernel_send_ipi_data().
+ */
+int mk_send_message(int instance_id, u32 msg_type, u32 subtype,
+		    void *payload, u32 payload_len)
+{
+	struct mk_message *msg;
+	size_t total_size;
+	int ret;
+
+	/* A length without data would advertise bytes the caller never supplied */
+	if (!payload && payload_len > 0)
+		return -EINVAL;
+
+	/*
+	 * Bound payload_len on its own first so that the addition below
+	 * cannot wrap where size_t is only 32 bits wide.
+	 */
+	if (payload_len > MK_MAX_DATA_SIZE) {
+		pr_err("Multikernel message payload too large: %u > %d bytes\n",
+		       payload_len, MK_MAX_DATA_SIZE);
+		return -EMSGSIZE;
+	}
+
+	/* Calculate total message size */
+	total_size = sizeof(*msg) + payload_len;
+
+	/* Check if message fits in IPI buffer */
+	if (total_size > MK_MAX_DATA_SIZE) {
+		pr_err("Multikernel message too large: %zu > %d bytes\n",
+		       total_size, MK_MAX_DATA_SIZE);
+		return -EMSGSIZE;
+	}
+
+	/* Zeroed allocation: padding and msg_id are sent as zero */
+	msg = kzalloc(total_size, GFP_ATOMIC);
+	if (!msg)
+		return -ENOMEM;
+
+	/* Fill in message header */
+	msg->msg_type = msg_type;
+	msg->msg_subtype = subtype;
+	msg->msg_id = 0; /* Could be enhanced with unique IDs later */
+	msg->payload_len = payload_len;
+
+	/* payload is known to be non-NULL whenever payload_len is non-zero */
+	if (payload_len > 0)
+		memcpy(msg->payload, payload, payload_len);
+
+	/* Send via IPI using the message type as IPI type */
+	ret = multikernel_send_ipi_data(instance_id, msg, total_size, msg_type);
+
+	/* The temporary buffer is no longer needed once the send returns */
+	kfree(msg);
+
+	if (ret < 0) {
+		pr_err("Failed to send multikernel message: %d\n", ret);
+		return ret;
+	}
+
+	pr_debug("Multikernel message sent: type=0x%x, subtype=0x%x, len=%u to instance %d\n",
+		 msg_type, subtype, payload_len, instance_id);
+
+	return 0;
+}
+EXPORT_SYMBOL(mk_send_message);
+
+/*
+ * Return the registry entry for @msg_type, or NULL if there is none.
+ * The caller must hold mk_msg_type_handlers_lock.
+ */
+static struct mk_msg_type_handler *mk_find_msg_type_handler(u32 msg_type)
+{
+	struct mk_msg_type_handler *pos;
+
+	for (pos = mk_msg_type_handlers; pos; pos = pos->next) {
+		if (pos->msg_type == msg_type)
+			return pos;
+	}
+
+	return NULL;
+}
+
+/**
+ * mk_register_msg_handler - Register handler for specific message type
+ * @msg_type: Message type to handle
+ * @handler: Handler function
+ * @ctx: Context pointer passed to handler
+ *
+ * Only one handler may be registered per message type.
+ *
+ * Returns 0 on success, -EINVAL for a NULL @handler, -EEXIST if @msg_type
+ * already has a handler, or -ENOMEM if the entry or its IPI handler cannot
+ * be set up.
+ */
+int mk_register_msg_handler(u32 msg_type, mk_msg_handler_t handler, void *ctx)
+{
+	struct mk_msg_type_handler *type_handler;
+	unsigned long flags;
+	bool exists;
+
+	if (!handler)
+		return -EINVAL;
+
+	/* Early check so the common duplicate case fails without allocating */
+	raw_spin_lock_irqsave(&mk_msg_type_handlers_lock, flags);
+	exists = mk_find_msg_type_handler(msg_type) != NULL;
+	raw_spin_unlock_irqrestore(&mk_msg_type_handlers_lock, flags);
+	if (exists) {
+		pr_warn("Handler for message type 0x%x already registered\n", msg_type);
+		return -EEXIST;
+	}
+
+	/* Allocate new type handler entry */
+	type_handler = kzalloc(sizeof(*type_handler), GFP_KERNEL);
+	if (!type_handler)
+		return -ENOMEM;
+
+	type_handler->msg_type = msg_type;
+	type_handler->msg_handler = handler;
+	type_handler->context = ctx;
+
+	/* Register IPI handler for this message type */
+	type_handler->ipi_handler = multikernel_register_handler(mk_message_type_ipi_callback,
+								 type_handler, msg_type);
+	if (!type_handler->ipi_handler) {
+		pr_err("Failed to register IPI handler for message type 0x%x\n", msg_type);
+		kfree(type_handler);
+		return -ENOMEM;
+	}
+
+	/*
+	 * The lock was dropped after the early check, so another caller may
+	 * have registered the same type in the meantime. Re-check inside the
+	 * same critical section that publishes the entry.
+	 */
+	raw_spin_lock_irqsave(&mk_msg_type_handlers_lock, flags);
+	exists = mk_find_msg_type_handler(msg_type) != NULL;
+	if (!exists) {
+		type_handler->next = mk_msg_type_handlers;
+		mk_msg_type_handlers = type_handler;
+	}
+	raw_spin_unlock_irqrestore(&mk_msg_type_handlers_lock, flags);
+
+	if (exists) {
+		/* Lost the race: undo our IPI registration, as unregister does */
+		multikernel_unregister_handler(type_handler->ipi_handler);
+		kfree(type_handler);
+		pr_warn("Handler for message type 0x%x already registered\n", msg_type);
+		return -EEXIST;
+	}
+
+	pr_debug("Registered multikernel message handler for type 0x%x\n", msg_type);
+	return 0;
+}
+EXPORT_SYMBOL(mk_register_msg_handler);
+
+/**
+ * mk_unregister_msg_handler - Unregister message handler
+ * @msg_type: Message type to unregister
+ * @handler: Handler function to remove
+ *
+ * Removes the registry entry that matches both @msg_type and @handler,
+ * unregisters its IPI handler and frees it.
+ *
+ * Returns 0 on success, -EINVAL for a NULL @handler, -ENOENT if no entry
+ * matches.
+ */
+int mk_unregister_msg_handler(u32 msg_type, mk_msg_handler_t handler)
+{
+	struct mk_msg_type_handler **link;
+	struct mk_msg_type_handler *victim = NULL;
+	unsigned long flags;
+
+	if (!handler)
+		return -EINVAL;
+
+	/* Unlink the matching entry while holding the registry lock */
+	raw_spin_lock_irqsave(&mk_msg_type_handlers_lock, flags);
+	for (link = &mk_msg_type_handlers; *link; link = &(*link)->next) {
+		if ((*link)->msg_type != msg_type ||
+		    (*link)->msg_handler != handler)
+			continue;
+
+		victim = *link;
+		*link = victim->next;
+		break;
+	}
+	raw_spin_unlock_irqrestore(&mk_msg_type_handlers_lock, flags);
+
+	if (!victim)
+		return -ENOENT;
+
+	/* Tear down outside the lock: drop the IPI handler, then the entry */
+	if (victim->ipi_handler)
+		multikernel_unregister_handler(victim->ipi_handler);
+	kfree(victim);
+
+	pr_debug("Unregistered multikernel message handler for type 0x%x\n", msg_type);
+	return 0;
+}
+EXPORT_SYMBOL(mk_unregister_msg_handler);
+
+/**
+ * mk_messaging_init - Initialize the messaging system
+ *
+ * Called during multikernel initialization. Currently only logs a message
+ * and always succeeds.
+ *
+ * Returns 0 on success, negative error code on failure
+ */
+int __init mk_messaging_init(void)
+{
+	/*
+	 * Nothing to set up here: IPI handlers are registered per message
+	 * type by mk_register_msg_handler(), not globally.
+	 */
+	pr_info("Multikernel messaging system initialized\n");
+	return 0;
+}
+
+/**
+ * mk_messaging_cleanup - Cleanup the messaging system
+ *
+ * Called during multikernel cleanup. Unregisters and frees every message
+ * type handler that is still registered.
+ */
+void mk_messaging_cleanup(void)
+{
+	struct mk_msg_type_handler *head, *entry;
+	unsigned long flags;
+
+	/* Detach the whole list under the lock, then tear it down unlocked */
+	raw_spin_lock_irqsave(&mk_msg_type_handlers_lock, flags);
+	head = mk_msg_type_handlers;
+	mk_msg_type_handlers = NULL;
+	raw_spin_unlock_irqrestore(&mk_msg_type_handlers_lock, flags);
+
+	while (head) {
+		entry = head;
+		head = entry->next;
+
+		/* Drop the IPI handler before freeing the entry it points at */
+		if (entry->ipi_handler)
+			multikernel_unregister_handler(entry->ipi_handler);
+
+		kfree(entry);
+	}
+
+	pr_info("Multikernel messaging system cleaned up\n");
+}
-- 
2.34.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ