[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250918222607.186488-7-xiyou.wangcong@gmail.com>
Date: Thu, 18 Sep 2025 15:26:05 -0700
From: Cong Wang <xiyou.wangcong@...il.com>
To: linux-kernel@...r.kernel.org
Cc: pasha.tatashin@...een.com,
Cong Wang <cwang@...tikernel.io>,
Andrew Morton <akpm@...ux-foundation.org>,
Baoquan He <bhe@...hat.com>,
Alexander Graf <graf@...zon.com>,
Mike Rapoport <rppt@...nel.org>,
Changyuan Lyu <changyuanl@...gle.com>,
kexec@...ts.infradead.org,
linux-mm@...ck.org
Subject: [RFC Patch 6/7] kexec: Implement dynamic kimage tracking
From: Cong Wang <cwang@...tikernel.io>
Replace static kexec_image and kexec_crash_image globals with a dynamic
linked list infrastructure to support multiple kernel images. This change
enables multikernel functionality while maintaining backward compatibility.
Key changes:
- Add list_head member to kimage structure for chaining
- Implement thread-safe linked list management with global mutex
- Update kexec load/unload logic to use list-based APIs for multikernel
- Add helper functions for finding and managing multiple kimages
- Preserve existing kexec_image/kexec_crash_image pointers for compatibility
- Update architecture-specific crash handling to use new APIs
The multikernel case now properly uses list-based management instead of
overwriting compatibility pointers, allowing multiple multikernel images
to coexist in the system.
Signed-off-by: Cong Wang <cwang@...tikernel.io>
---
arch/powerpc/kexec/crash.c | 8 +-
arch/x86/kernel/crash.c | 4 +-
include/linux/kexec.h | 16 ++++
kernel/kexec.c | 62 +++++++++++++-
kernel/kexec_core.c | 165 ++++++++++++++++++++++++++++++++++++-
kernel/kexec_file.c | 33 +++++++-
6 files changed, 274 insertions(+), 14 deletions(-)
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index a325c1c02f96..af190fad4f22 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -477,13 +477,13 @@ static void update_crash_elfcorehdr(struct kimage *image, struct memory_notify *
ptr = __va(mem);
if (ptr) {
/* Temporarily invalidate the crash image while it is replaced */
- xchg(&kexec_crash_image, NULL);
+ kimage_update_compat_pointers(NULL, KEXEC_TYPE_CRASH);
/* Replace the old elfcorehdr with newly prepared elfcorehdr */
memcpy((void *)ptr, elfbuf, elfsz);
/* The crash image is now valid once again */
- xchg(&kexec_crash_image, image);
+ kimage_update_compat_pointers(image, KEXEC_TYPE_CRASH);
}
out:
kvfree(cmem);
@@ -537,14 +537,14 @@ static void update_crash_fdt(struct kimage *image)
fdt = __va((void *)image->segment[fdt_index].mem);
/* Temporarily invalidate the crash image while it is replaced */
- xchg(&kexec_crash_image, NULL);
+ kimage_update_compat_pointers(NULL, KEXEC_TYPE_CRASH);
/* update FDT to reflect changes in CPU resources */
if (update_cpus_node(fdt))
pr_err("Failed to update crash FDT");
/* The crash image is now valid once again */
- xchg(&kexec_crash_image, image);
+ kimage_update_compat_pointers(image, KEXEC_TYPE_CRASH);
}
int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c6b12bed173d..fc561d5e058e 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -546,9 +546,9 @@ void arch_crash_handle_hotplug_event(struct kimage *image, void *arg)
* Temporarily invalidate the crash image while the
* elfcorehdr is updated.
*/
- xchg(&kexec_crash_image, NULL);
+ kimage_update_compat_pointers(NULL, KEXEC_TYPE_CRASH);
memcpy_flushcache(old_elfcorehdr, elfbuf, elfsz);
- xchg(&kexec_crash_image, image);
+ kimage_update_compat_pointers(image, KEXEC_TYPE_CRASH);
kunmap_local(old_elfcorehdr);
pr_debug("updated elfcorehdr\n");
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index a3ae3e561109..3bcbbacc0108 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -428,6 +428,9 @@ struct kimage {
/* dm crypt keys buffer */
unsigned long dm_crypt_keys_addr;
unsigned long dm_crypt_keys_sz;
+
+ /* For multikernel support: linked list node */
+ struct list_head list;
};
/* kexec interface functions */
@@ -531,6 +534,19 @@ extern bool kexec_file_dbg_print;
extern void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size);
extern void kimage_unmap_segment(void *buffer);
+
+/*
+ * Multikernel support functions
+ *
+ * kimage_add_to_list()/kimage_remove_from_list() link and unlink an image
+ * on the global kimage list. kimage_find_by_type(),
+ * kimage_get_all_by_type(), kimage_find_multikernel_by_entry() and
+ * kimage_get_multikernel_by_index() search that list by type, by entry
+ * point (image->start) or by position among the multikernel images.
+ * kimage_update_compat_pointers() sets kexec_image or kexec_crash_image
+ * depending on @type. kimage_list_lock()/kimage_list_unlock() take and
+ * release the mutex that guards the list.
+ *
+ * NOTE(review): no definition of kimage_list_multikernel_images() is
+ * visible in the hunks of this patch - confirm it is provided elsewhere
+ * in the series.
+ */
+extern struct kimage *kimage_find_by_type(int type);
+extern void kimage_add_to_list(struct kimage *image);
+extern void kimage_remove_from_list(struct kimage *image);
+extern void kimage_update_compat_pointers(struct kimage *new_image, int type);
+extern int kimage_get_all_by_type(int type, struct kimage **images, int max_count);
+extern void kimage_list_lock(void);
+extern void kimage_list_unlock(void);
+extern struct kimage *kimage_find_multikernel_by_entry(unsigned long entry);
+extern struct kimage *kimage_get_multikernel_by_index(int index);
+extern int multikernel_kexec_by_entry(int cpu, unsigned long entry);
+extern void kimage_list_multikernel_images(void);
#else /* !CONFIG_KEXEC_CORE */
struct pt_regs;
struct task_struct;
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 49e62f804674..3d37925ee15a 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -147,7 +147,31 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
if (nr_segments == 0) {
/* Uninstall image */
- kimage_free(xchg(dest_image, NULL));
+ if (flags & KEXEC_ON_CRASH) {
+ struct kimage *old_image = xchg(&kexec_crash_image, NULL);
+ if (old_image) {
+ kimage_remove_from_list(old_image);
+ kimage_free(old_image);
+ }
+ } else if (flags & KEXEC_MULTIKERNEL) {
+ /* For multikernel unload, we need to specify which image to remove */
+ /* For now, remove all multikernel images - this could be enhanced */
+ struct kimage *images[10];
+ int count, i;
+
+ count = kimage_get_all_by_type(KEXEC_TYPE_MULTIKERNEL, images, 10);
+ for (i = 0; i < count; i++) {
+ kimage_remove_from_list(images[i]);
+ kimage_free(images[i]);
+ }
+ pr_info("Unloaded %d multikernel images\n", count);
+ } else {
+ struct kimage *old_image = xchg(&kexec_image, NULL);
+ if (old_image) {
+ kimage_remove_from_list(old_image);
+ kimage_free(old_image);
+ }
+ }
ret = 0;
goto out_unlock;
}
@@ -157,7 +181,11 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
* crashes. Free any current crash dump kernel before
* we corrupt it.
*/
- kimage_free(xchg(&kexec_crash_image, NULL));
+ struct kimage *old_crash_image = xchg(&kexec_crash_image, NULL);
+ if (old_crash_image) {
+ kimage_remove_from_list(old_crash_image);
+ kimage_free(old_crash_image);
+ }
}
ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags);
@@ -199,7 +227,35 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
goto out;
/* Install the new kernel and uninstall the old */
- image = xchg(dest_image, image);
+ if (flags & KEXEC_ON_CRASH) {
+ struct kimage *old_image = xchg(&kexec_crash_image, image);
+ if (old_image) {
+ kimage_remove_from_list(old_image);
+ kimage_free(old_image);
+ }
+ if (image) {
+ kimage_add_to_list(image);
+ kimage_update_compat_pointers(image, KEXEC_TYPE_CRASH);
+ }
+ image = NULL; /* Don't free the new image */
+ } else if (flags & KEXEC_MULTIKERNEL) {
+ if (image) {
+ kimage_add_to_list(image);
+ pr_info("Added multikernel image to list (entry: 0x%lx)\n", image->start);
+ }
+ image = NULL; /* Don't free the new image */
+ } else {
+ struct kimage *old_image = xchg(&kexec_image, image);
+ if (old_image) {
+ kimage_remove_from_list(old_image);
+ kimage_free(old_image);
+ }
+ if (image) {
+ kimage_add_to_list(image);
+ kimage_update_compat_pointers(image, KEXEC_TYPE_DEFAULT);
+ }
+ image = NULL; /* Don't free the new image */
+ }
out:
#ifdef CONFIG_CRASH_DUMP
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 35a66c8dd78b..4e489a7031e6 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -56,6 +56,10 @@ bool kexec_in_progress = false;
bool kexec_file_dbg_print;
+/*
+ * Linked list of dynamically allocated kimages.
+ *
+ * Images are chained through kimage->list. kexec_image_mutex is taken by
+ * the list helpers in this file around each insertion, removal and
+ * traversal of kexec_image_list, and by kimage_update_compat_pointers()
+ * around updates of kexec_image/kexec_crash_image.
+ */
+static LIST_HEAD(kexec_image_list);
+static DEFINE_MUTEX(kexec_image_mutex);
+
/*
* When kexec transitions to the new kernel there is a one-to-one
* mapping between physical and virtual addresses. On processors
@@ -275,6 +279,9 @@ struct kimage *do_kimage_alloc_init(void)
/* Initialize the list of unusable pages */
INIT_LIST_HEAD(&image->unusable_pages);
+ /* Initialize the list node for multikernel support */
+ INIT_LIST_HEAD(&image->list);
+
#ifdef CONFIG_CRASH_HOTPLUG
image->hp_action = KEXEC_CRASH_HP_NONE;
image->elfcorehdr_index = -1;
@@ -607,6 +614,13 @@ void kimage_free(struct kimage *image)
if (!image)
return;
+ /* Remove from linked list and update compatibility pointers */
+ kimage_remove_from_list(image);
+ if (image == kexec_image)
+ kimage_update_compat_pointers(NULL, KEXEC_TYPE_DEFAULT);
+ else if (image == kexec_crash_image)
+ kimage_update_compat_pointers(NULL, KEXEC_TYPE_CRASH);
+
#ifdef CONFIG_CRASH_DUMP
if (image->vmcoreinfo_data_copy) {
crash_update_vmcoreinfo_safecopy(NULL);
@@ -1123,6 +1137,72 @@ void kimage_unmap_segment(void *segment_buffer)
vunmap(segment_buffer);
}
+/**
+ * kimage_add_to_list - append an image to the global kimage list
+ * @image: image whose ->list node is linked at the tail of the list
+ *
+ * The insertion is done with kexec_image_mutex held.
+ */
+void kimage_add_to_list(struct kimage *image)
+{
+	struct list_head *node = &image->list;
+
+	mutex_lock(&kexec_image_mutex);
+	list_add_tail(node, &kexec_image_list);
+	mutex_unlock(&kexec_image_mutex);
+}
+
+/**
+ * kimage_remove_from_list - unlink an image from the global kimage list
+ * @image: image to unlink
+ *
+ * Safe to call for an image that is not currently linked: the node is
+ * only deleted when it is non-empty, and it is re-initialised afterwards
+ * so a repeated call is a no-op.
+ */
+void kimage_remove_from_list(struct kimage *image)
+{
+	struct list_head *node = &image->list;
+	bool linked;
+
+	mutex_lock(&kexec_image_mutex);
+	linked = !list_empty(node);
+	if (linked)
+		list_del_init(node);
+	mutex_unlock(&kexec_image_mutex);
+}
+
+/**
+ * kimage_find_by_type - look up the first listed image of a given type
+ * @type: value compared against kimage->type
+ *
+ * Walks the global kimage list under kexec_image_mutex.
+ *
+ * Return: the first matching image in list order, or NULL if there is
+ * none. The pointer is handed back after the mutex has been released, so
+ * the list lock alone does not keep the image from being freed.
+ */
+struct kimage *kimage_find_by_type(int type)
+{
+	struct kimage *pos, *match = NULL;
+
+	mutex_lock(&kexec_image_mutex);
+	list_for_each_entry(pos, &kexec_image_list, list) {
+		if (pos->type != type)
+			continue;
+		match = pos;
+		break;
+	}
+	mutex_unlock(&kexec_image_mutex);
+
+	return match;
+}
+
+/**
+ * kimage_update_compat_pointers - publish a new default or crash image
+ * @new_image: image to publish, or NULL to invalidate the pointer
+ * @type: KEXEC_TYPE_CRASH updates kexec_crash_image, KEXEC_TYPE_DEFAULT
+ *        updates kexec_image; any other type leaves both untouched
+ *
+ * Writers are serialised by kexec_image_mutex. The store itself is done
+ * with xchg() rather than a plain assignment: the call sites this helper
+ * replaces used xchg() to invalidate the crash image, rewrite its data in
+ * place and then publish it again, which relies on the pointer update
+ * being fully ordered against the surrounding memory writes. A plain
+ * store inside the critical section does not provide that ordering.
+ */
+void kimage_update_compat_pointers(struct kimage *new_image, int type)
+{
+	mutex_lock(&kexec_image_mutex);
+	switch (type) {
+	case KEXEC_TYPE_CRASH:
+		xchg(&kexec_crash_image, new_image);
+		break;
+	case KEXEC_TYPE_DEFAULT:
+		xchg(&kexec_image, new_image);
+		break;
+	default:
+		/* Other image types have no compatibility pointer. */
+		break;
+	}
+	mutex_unlock(&kexec_image_mutex);
+}
+
+/**
+ * kimage_get_all_by_type - collect listed images of a given type
+ * @type: value compared against kimage->type
+ * @images: output array receiving the matching image pointers
+ * @max_count: capacity of @images
+ *
+ * Copies up to @max_count matching images, in list order, into @images
+ * while holding kexec_image_mutex. The walk stops as soon as the array is
+ * full so the mutex is not held longer than needed.
+ *
+ * Return: number of entries written to @images (0 if @images is NULL or
+ * @max_count is not positive). A return value equal to @max_count may
+ * mean that further matching images exist and were not reported. The
+ * pointers are returned after the mutex has been released.
+ */
+int kimage_get_all_by_type(int type, struct kimage **images, int max_count)
+{
+	struct kimage *image;
+	int count = 0;
+
+	if (!images || max_count <= 0)
+		return 0;
+
+	mutex_lock(&kexec_image_mutex);
+	list_for_each_entry(image, &kexec_image_list, list) {
+		if (image->type != type)
+			continue;
+		images[count++] = image;
+		if (count == max_count)
+			break;
+	}
+	mutex_unlock(&kexec_image_mutex);
+
+	return count;
+}
+
+/**
+ * kimage_list_lock - acquire the mutex guarding the global kimage list
+ *
+ * Must be paired with kimage_list_unlock().
+ */
+void kimage_list_lock(void)
+{
+	mutex_lock(&kexec_image_mutex);
+}
+
+/**
+ * kimage_list_unlock - release the mutex guarding the global kimage list
+ *
+ * Counterpart of kimage_list_lock().
+ */
+void kimage_list_unlock(void)
+{
+	mutex_unlock(&kexec_image_mutex);
+}
+
struct kexec_load_limit {
/* Mutex protects the limit count. */
struct mutex mutex;
@@ -1139,6 +1219,7 @@ static struct kexec_load_limit load_limit_panic = {
.limit = -1,
};
+/*
+ * Compatibility: maintain pointers to current default and crash images.
+ * kimage_update_compat_pointers() writes kexec_image for
+ * KEXEC_TYPE_DEFAULT and kexec_crash_image for KEXEC_TYPE_CRASH.
+ */
struct kimage *kexec_image;
struct kimage *kexec_crash_image;
static int kexec_load_disabled;
@@ -1339,8 +1420,49 @@ int kernel_kexec(void)
return error;
}
+/*
+ * Look up a multikernel image by its entry point.
+ *
+ * Scans the global kimage list, with the list lock held, for the first
+ * image whose type is KEXEC_TYPE_MULTIKERNEL and whose ->start equals
+ * @entry. Returns that image, or NULL when no such image is listed. The
+ * lock is dropped before the pointer is returned.
+ */
+struct kimage *kimage_find_multikernel_by_entry(unsigned long entry)
+{
+	struct kimage *pos, *match = NULL;
+
+	kimage_list_lock();
+	list_for_each_entry(pos, &kexec_image_list, list) {
+		if (pos->type != KEXEC_TYPE_MULTIKERNEL)
+			continue;
+		if (pos->start != entry)
+			continue;
+		match = pos;
+		break;
+	}
+	kimage_list_unlock();
+
+	return match;
+}
+
+/*
+ * Get the multikernel image at a given position (0-based).
+ *
+ * Only images of type KEXEC_TYPE_MULTIKERNEL are counted, in list order;
+ * images of other types are skipped without advancing the position.
+ * Returns the image at @index, or NULL when fewer than @index + 1
+ * multikernel images are listed (a negative @index never matches). The
+ * list lock is held for the walk and dropped before returning.
+ */
+struct kimage *kimage_get_multikernel_by_index(int index)
+{
+	struct kimage *pos, *match = NULL;
+	int seen = 0;
+
+	kimage_list_lock();
+	list_for_each_entry(pos, &kexec_image_list, list) {
+		if (pos->type != KEXEC_TYPE_MULTIKERNEL)
+			continue;
+		if (seen == index) {
+			match = pos;
+			break;
+		}
+		seen++;
+	}
+	kimage_list_unlock();
+
+	return match;
+}
+
int multikernel_kexec(int cpu)
{
+ struct kimage *mk_image;
int rc;
pr_info("multikernel kexec: cpu %d\n", cpu);
@@ -1352,13 +1474,52 @@ int multikernel_kexec(int cpu)
if (!kexec_trylock())
return -EBUSY;
- if (!kexec_image) {
+
+ mk_image = kimage_find_by_type(KEXEC_TYPE_MULTIKERNEL);
+ if (!mk_image) {
+ pr_err("No multikernel image loaded\n");
rc = -EINVAL;
goto unlock;
}
+ pr_info("Found multikernel image with entry point: 0x%lx\n", mk_image->start);
+
+ cpus_read_lock();
+ rc = multikernel_kick_ap(cpu, mk_image->start);
+ cpus_read_unlock();
+
+unlock:
+ kexec_unlock();
+ return rc;
+}
+
+int multikernel_kexec_by_entry(int cpu, unsigned long entry)
+{
+ struct kimage *mk_image;
+ int rc;
+
+ pr_info("multikernel kexec: cpu %d, entry 0x%lx\n", cpu, entry);
+
+ if (cpu_online(cpu)) {
+ pr_err("The CPU is currently running with this kernel instance.");
+ return -EBUSY;
+ }
+
+ if (!kexec_trylock())
+ return -EBUSY;
+
+ /* Find the specific multikernel image by entry point */
+ mk_image = kimage_find_multikernel_by_entry(entry);
+ if (!mk_image) {
+ pr_err("No multikernel image found with entry point 0x%lx\n", entry);
+ rc = -EINVAL;
+ goto unlock;
+ }
+
+ pr_info("Using multikernel image with entry point: 0x%lx\n", mk_image->start);
+
cpus_read_lock();
- rc = multikernel_kick_ap(cpu, kexec_image->start);
+ rc = multikernel_kick_ap(cpu, mk_image->start);
cpus_read_unlock();
unlock:
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 91d46502a817..d4b8831eb59c 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -399,8 +399,13 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
* same memory where old crash kernel might be loaded. Free any
* current crash dump kernel before we corrupt it.
*/
- if (flags & KEXEC_FILE_ON_CRASH)
- kimage_free(xchg(&kexec_crash_image, NULL));
+ if (flags & KEXEC_FILE_ON_CRASH) {
+ struct kimage *old_crash_image = xchg(&kexec_crash_image, NULL);
+ if (old_crash_image) {
+ kimage_remove_from_list(old_crash_image);
+ kimage_free(old_crash_image);
+ }
+ }
ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
cmdline_len, flags);
@@ -456,7 +461,29 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
*/
kimage_file_post_load_cleanup(image);
exchange:
- image = xchg(dest_image, image);
+ if (image_type == KEXEC_TYPE_CRASH) {
+ struct kimage *old_image = xchg(&kexec_crash_image, image);
+ if (old_image) {
+ kimage_remove_from_list(old_image);
+ kimage_free(old_image);
+ }
+ if (image) {
+ kimage_add_to_list(image);
+ kimage_update_compat_pointers(image, KEXEC_TYPE_CRASH);
+ }
+ image = NULL; /* Don't free the new image */
+ } else {
+ struct kimage *old_image = xchg(&kexec_image, image);
+ if (old_image) {
+ kimage_remove_from_list(old_image);
+ kimage_free(old_image);
+ }
+ if (image) {
+ kimage_add_to_list(image);
+ kimage_update_compat_pointers(image, KEXEC_TYPE_DEFAULT);
+ }
+ image = NULL; /* Don't free the new image */
+ }
out:
#ifdef CONFIG_CRASH_DUMP
if ((flags & KEXEC_FILE_ON_CRASH) && kexec_crash_image)
--
2.34.1
Powered by blists - more mailing lists