[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220725083904.56552-2-huangjie.albert@bytedance.com>
Date: Mon, 25 Jul 2022 16:38:53 +0800
From: Albert Huang <huangjie.albert@...edance.com>
To: unlisted-recipients:; (no To-header on input)
Cc: "huangjie.albert" <huangjie.albert@...edance.com>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>, x86@...nel.org,
"H. Peter Anvin" <hpa@...or.com>,
Eric Biederman <ebiederm@...ssion.com>,
Masahiro Yamada <masahiroy@...nel.org>,
Michal Marek <michal.lkml@...kovi.net>,
Nick Desaulniers <ndesaulniers@...gle.com>,
"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
Kuppuswamy Sathyanarayanan
<sathyanarayanan.kuppuswamy@...ux.intel.com>,
Michael Roth <michael.roth@....com>,
Nathan Chancellor <nathan@...nel.org>,
Ard Biesheuvel <ardb@...nel.org>,
Joerg Roedel <jroedel@...e.de>,
Mark Rutland <mark.rutland@....com>,
Peter Zijlstra <peterz@...radead.org>,
Sean Christopherson <seanjc@...gle.com>,
Kees Cook <keescook@...omium.org>,
linux-kernel@...r.kernel.org, kexec@...ts.infradead.org,
linux-kbuild@...r.kernel.org
Subject: [PATCH 1/4] kexec: reuse crash kernel reserved memory for normal kexec
From: "huangjie.albert" <huangjie.albert@...edance.com>
normally, for kexec reboot, each segment of the second os
(such as : kernel、initrd、etc.) will be copied to discontinuous
physical memory during kexec load. and then a memory copy will
be performed when kexec -e is executed to copy each segment of
the second os to contiguous physical memory, which will Affects
the time the kexec switch to the new os. Therefore, if we reuse
the crash kernel reserved memory for kexec. When kexec loads the
second os, each segment of the second OS is directly copied to the
contiguous physical memory, so there is no need to make a second copy
when kexec -e is executed later.
The kexec userspace tool also needs to add parameter options(-r) that
support the use of reserved memory (see another patch for kexec)
examples:
bzimage: 53M initramfs: 28M
can save aboat 40 ms, The larger the image size, the greater the time
savings
Signed-off-by: huangjie.albert <huangjie.albert@...edance.com>
---
include/linux/kexec.h | 9 +++++----
include/uapi/linux/kexec.h | 2 ++
kernel/kexec.c | 19 ++++++++++++++++++-
kernel/kexec_core.c | 16 +++++++++-------
kernel/kexec_file.c | 20 ++++++++++++++++++--
5 files changed, 52 insertions(+), 14 deletions(-)
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 475683cd67f1..9a8b9932b42a 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -305,9 +305,10 @@ struct kimage {
unsigned long control_page;
/* Flags to indicate special processing */
- unsigned int type : 1;
+ unsigned int type : 2;
#define KEXEC_TYPE_DEFAULT 0
#define KEXEC_TYPE_CRASH 1
+#define KEXEC_TYPE_RESERVED_MEM 2
unsigned int preserve_context : 1;
/* If set, we are using file mode kexec syscall */
unsigned int file_mode:1;
@@ -377,14 +378,14 @@ extern int kexec_load_disabled;
/* List of defined/legal kexec flags */
#ifndef CONFIG_KEXEC_JUMP
-#define KEXEC_FLAGS KEXEC_ON_CRASH
+#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_RESERVED_MEM)
#else
-#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)
+#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT | KEXEC_RESERVED_MEM)
#endif
/* List of defined/legal kexec file flags */
#define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
- KEXEC_FILE_NO_INITRAMFS)
+ KEXEC_FILE_NO_INITRAMFS | KEXEC_FILE_RESERVED_MEM)
/* flag to track if kexec reboot is in progress */
extern bool kexec_in_progress;
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index 981016e05cfa..c29011eb7fc2 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -12,6 +12,7 @@
/* kexec flags for different usage scenarios */
#define KEXEC_ON_CRASH 0x00000001
#define KEXEC_PRESERVE_CONTEXT 0x00000002
+#define KEXEC_RESERVED_MEM 0x00000004
#define KEXEC_ARCH_MASK 0xffff0000
/*
@@ -24,6 +25,7 @@
#define KEXEC_FILE_UNLOAD 0x00000001
#define KEXEC_FILE_ON_CRASH 0x00000002
#define KEXEC_FILE_NO_INITRAMFS 0x00000004
+#define KEXEC_FILE_RESERVED_MEM 0x00000008
/* These values match the ELF architecture values.
* Unless there is a good reason that should continue to be the case.
diff --git a/kernel/kexec.c b/kernel/kexec.c
index b5e40f069768..0d9ea52c81c1 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -27,8 +27,14 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
int ret;
struct kimage *image;
bool kexec_on_panic = flags & KEXEC_ON_CRASH;
+ bool kexec_on_reserved = flags & KEXEC_RESERVED_MEM;
- if (kexec_on_panic) {
+ if (kexec_on_panic && kexec_on_reserved) {
+ pr_err("both kexec_on_panic and kexec_on_reserved is true, they can not coexist");
+ return -EINVAL;
+ }
+
+ if (kexec_on_panic || kexec_on_reserved) {
/* Verify we have a valid entry point */
if ((entry < phys_to_boot_phys(crashk_res.start)) ||
(entry > phys_to_boot_phys(crashk_res.end)))
@@ -50,6 +56,12 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
image->type = KEXEC_TYPE_CRASH;
}
+ if (kexec_on_reserved) {
+ /* Enable special reserved kernel control page alloc policy. */
+ image->control_page = crashk_res.start;
+ image->type = KEXEC_TYPE_RESERVED_MEM;
+ }
+
ret = sanity_check_segment_list(image);
if (ret)
goto out_free_image;
@@ -110,6 +122,11 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
dest_image = &kexec_image;
}
+ if (flags & KEXEC_RESERVED_MEM) {
+ if (kexec_crash_image)
+ arch_kexec_unprotect_crashkres();
+ }
+
if (nr_segments == 0) {
/* Uninstall image */
kimage_free(xchg(dest_image, NULL));
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 4d34c78334ce..6220c2e0d6f7 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -230,13 +230,13 @@ int sanity_check_segment_list(struct kimage *image)
* Verify we have good destination addresses. Normally
* the caller is responsible for making certain we don't
* attempt to load the new image into invalid or reserved
- * areas of RAM. But crash kernels are preloaded into a
+ * areas of RAM. But crash kernels (or we specify to load
+ * the new image into reserved areas) are preloaded into a
* reserved area of ram. We must ensure the addresses
* are in the reserved area otherwise preloading the
* kernel could corrupt things.
*/
-
- if (image->type == KEXEC_TYPE_CRASH) {
+ if (image->type == KEXEC_TYPE_CRASH || image->type == KEXEC_TYPE_RESERVED_MEM) {
for (i = 0; i < nr_segments; i++) {
unsigned long mstart, mend;
@@ -414,7 +414,7 @@ static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
return pages;
}
-static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
+static struct page *kimage_alloc_reserverd_control_pages(struct kimage *image,
unsigned int order)
{
/* Control pages are special, they are the intermediaries
@@ -491,7 +491,8 @@ struct page *kimage_alloc_control_pages(struct kimage *image,
pages = kimage_alloc_normal_control_pages(image, order);
break;
case KEXEC_TYPE_CRASH:
- pages = kimage_alloc_crash_control_pages(image, order);
+ case KEXEC_TYPE_RESERVED_MEM:
+ pages = kimage_alloc_reserverd_control_pages(image, order);
break;
}
@@ -846,7 +847,7 @@ static int kimage_load_normal_segment(struct kimage *image,
return result;
}
-static int kimage_load_crash_segment(struct kimage *image,
+static int kimage_load_reserved_segment(struct kimage *image,
struct kexec_segment *segment)
{
/* For crash dumps kernels we simply copy the data from
@@ -924,7 +925,8 @@ int kimage_load_segment(struct kimage *image,
result = kimage_load_normal_segment(image, segment);
break;
case KEXEC_TYPE_CRASH:
- result = kimage_load_crash_segment(image, segment);
+ case KEXEC_TYPE_RESERVED_MEM:
+ result = kimage_load_reserved_segment(image, segment);
break;
}
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index f9261c07b048..5242ad7e5302 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -277,7 +277,7 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
int ret;
struct kimage *image;
bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH;
-
+ bool kexec_on_reserved = flags & KEXEC_FILE_RESERVED_MEM;
image = do_kimage_alloc_init();
if (!image)
return -ENOMEM;
@@ -290,6 +290,12 @@ kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
image->type = KEXEC_TYPE_CRASH;
}
+ if (kexec_on_reserved) {
+ /* Enable special crash kernel control page alloc policy. */
+ image->control_page = crashk_res.start;
+ image->type = KEXEC_TYPE_RESERVED_MEM;
+ }
+
ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
cmdline_ptr, cmdline_len, flags);
if (ret)
@@ -346,6 +352,11 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
if (!mutex_trylock(&kexec_mutex))
return -EBUSY;
+ if ((flags & KEXEC_FILE_ON_CRASH) && (flags & KEXEC_FILE_RESERVED_MEM)) {
+ pr_err("both kexec_on_panic and kexec_on_reserved is true, they can not coexist");
+ return -EINVAL;
+ }
+
dest_image = &kexec_image;
if (flags & KEXEC_FILE_ON_CRASH) {
dest_image = &kexec_crash_image;
@@ -353,6 +364,11 @@ SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
arch_kexec_unprotect_crashkres();
}
+ if (flags & KEXEC_FILE_RESERVED_MEM) {
+ if (kexec_crash_image)
+ arch_kexec_unprotect_crashkres();
+ }
+
if (flags & KEXEC_FILE_UNLOAD)
goto exchange;
@@ -588,7 +604,7 @@ static int kexec_walk_memblock(struct kexec_buf *kbuf,
static int kexec_walk_resources(struct kexec_buf *kbuf,
int (*func)(struct resource *, void *))
{
- if (kbuf->image->type == KEXEC_TYPE_CRASH)
+ if (kbuf->image->type == KEXEC_TYPE_CRASH || kbuf->image->type == KEXEC_TYPE_RESERVED_MEM)
return walk_iomem_res_desc(crashk_res.desc,
IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
crashk_res.start, crashk_res.end,
--
2.31.1
Powered by blists - more mailing lists