[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260119032424.10781-5-piliu@redhat.com>
Date: Mon, 19 Jan 2026 11:24:15 +0800
From: Pingfan Liu <piliu@...hat.com>
To: kexec@...ts.infradead.org
Cc: Pingfan Liu <piliu@...hat.com>,
"David S. Miller" <davem@...emloft.net>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
John Fastabend <john.fastabend@...il.com>,
Andrii Nakryiko <andrii@...nel.org>,
Martin KaFai Lau <martin.lau@...ux.dev>,
Eduard Zingerman <eddyz87@...il.com>,
Song Liu <song@...nel.org>,
Yonghong Song <yonghong.song@...ux.dev>,
Jeremy Linton <jeremy.linton@....com>,
Catalin Marinas <catalin.marinas@....com>,
Will Deacon <will@...nel.org>,
Ard Biesheuvel <ardb@...nel.org>,
Simon Horman <horms@...nel.org>,
Gerd Hoffmann <kraxel@...hat.com>,
Vitaly Kuznetsov <vkuznets@...hat.com>,
Philipp Rudo <prudo@...hat.com>,
Viktor Malik <vmalik@...hat.com>,
Jan Hendrik Farr <kernel@...rr.cc>,
Baoquan He <bhe@...hat.com>,
Dave Young <dyoung@...hat.com>,
Andrew Morton <akpm@...ux-foundation.org>,
bpf@...r.kernel.org,
systemd-devel@...ts.freedesktop.org,
linux-kernel@...r.kernel.org
Subject: [PATCHv6 04/13] kexec_file: Use bpf-prog to decompose image
As UEFI becomes popular, a few architectures support booting a PE format
kernel image directly. But the internals of the PE format vary, which means
that a separate parser is needed for each format.
This patch (with the rest in this series) introduces a common skeleton
for all parsers and leaves the format parsing to the
bpf-prog, so the kernel code can stay relatively stable.
Historically, the syscall
SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
unsigned long, cmdline_len, const char __user *, cmdline_ptr,
unsigned long, flags)
complies with the kernel protocol: bootable kernel, initramfs, cmdline.
But the occurrence of UKI images challenges the traditional model. The
image itself contains the kernel, initrd, and cmdline. To be compatible
with both the old and new models, kexec_file_load can be reorganized into
two stages. In the first stage, "decompose_kexec_image()" breaks down the
passed-in image into the components required by the kernel boot protocol.
In the second stage, the traditional image loader
"arch_kexec_kernel_image_load()" prepares the switch to the next kernel.
During the decomposition stage, the decomposition process can be nested.
In each sub-process, BPF bytecode is extracted from the '.bpf' section
to parse the current PE file. If the data section in the PE file contains
another PE file, the sub-process is repeated. This is designed to handle
the zboot format embedded in UKI format on the arm64 platform.
There are some placeholder functions in this patch. (They will take effect
after the introduction of kexec BPF light skeleton and BPF helpers.)
Signed-off-by: Pingfan Liu <piliu@...hat.com>
Cc: Baoquan He <bhe@...hat.com>
Cc: Dave Young <dyoung@...hat.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>
Cc: Philipp Rudo <prudo@...hat.com>
To: kexec@...ts.infradead.org
---
kernel/Kconfig.kexec | 8 ++
kernel/Makefile | 2 +-
kernel/kexec_bpf_loader.c | 161 ++++++++++++++++++++++++++++++++++++++
kernel/kexec_file.c | 9 ++-
kernel/kexec_internal.h | 1 +
5 files changed, 179 insertions(+), 2 deletions(-)
create mode 100644 kernel/kexec_bpf_loader.c
diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec
index 15632358bcf71..0c5d619820bcd 100644
--- a/kernel/Kconfig.kexec
+++ b/kernel/Kconfig.kexec
@@ -46,6 +46,14 @@ config KEXEC_FILE
for kernel and initramfs as opposed to list of segments as
accepted by kexec system call.
+config KEXEC_BPF
+ bool "Enable bpf-prog to parse the kexec image"
+ depends on KEXEC_FILE
+ depends on DEBUG_INFO_BTF && BPF_SYSCALL
+ help
+ This feature runs the bpf section embedded in a kexec image file. The
+ bpf-prog parses the image and helps the kernel set up the kexec boot protocol.
+
config KEXEC_SIG
bool "Verify kernel signature during kexec_file_load() syscall"
depends on ARCH_SUPPORTS_KEXEC_SIG
diff --git a/kernel/Makefile b/kernel/Makefile
index f9e85c4a0622b..05177a867690d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -83,7 +83,7 @@ obj-$(CONFIG_CRASH_DUMP_KUNIT_TEST) += crash_core_test.o
obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
obj-$(CONFIG_KEXEC_ELF) += kexec_elf.o
-obj-$(CONFIG_KEXEC_BPF) += kexec_uefi_app.o
+obj-$(CONFIG_KEXEC_BPF) += kexec_bpf_loader.o kexec_uefi_app.o
obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CGROUPS) += cgroup/
diff --git a/kernel/kexec_bpf_loader.c b/kernel/kexec_bpf_loader.c
new file mode 100644
index 0000000000000..dc59e1389da94
--- /dev/null
+++ b/kernel/kexec_bpf_loader.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Kexec image bpf section helpers
+ *
+ * Copyright (C) 2025, 2026 Red Hat, Inc
+ */
+
+#define pr_fmt(fmt) "kexec_file(Image): " fmt
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/vmalloc.h>
+#include <linux/kexec.h>
+#include <linux/elf.h>
+#include <linux/string.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <asm/byteorder.h>
+#include <asm/image.h>
+#include <asm/memory.h>
+#include "kexec_internal.h"
+
+/* No-op placeholders: arm is to load and attach the bpf-prog ELF, disarm is to detach it */
+static int arm_bpf_prog(char *bpf_elf, unsigned long sz)
+{
+ return 0;
+}
+
+static void disarm_bpf_prog(void)
+{
+}
+
+struct kexec_context {
+ bool kdump; /* true when image->type is KEXEC_TYPE_CRASH */
+ char *kernel; /* kernel buffer of the current decomposition round */
+ int kernel_sz; /* size of @kernel */
+ char *initrd; /* copied to image->initrd_buf after decomposition */
+ int initrd_sz; /* size of @initrd */
+ char *cmdline; /* copied to image->cmdline_buf after decomposition */
+ int cmdline_sz; /* size of @cmdline */
+};
+
+void kexec_image_parser_anchor(struct kexec_context *context,
+ unsigned long parser_id);
+
+/*
+ * optimize("O0") prevents inlining and compiler constant propagation.
+ *
+ * The parser context pointer is passed in as the @parser_id argument so that
+ * it will not be spilled onto the stack.
+ */
+__attribute__((used, optimize("O0"))) void kexec_image_parser_anchor(
+ struct kexec_context *context,
+ unsigned long parser_id)
+{
+ /*
+ * Give this function a body of its own so that the linker's Identical
+ * Code Folding (ICF) cannot merge it with another, identical function.
+ */
+ volatile int dummy = 0;
+
+ dummy += 1;
+}
+
+/* A late initcall registers kexec_image_parser_anchor() as a BTF fmodret id. */
+BTF_KFUNCS_START(kexec_modify_return_ids)
+BTF_ID_FLAGS(func, kexec_image_parser_anchor, KF_SLEEPABLE)
+BTF_KFUNCS_END(kexec_modify_return_ids)
+
+static const struct btf_kfunc_id_set kexec_modify_return_set = {
+ .owner = THIS_MODULE,
+ .set = &kexec_modify_return_ids,
+};
+
+static int __init kexec_bpf_prog_run_init(void)
+{
+ return register_btf_fmodret_id_set(&kexec_modify_return_set);
+}
+late_initcall(kexec_bpf_prog_run_init);
+
+static int kexec_buff_parser(struct bpf_parser_context *parser)
+{
+ return 0; /* placeholder: nothing is parsed yet */
+}
+
+/* At present, only PE format files with a .bpf section are supported */
+#define file_has_bpf_section pe_has_bpf_section
+#define file_get_section pe_get_section
+
+/*
+ * Unfold nested containers in image->kernel_buf: each round arms the '.bpf'
+ * section and lets that bpf-prog rewrite @context via the anchor. Returns 0
+ * (@image untouched if nothing was unfolded) or -errno. @extended_fd is unused.
+ */
+int decompose_kexec_image(struct kimage *image, int extended_fd)
+{
+	struct kexec_context context = { 0 };
+	struct bpf_parser_context *bpf;
+	unsigned long kernel_sz, bpf_sz;
+	char *kernel_start, *bpf_start;
+	bool decomposed = false;
+	int ret = 0;
+
+	context.kdump = image->type == KEXEC_TYPE_CRASH;
+	kernel_start = image->kernel_buf;
+	kernel_sz = image->kernel_buf_len;
+	while (file_has_bpf_section(kernel_start, kernel_sz)) {
+		bpf = alloc_bpf_parser_context(kexec_buff_parser, &context);
+		if (!bpf) {
+			ret = -ENOMEM;
+			goto err;
+		}
+		/* Stay well-defined even if the lookup leaves these untouched. */
+		bpf_start = NULL;
+		bpf_sz = 0;
+		file_get_section(kernel_start, ".bpf", &bpf_start, &bpf_sz);
+		if (bpf_sz) {
+			ret = arm_bpf_prog(bpf_start, bpf_sz);
+			if (ret) {
+				put_bpf_parser_context(bpf);
+				pr_err("Fail to load .bpf section\n");
+				goto err;
+			}
+		}
+		context.kernel = kernel_start;
+		context.kernel_sz = kernel_sz;
+		/* Attach point of the bpf-prog, which rewrites @context. */
+		kexec_image_parser_anchor(&context, (unsigned long)bpf);
+		disarm_bpf_prog();
+		put_bpf_parser_context(bpf);
+		/* An unchanged buffer would be parsed over and over again. */
+		if (context.kernel == kernel_start && context.kernel_sz == kernel_sz) {
+			ret = -EINVAL;
+			goto err;
+		}
+		/* Containers may be nested: unfold the payload in the next round. */
+		kernel_start = context.kernel;
+		kernel_sz = context.kernel_sz;
+		decomposed = true;
+	}
+	if (!decomposed)
+		return 0;
+
+	image->kernel_buf = context.kernel;
+	image->kernel_buf_len = context.kernel_sz;
+	image->initrd_buf = context.initrd;
+	image->initrd_buf_len = context.initrd_sz;
+	image->cmdline_buf = context.cmdline;
+	image->cmdline_buf_len = context.cmdline_sz;
+	return 0;
+err:
+	/* context.kernel may still be the buffer owned by @image: keep that one. */
+	if (context.kernel != image->kernel_buf)
+		vfree(context.kernel);
+	vfree(context.initrd);
+	vfree(context.cmdline);
+	return ret;
+}
+
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 0222d17072d40..f9674bb5bd8db 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -238,7 +238,14 @@ kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
goto out;
#endif
- /* Call arch image probe handlers */
+ if (IS_ENABLED(CONFIG_KEXEC_BPF))
+ decompose_kexec_image(image, initrd_fd);
+
+ /*
+ * From this point on, the kexec subsystem handles the kernel boot protocol.
+ *
+ * Call arch image probe handlers
+ */
ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
image->kernel_buf_len);
if (ret)
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 8e5e5c1237732..ee01d0c8bb377 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -39,6 +39,7 @@ extern size_t kexec_purgatory_size;
extern bool pe_has_bpf_section(const char *file_buf, unsigned long pe_sz);
extern int pe_get_section(const char *file_buf, const char *sect_name,
char **sect_start, unsigned long *sect_sz);
+extern int decompose_kexec_image(struct kimage *image, int extended_fd);
#else /* CONFIG_KEXEC_FILE */
static inline void kimage_file_post_load_cleanup(struct kimage *image) { }
#endif /* CONFIG_KEXEC_FILE */
--
2.49.0
Powered by blists - more mailing lists