[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <aWIBvS4F8GYBVtgT@nuc10>
Date: Fri, 9 Jan 2026 23:58:45 -0800
From: Rustam Kovhaev <rkovhaev@...il.com>
To: Peter Zijlstra <peterz@...radead.org>
Cc: Rustam Kovhaev <rkovhaev@...il.com>,
Sami Tolvanen <samitolvanen@...gle.com>,
Alexei Starovoitov <ast@...nel.org>, linux-kernel@...r.kernel.org,
bpf@...r.kernel.org
Subject: Re: kernel crashes in BPF JIT code with kCFI and clang on x86
On Wed, Jan 07, 2026 at 04:36:03PM +0100, Peter Zijlstra wrote:
> On Wed, Jan 07, 2026 at 10:36:39AM +0100, Peter Zijlstra wrote:
> > On Mon, Dec 22, 2025 at 07:43:32PM -0800, Rustam Kovhaev wrote:
> >
> > >
> > > After switching to clang kbuild always generates these huge paddings in my kernel config:
> > > rusty@...10:~/code/kbuild_rust$ grep -e IBT -e PADDING .config
> > > CONFIG_CC_HAS_IBT=y
> > > CONFIG_X86_KERNEL_IBT=y
> > > CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0xa
> > > CONFIG_CC_HAS_ENTRY_PADDING=y
> > > CONFIG_FUNCTION_PADDING_CFI=59
> > > CONFIG_FUNCTION_PADDING_BYTES=59
> > > CONFIG_CALL_PADDING=y
> > > CONFIG_FINEIBT=y
> >
> > Oh gawd, you have FUNCTION_ALIGNMENT_64B. Yeah, I suppose that wasn't
> > tested very well.
>
> You appear to have CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B=y; is there any
> particular reason you have that on?
Hi, Peter,
Thank you for helping me out with this patch.
I have this option on just to make sure I always get consistent results
in perf regardless of changes in functions alignments.
>
> > Let me go check all that code.
>
> I've ended up with the below; this appears to boot (on my ADL) with
> "paranoid FineIBT" and "kCFI" options tested so far.
>
> ---
> arch/x86/include/asm/cfi.h | 12 ++++++++----
> arch/x86/include/asm/linkage.h | 4 ++--
> arch/x86/kernel/alternative.c | 29 ++++++++++++++++++++++-------
> arch/x86/net/bpf_jit_comp.c | 13 ++-----------
> 4 files changed, 34 insertions(+), 24 deletions(-)
>
I tested it with my clang build/vm and it worked. But gcc build without
CFI didn't compile, because CFI_OFFSET_BYTES is undefined in
apply_seal_endbr() when CONFIG_CFI=n.
I hid the poison_cfi() behind CONFIG_FINEIBT ifdef, which I am not sure
is the best way to go about this.
I have also renamed CFI_OFFSET to CFI_OFFSET_BYTES because CFI_OFFSET is
already defined in dwarf2.h. But I probably should not have done it
because it is assembler's macro, and they are never in the same
compilation unit?
I am copy pasting the patch below and attaching my gcc config without CFI
to this email, just in case.
Please let me know if this looks right to you.
Thanks again for looking into this!
---
arch/x86/include/asm/cfi.h | 12 +++++++----
arch/x86/include/asm/linkage.h | 4 ++--
arch/x86/kernel/alternative.c | 38 +++++++++++++++++++++++-----------
arch/x86/net/bpf_jit_comp.c | 13 ++----------
4 files changed, 38 insertions(+), 29 deletions(-)
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
index c40b9ebc1fb4..c23e9bb9ec50 100644
--- a/arch/x86/include/asm/cfi.h
+++ b/arch/x86/include/asm/cfi.h
@@ -115,15 +115,19 @@ struct pt_regs;
enum bug_trap_type handle_cfi_failure(struct pt_regs *regs);
#define __bpfcall
+#ifdef CONFIG_CALL_PADDING
+#define CFI_OFFSET_BYTES (CONFIG_FUNCTION_PADDING_CFI+5)
+#else
+#define CFI_OFFSET_BYTES 5
+#endif
+
static inline int cfi_get_offset(void)
{
switch (cfi_mode) {
case CFI_FINEIBT:
- return 16;
+ return /* fineibt_prefix_size */ 16;
case CFI_KCFI:
- if (IS_ENABLED(CONFIG_CALL_PADDING))
- return 16;
- return 5;
+ return CFI_OFFSET_BYTES;
default:
return 0;
}
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 9d38ae744a2e..a7294656ad90 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -68,7 +68,7 @@
* Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the
* CFI symbol layout changes.
*
- * Without CALL_THUNKS:
+ * Without CALL_PADDING:
*
* .align FUNCTION_ALIGNMENT
* __cfi_##name:
@@ -77,7 +77,7 @@
* .long __kcfi_typeid_##name
* name:
*
- * With CALL_THUNKS:
+ * With CALL_PADDING:
*
* .align FUNCTION_ALIGNMENT
* __cfi_##name:
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 28518371d8bf..18e8efc04819 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1113,7 +1113,9 @@ static __noendbr bool exact_endbr(u32 *val)
#endif
+#ifdef CONFIG_FINEIBT
static void poison_cfi(void *addr);
+#endif
static void __init_or_module poison_endbr(void *addr)
{
@@ -1146,8 +1148,9 @@ void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end)
void *addr = (void *)s + *s;
poison_endbr(addr);
- if (IS_ENABLED(CONFIG_FINEIBT))
- poison_cfi(addr - 16);
+#ifdef CONFIG_FINEIBT
+ poison_cfi(addr - CFI_OFFSET_BYTES);
+#endif
}
}
@@ -1354,6 +1357,8 @@ extern u8 fineibt_preamble_end[];
#define fineibt_preamble_ud 0x13
#define fineibt_preamble_hash 5
+#define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE)
+
/*
* <fineibt_caller_start>:
* 0: b8 78 56 34 12 mov $0x12345678, %eax
@@ -1599,7 +1604,7 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end)
* have determined there are no indirect calls to it and we
* don't need no CFI either.
*/
- if (!is_endbr(addr + 16))
+ if (!is_endbr(addr + CFI_OFFSET_BYTES))
continue;
hash = decode_preamble_hash(addr, &arity);
@@ -1607,6 +1612,15 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end)
addr, addr, 5, addr))
return -EINVAL;
+ /*
+ * FineIBT relies on being at func-16, so if the preamble is
+ * actually larger than that, place it the tail end.
+ *
+ * NOTE: this is possible with things like DEBUG_CALL_THUNKS
+ * and DEBUG_FORCE_FUNCTION_ALIGN_64B.
+ */
+ addr += CFI_OFFSET_BYTES - fineibt_prefix_size;
+
text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size);
WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678);
text_poke_early(addr + fineibt_preamble_hash, &hash, 4);
@@ -1629,10 +1643,10 @@ static void cfi_rewrite_endbr(s32 *start, s32 *end)
for (s = start; s < end; s++) {
void *addr = (void *)s + *s;
- if (!exact_endbr(addr + 16))
+ if (!exact_endbr(addr + CFI_OFFSET_BYTES))
continue;
- poison_endbr(addr + 16);
+ poison_endbr(addr + CFI_OFFSET_BYTES);
}
}
@@ -1737,7 +1751,8 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
if (FINEIBT_WARN(fineibt_preamble_size, 20) ||
FINEIBT_WARN(fineibt_preamble_bhi + fineibt_bhi1_size, 20) ||
FINEIBT_WARN(fineibt_caller_size, 14) ||
- FINEIBT_WARN(fineibt_paranoid_size, 20))
+ FINEIBT_WARN(fineibt_paranoid_size, 20) ||
+ WARN_ON_ONCE(CFI_OFFSET_BYTES < fineibt_prefix_size))
return;
if (cfi_mode == CFI_AUTO) {
@@ -1850,6 +1865,11 @@ static void poison_cfi(void *addr)
*/
switch (cfi_mode) {
case CFI_FINEIBT:
+ /*
+ * FineIBT preamble is at func-16.
+ */
+ addr += CFI_OFFSET_BYTES - fineibt_prefix_size;
+
/*
* FineIBT prefix should start with an ENDBR.
*/
@@ -1888,8 +1908,6 @@ static void poison_cfi(void *addr)
}
}
-#define fineibt_prefix_size (fineibt_preamble_size - ENDBR_INSN_SIZE)
-
/*
* When regs->ip points to a 0xD6 byte in the FineIBT preamble,
* return true and fill out target and type.
@@ -2045,10 +2063,6 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
pr_info("CFI: Using standard kCFI\n");
}
-#ifdef CONFIG_X86_KERNEL_IBT
-static void poison_cfi(void *addr) { }
-#endif
-
#endif /* !CONFIG_FINEIBT */
void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b0bac2a66eff..ea76949ddda5 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -438,17 +438,8 @@ static void emit_kcfi(u8 **pprog, u32 hash)
EMIT1_off32(0xb8, hash); /* movl $hash, %eax */
#ifdef CONFIG_CALL_PADDING
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
+ for (int i = 0; i < CONFIG_FUNCTION_PADDING_CFI; i++)
+ EMIT1(0x90);
#endif
EMIT_ENDBR();
--
2.52.0
View attachment "config" of type "text/plain" (80079 bytes)
Powered by blists - more mailing lists