[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20251223034332.GA2008178@nuc10>
Date: Mon, 22 Dec 2025 19:43:32 -0800
From: Rustam Kovhaev <rkovhaev@...il.com>
To: Peter Zijlstra <peterz@...radead.org>,
Sami Tolvanen <samitolvanen@...gle.com>,
Alexei Starovoitov <ast@...nel.org>
Cc: linux-kernel@...r.kernel.org, bpf@...r.kernel.org, rkovhaev@...il.com
Subject: kernel crashes in BPF JIT code with kCFI and clang on x86
Hi Alexei, Sami, Peter,
I hit this not-present page fault after I switched my build to clang in order to support Rust.
[ 8.667260][ T165] BUG: unable to handle page fault for address: ffffffffc03d1fdd
[ 8.668606][ T165] #PF: supervisor read access in kernel mode
[ 8.669632][ T165] #PF: error_code(0x0000) - not-present page
[ 8.670633][ T165] PGD 3a71067 P4D 3a71067 PUD 3a73067 PMD 10af9d067 PTE 0
[ 8.671824][ T165] Oops: Oops: 0000 [#1] SMP KASAN NOPTI
[ 8.672759][ T165] CPU: 0 UID: 0 PID: 165 Comm: (udev-worker) Not tainted 6.19.0-rc2+ #51 PREEMPT
[ 8.674379][ T165] RIP: 0010:sk_filter_trim_cap+0x333/0x700
[ 8.675367][ T165] Code: 83 3d f0 ce cc 02 00 0f 8f b4 02 00 00 4c 8d 73 60 48 8d 7b 48 e8 dd e7 25 ff 4c 8b 5b 48 4c 89 ff 4c 89 f6 41 ba ba 26 af 37 <45> 03 53 c1 74 02 0f 0b 2e 2e 2e 41 ff d3 41 89 c6 48 8b 5c 24 40
[ 8.679456][ T165] RSP: 0018:ffffc9000217f3c0 EFLAGS: 00010246
[ 8.680578][ T165] RAX: 0000000000000000 RBX: ffffc90000081000 RCX: 0000000000000000
[ 8.681953][ T165] RDX: 0000000000000000 RSI: ffffc90000081060 RDI: ffff888117db8780
[ 8.683309][ T165] RBP: ffffc9000217f4d0 R08: 0000000000000000 R09: 0000000000000000
[ 8.684687][ T165] R10: 0000000037af26ba R11: ffffffffc03d201c R12: ffff88811958c578
[ 8.686644][ T165] R13: 1ffff9200042fe84 R14: ffffc90000081060 R15: ffff888117db8780
[ 8.688064][ T165] FS: 00007f23d765f980(0000) GS:ffff888469ea1000(0000) knlGS:0000000000000000
[ 8.689585][ T165] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 8.690733][ T165] CR2: ffffffffc03d1fdd CR3: 000000011554a001 CR4: 00000000003706b0
[ 8.692221][ T165] Call Trace:
[ 8.692807][ T165] <TASK>
[ 8.693331][ T165] ? __copy_skb_header+0xe6/0x140
[ 8.694240][ T165] ? sk_filter_trim_cap+0x10e/0x700
[ 8.695178][ T165] ? __cfi_sk_filter_trim_cap+0x40/0x40
[ 8.696488][ T165] ? skb_clone+0x1cf/0x240
[ 8.697669][ T165] netlink_broadcast_filtered+0x511/0xa40
[ 8.698696][ T165] ? __cfi_netlink_broadcast_filtered+0x40/0x40
[ 8.699801][ T165] netlink_sendmsg+0x4a2/0x600
[ 8.700656][ T165] ? __cfi_netlink_sendmsg+0x40/0x40
[ 8.701571][ T165] ____sys_sendmsg+0x4e0/0x580
[ 8.702400][ T165] ? __this_cpu_preempt_check+0x17/0x40
[ 8.703389][ T165] ? __sys_sendmsg_sock+0x80/0x80
[ 8.704278][ T165] ? __import_iovec+0x264/0x400
[ 8.705172][ T165] ___sys_sendmsg+0x2d7/0x300
[ 8.706014][ T165] ? __sys_sendmsg+0x240/0x240
[ 8.706853][ T165] ? __kasan_check_read+0x15/0x40
[ 8.707841][ T165] __x64_sys_sendmsg+0x144/0x200
[ 8.708760][ T165] ? __cfi___x64_sys_sendmsg+0x40/0x40
[ 8.709733][ T165] ? do_syscall_64+0x97/0x400
[ 8.710549][ T165] ? __this_cpu_preempt_check+0x17/0x40
[ 8.711518][ T165] ? entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 8.712599][ T165] x64_sys_call+0x173a/0x2d40
[ 8.713513][ T165] do_syscall_64+0xd9/0x400
[ 8.714738][ T165] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 8.716203][ T165] RIP: 0033:0x7f23d7899687
[ 8.717292][ T165] Code: 48 89 fa 4c 89 df e8 58 b3 00 00 8b 93 08 03 00 00 59 5e 48 83 f8 fc 74 1a 5b c3 0f 1f 84 00 00 00 00 00 48 8b 44 24 10 0f 05 <5b> c3 0f 1f 80 00 00 00 00 83 e2 39 83 fa 08 75 de e8 23 ff ff ff
[ 8.721847][ T165] RSP: 002b:00007ffc1ac39800 EFLAGS: 00000202 ORIG_RAX: 000000000000002e
[ 8.723732][ T165] RAX: ffffffffffffffda RBX: 00007f23d765f980 RCX: 00007f23d7899687
[ 8.725533][ T165] RDX: 0000000000000000 RSI: 00007ffc1ac39870 RDI: 000000000000000e
[ 8.727351][ T165] RBP: 000055e6665ab0f0 R08: 0000000000000000 R09: 0000000000000000
[ 8.729158][ T165] R10: 0000000000000000 R11: 0000000000000202 R12: 0000000000000000
[ 8.730955][ T165] R13: 000055e6665748e0 R14: 00007ffc1ac39858 R15: 0000000000000000
[ 8.732719][ T165] </TASK>
Looking at the memory around prog->bpf_func (0xffffffffc03d2024), I can see the 16-byte kCFI JIT preamble, with the hash placed before the NOPs:
>>> x/40xb 0xffffffffc03d2000
0xffffffffc03d2000: 0x40 0x01 0x00 0x00 0xcc 0xcc 0xcc 0xcc
0xffffffffc03d2008: 0xcc 0xcc 0xcc 0xcc 0xcc 0xcc 0xcc 0xcc
0xffffffffc03d2010: 0xcc 0xcc 0xcc 0xcc 0xb8 0x38 0x76 0xc2
0xffffffffc03d2018: 0x98 0x90 0x90 0x90 0x90 0x90 0x90 0x90
0xffffffffc03d2020: 0x90 0x90 0x90 0x90 0xf3 0x0f 0x1e 0xfa
However, clang generates `add -0x3f(%r11),%r10d` at the call site and therefore expects the hash to be at prog->bpf_func - 63.
Here r11 holds the value of prog->bpf_func.
>>> i r rip r11 r10
rip 0xffffffff828134b3 <sk_filter_trim_cap+819>
r11 0xffffffffc03d2024
r10 0x673d89c8
>>> disassemble $rip-16,$rip+12
Dump of assembler code from 0xffffffff828134a3 to 0xffffffff828134bf:
0xffffffff828134a3 <sk_filter_trim_cap+803>: mov 0x48(%rbx),%r11
0xffffffff828134a7 <sk_filter_trim_cap+807>: mov %r15,%rdi
0xffffffff828134aa <sk_filter_trim_cap+810>: mov %r14,%rsi
0xffffffff828134ad <sk_filter_trim_cap+813>: mov $0x673d89c8,%r10d
=> 0xffffffff828134b3 <sk_filter_trim_cap+819>: add -0x3f(%r11),%r10d
0xffffffff828134b7 <sk_filter_trim_cap+823>: je 0xffffffff828134bb <sk_filter_trim_cap+827>
0xffffffff828134b9 <sk_filter_trim_cap+825>: ud2
0xffffffff828134bb <sk_filter_trim_cap+827>: cs cs cs call *%r11
End of assembler dump.
>>>
Here is the patch that fixed it for me:
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
index c40b9ebc1fb4..48f232d4b9d6 100644
--- a/arch/x86/include/asm/cfi.h
+++ b/arch/x86/include/asm/cfi.h
@@ -121,6 +121,8 @@ static inline int cfi_get_offset(void)
case CFI_FINEIBT:
return 16;
case CFI_KCFI:
+ if (IS_ENABLED(CONFIG_CC_IS_CLANG) && IS_ENABLED(CONFIG_CALL_PADDING))
+ return CONFIG_FUNCTION_PADDING_CFI + 5;
if (IS_ENABLED(CONFIG_CALL_PADDING))
return 16;
return 5;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b0bac2a66eff..f8706d5b155f 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -435,20 +435,16 @@ static void emit_fineibt(u8 **pprog, u8 *ip, u32 hash, int arity)
static void emit_kcfi(u8 **pprog, u32 hash)
{
u8 *prog = *pprog;
+ size_t nop_len = 11;
+ if (IS_ENABLED(CONFIG_CC_IS_CLANG) && IS_ENABLED(CONFIG_CALL_PADDING))
+ nop_len = 55;
EMIT1_off32(0xb8, hash); /* movl $hash, %eax */
#ifdef CONFIG_CALL_PADDING
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
+ while( nop_len > 0) {
+ EMIT1(0x90);
+ nop_len--;
+ }
#endif
EMIT_ENDBR();
After switching to clang, kbuild always selects these large padding values in my kernel config:
rusty@...10:~/code/kbuild_rust$ grep -e IBT -e PADDING .config
CONFIG_CC_HAS_IBT=y
CONFIG_X86_KERNEL_IBT=y
CONFIG_RANDOMIZE_MEMORY_PHYSICAL_PADDING=0xa
CONFIG_CC_HAS_ENTRY_PADDING=y
CONFIG_FUNCTION_PADDING_CFI=59
CONFIG_FUNCTION_PADDING_BYTES=59
CONFIG_CALL_PADDING=y
CONFIG_FINEIBT=y
I have also attached my kernel config to this email. I don't pass the cfi= command-line parameter, so the CFI mode defaults to kcfi on my machine.
Please let me know whether my patch is the right way to address the issue; if so, I will resend it properly.
Or maybe the issue should instead be fixed in the CFI code that does code patching early in the boot process.
Thank you for looking into this!
View attachment "config_rust" of type "text/plain" (80941 bytes)
Powered by blists - more mailing lists