[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <ZB39Pz3nQJGBetPm@google.com>
Date: Fri, 24 Mar 2023 12:42:55 -0700
From: Stanislav Fomichev <sdf@...gle.com>
To: Farbod Shahinfar <farbod.shahinfar@...imi.it>
Cc: "bpf@...r.kernel.org" <bpf@...r.kernel.org>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
John Fastabend <john.fastabend@...il.com>
Subject: Re: Kernel panic on bpf_skb_pull_data
On 03/24, Farbod Shahinfar wrote:
> Hello everyone,
> I am performing some tests with BPF SK_SKB and I have encountered a
> scenario resulting in kernel panic. I use a BPF_SK_SKB_STREAM_PARSER
> program to parse a request which might be spanning multiple TCP
> segments. If the end of request is detected in the parser program it
> returns skb->len, passing the request to the
> BPF_SK_SKB_STREAM_VERDICT program, and otherwise it returns 0, waiting
> for more data to be received. You can find the BPF program attached
> (bpf_test.c). Is there an assumption that the program violates?
> To reproduce the crashing scenario, I use the python script attached
> (client.py) which sends data in chunks toward the bpf program. Usually,
> the kernel crashes on the 3rd segment.
>
> To provide more information, I have attached some crash logs. I have
> tested this on kernel version 6.1.0 (slightly modified) and version
> 6.2.8 (unmodified, obtained from kernel.org). It seems that the panic
> happens when invoking the bpf_skb_pull_data.
> Is this a known issue or is there any information that I can provide to
> help resolve it?
John, is it something already covered by your recent [0]?
[0]:
https://lore.kernel.org/bpf/20230321215212.525630-1-john.fastabend@gmail.com/
> Sincerely,
> Farbod Shahinfar
> PhD student at Politecnico di Milano
> https://fshahinfar1.github.io/
> kernel BUG at net/core/skbuff.c:2446!
> [ 1549.847914] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
> [ 1549.853139] CPU: 10 PID: 2008 Comm: python3 Tainted: G
> O 6.2.8 #4
> [ 1549.860615] Hardware name: Dell Inc. PowerEdge C6525/04DK47, BIOS
> 2.0.3 01/15/2021
> [ 1549.868181] RIP: 0010:__pskb_pull_tail+0x45c/0x4b0
> [ 1549.872973] Code: 24 08 be 02 00 00 00 e8 42 cb ff ff 48 83 c4 10 31
> c0 5b 41 5c 41 5d 41 5e 41 5f 5d e9 41 a0 26 00 48 8d 78 ff e9 42 fd ff
> ff <0f> 0b 48 c7 44 24 08 00 00 00 00 e9 9f fe ff ff 48 c7 44 24 08 00
> [ 1549.891711] RSP: 0018:ffffad1fc05f8998 EFLAGS: 00010282
> [ 1549.896939] RAX: 00000000fffffff2 RBX: 0000000000000001 RCX:
> 00000000000002c0
> [ 1549.904070] RDX: ffff992f00ffe15b RSI: ffff992f00ffe000 RDI:
> ffff992f168adf00
> [ 1549.911196] RBP: ffffad1fc05f89d0 R08: 00000000ffffff1c R09:
> ffff992f168ad300
> [ 1549.918329] R10: 0000000000000001 R11: 0000000000000001 R12:
> 0000000000000001
> [ 1549.925460] R13: ffff992f168adf00 R14: ffff992f00fff048 R15:
> ffff992f168adf00
> [ 1549.932585] FS: 00007fd4a5e55740(0000) GS:ffff994dfee80000(0000)
> knlGS:0000000000000000
> [ 1549.940670] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 1549.946409] CR2: 0000000000453531 CR3: 000000010a688000 CR4:
> 0000000000350ee0
> [ 1549.953542] Call Trace:
> [ 1549.955997] <IRQ>
> [ 1549.958017] skb_ensure_writable+0x8f/0xa0
> [ 1549.962114] sk_skb_pull_data+0x24/0x30
> [ 1549.965953] bpf_prog_5c4f327a1e142419_parser+0x1f/0xd8
> [ 1549.971179] sk_psock_strp_parse+0x50/0xc0
> [ 1549.975279] __strp_recv+0x1fb/0x630
> [ 1549.978859] strp_recv+0x27/0x30
> [ 1549.982092] tcp_read_sock+0x91/0x1c0
> [ 1549.985757] ? __pfx_strp_recv+0x10/0x10
> [ 1549.989683] strp_read_sock+0x68/0xb0
> [ 1549.993348] strp_data_ready+0x61/0xa0
> [ 1549.997101] sk_psock_strp_data_ready+0x86/0xa0
> [ 1550.001635] tcp_data_ready+0x33/0xe0
> [ 1550.005301] tcp_data_queue+0x469/0xd40
> [ 1550.009142] tcp_rcv_established+0x264/0x730
> [ 1550.013411] ? sk_filter_trim_cap+0x114/0x250
> [ 1550.017772] tcp_v4_do_rcv+0x15d/0x2b0
> [ 1550.021524] tcp_v4_rcv+0x11f1/0x1260
> [ 1550.025192] ip_protocol_deliver_rcu+0x3b/0x330
> [ 1550.029724] ip_local_deliver_finish+0x8a/0xb0
> [ 1550.034171] ip_local_deliver+0x73/0x120
> [ 1550.038095] ? __pfx_ip_local_deliver_finish+0x10/0x10
> [ 1550.043234] ip_rcv_finish+0xc2/0xd0
> [ 1550.046816] ip_rcv+0x57/0xf0
> [ 1550.049787] ? probe_sched_wakeup+0x39/0x40
> [ 1550.053975] __netif_receive_skb_one_core+0x8c/0xa0
> [ 1550.058854] __netif_receive_skb+0x15/0x60
> [ 1550.062953] process_backlog+0xa8/0x140
> [ 1550.066793] __napi_poll+0x31/0x1d0
> [ 1550.070286] net_rx_action+0x290/0x2e0
> [ 1550.074037] __do_softirq+0xf5/0x2d7
> [ 1550.077618] do_softirq+0x9a/0xc0
> [ 1550.080935] </IRQ>
> [ 1550.083032] <TASK>
> [ 1550.085129] __local_bh_enable_ip+0x7d/0x80
> [ 1550.089318] ip_finish_output2+0x19c/0x570
> [ 1550.093417] __ip_finish_output+0x1fe/0x2f0
> [ 1550.097601] ? add_wait_queue+0x6f/0x80
> [ 1550.101443] ip_finish_output+0x2e/0xd0
> [ 1550.105282] ip_output+0x7e/0x110
> [ 1550.108601] ? do_renameat2+0x244/0x5b0
> [ 1550.112441] ? __pfx_ip_finish_output+0x10/0x10
> [ 1550.116974] ip_local_out+0x62/0x70
> [ 1550.120466] __ip_queue_xmit+0x192/0x450
> [ 1550.124392] ip_queue_xmit+0x19/0x20
> [ 1550.127971] __tcp_transmit_skb+0xa56/0xb90
> [ 1550.132157] tcp_write_xmit+0x54d/0x12a0
> [ 1550.136082] ? _copy_from_iter+0x12a/0x5b0
> [ 1550.140184] __tcp_push_pending_frames+0x3b/0x110
> [ 1550.144890] tcp_push+0x10c/0x120
> [ 1550.148207] tcp_sendmsg_locked+0x491/0xc30
> [ 1550.152397] tcp_sendmsg+0x31/0x50
> [ 1550.155800] inet_sendmsg+0x47/0x80
> [ 1550.159294] sock_sendmsg+0x66/0x70
> [ 1550.162786] __sys_sendto+0x122/0x1b0
> [ 1550.166454] ? debug_smp_processor_id+0x1b/0x30
> [ 1550.170984] ? fpregs_assert_state_consistent+0x2b/0x60
> [ 1550.176209] ? exit_to_user_mode_prepare+0x49/0x1b0
> [ 1550.181090] __x64_sys_sendto+0x2d/0x40
> [ 1550.184928] do_syscall_64+0x3f/0x90
> [ 1550.188509] entry_SYSCALL_64_after_hwframe+0x72/0xdc
> [ 1550.193561] RIP: 0033:0x7fd4a6141530
> [ 1550.197140] Code: ff eb bc 0f 1f 80 00 00 00 00 f3 0f 1e fa 41 89 ca
> 64 8b 04 25 18 00 00 00 85 c0 75 1d 45 31 c9 45 31 c0 b8 2c 00 00 00 0f
> 05 <48> 3d 00 f0 ff ff 77 68 c3 0f 1f 80 00 00 00 00 55 48 83 ec 20 48
> [ 1550.215879] RSP: 002b:00007ffe74631ee8 EFLAGS: 00000246 ORIG_RAX:
> 000000000000002c
> [ 1550.223443] RAX: ffffffffffffffda RBX: 00007ffe74631f80 RCX:
> 00007fd4a6141530
> [ 1550.230570] RDX: 0000000000000001 RSI: 00007fd4a5671ad0 RDI:
> 0000000000000003
> [ 1550.237702] RBP: 000000000113cfe0 R08: 0000000000000000 R09:
> 0000000000000000
> [ 1550.244835] R10: 0000000000000000 R11: 0000000000000246 R12:
> 0000000000000001
> [ 1550.251967] R13: 0000000000000000 R14: 00007ffe74631f80 R15:
> 0000000000624240
> [ 1550.259102] </TASK>
> [ 1550.261293] Modules linked in: nfsv3 nfs_acl nfs lockd grace fscache
> netfs ipod(O) ipmi_ssif intel_rapl_msr intel_rapl_common amd64_edac
> edac_mce_amd kvm_amd binfmt_misc kvm crct10dif_pclmul ghash_clmulni_intel
> sha512_ssse3 aesni_intel crypto_simd cryptd mgag200 acpi_ipmi
> drm_shmem_helper rapl wmi_bmof drm_kms_helper i2c_algo_bit syscopyarea
> ipmi_si ipmi_devintf sysfillrect sysimgblt ccp k10temp ipmi_msghandler
> acpi_power_meter mac_hid sch_fq_codel drm sunrpc ip_tables x_tables
> autofs4 mlx5_ib ib_uverbs ib_core mlx5_core pci_hyperv_intf ahci mlxfw
> crc32_pclmul libahci psample i2c_piix4 tls wmi
> [ 1550.313799] ---[ end trace 0000000000000000 ]---
> [ 1550.318423] RIP: 0010:__pskb_pull_tail+0x45c/0x4b0
> [ 1550.323223] Code: 24 08 be 02 00 00 00 e8 42 cb ff ff 48 83 c4 10 31
> c0 5b 41 5c 41 5d 41 5e 41 5f 5d e9 41 a0 26 00 48 8d 78 ff e9 42 fd ff
> ff <0f> 0b 48 c7 44 24 08 00 00 00 00 e9 9f fe ff ff 48 c7 44 24 08 00
> [ 1550.341972] RSP: 0018:ffffad1fc05f8998 EFLAGS: 00010282
> [ 1550.347205] RAX: 00000000fffffff2 RBX: 0000000000000001 RCX:
> 00000000000002c0
> [ 1550.354339] RDX: ffff992f00ffe15b RSI: ffff992f00ffe000 RDI:
> ffff992f168adf00
> [ 1550.361478] RBP: ffffad1fc05f89d0 R08: 00000000ffffff1c R09:
> ffff992f168ad300
> [ 1550.368611] R10: 0000000000000001 R11: 0000000000000001 R12:
> 0000000000000001
> [ 1550.375746] R13: ffff992f168adf00 R14: ffff992f00fff048 R15:
> ffff992f168adf00
> [ 1550.382886] FS: 00007fd4a5e55740(0000) GS:ffff994dfee80000(0000)
> knlGS:0000000000000000
> [ 1550.390972] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 1550.396719] CR2: 0000000000453531 CR3: 000000010a688000 CR4:
> 0000000000350ee0
> [ 1550.403861] Kernel panic - not syncing: Fatal exception in interrupt
> [ 1550.410763] Kernel Offset: 0x0 from 0xffffffff81000000 (relocation
> range: 0xffffffff80000000-0xffffffffbfffffff)
> [ 1550.420940] ---[ end Kernel panic - not syncing: Fatal exception in
> interrupt ]---
> [ 404.840245] kernel BUG at net/core/skbuff.c:2380!
> [ 404.845070] invalid opcode: 0000 [#1] PREEMPT SMP PTI
> [ 404.846838] CPU: 2 PID: 1166 Comm: nc Not tainted
> 6.1.0-rc7-g83de108e3723-dirty #73
> [ 404.848923] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
> 1.15.0-1 04/01/2014
> [ 404.850796] RIP: 0010:__pskb_pull_tail+0x3fc/0x450
> [ 404.852171] Code: 24 08 be 01 00 00 00 e8 e2 d0 ff ff 48 83 c4 10 31
> c0 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 48 8d 78 ff e9 96 fd ff
> ff <0f> 0b 48 c7 44 24 08 00 00 00 00 e9 9f fe ff ff 48 c7 44 24 08 00
> [ 404.856380] RSP: 0018:ffffa7aa800fca50 EFLAGS: 00010282
> [ 404.857563] RAX: 00000000fffffff2 RBX: 0000000000000008 RCX:
> 0000000000000008
> [ 404.859208] RDX: ffffa09f464b815d RSI: 00000000000002c0 RDI:
> 0000000000000000
> [ 404.860790] RBP: ffffa7aa800fca88 R08: 00000000ffffff25 R09:
> 7fffffffffffffff
> [ 404.862379] R10: 0000000000000008 R11: ffffa09f44d9fc40 R12:
> 0000000000000008
> [ 404.863963] R13: ffffa09f44d9fc00 R14: ffffa09f44d9fc00 R15:
> ffffa09f44d9f700
> [ 404.865534] FS: 00007f98873c3740(0000) GS:ffffa0a0b7d00000(0000)
> knlGS:0000000000000000
> [ 404.867367] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 404.868771] CR2: 00007ffe1f2db090 CR3: 00000001033c8002 CR4:
> 0000000000370ee0
> [ 404.870448] Call Trace:
> [ 404.871063] <IRQ>
> [ 404.871663] skb_ensure_writable+0x84/0xa0
> [ 404.872678] sk_skb_pull_data+0x17/0x20
> [ 404.873650] bpf_prog_fddaddf43f6741a5_parser+0x24/0x1e9
> [ 404.874942] ? selinux_netlbl_sock_rcv_skb+0x2e/0x1b0
> [ 404.876160] ? kmem_cache_alloc+0x33/0x1b0
> [ 404.877175] sk_psock_strp_parse+0x3f/0xc0
> [ 404.878222] __strp_recv+0x1f2/0x620
> [ 404.879143] ? strp_process+0x40/0x40
> [ 404.880081] tcp_read_sock+0x7f/0x1b0
> [ 404.881019] strp_read_sock+0x5e/0xa0
> [ 404.882719] strp_data_ready+0x54/0x80
> [ 404.883675] sk_psock_strp_data_ready+0x68/0x80
> [ 404.884890] tcp_data_queue+0x43a/0xce0
> [ 404.885950] tcp_rcv_established+0x248/0x6e0
> [ 404.887026] tcp_v4_do_rcv+0x147/0x290
> [ 404.887969] tcp_v4_rcv+0xe59/0xf00
> [ 404.888865] ip_protocol_deliver_rcu+0x2d/0x1f0
> [ 404.889985] ip_local_deliver_finish+0x6e/0x90
> [ 404.891125] ip_local_deliver+0x66/0x110
> [ 404.892116] ip_rcv+0x4a/0xf0
> [ 404.893091] __netif_receive_skb_one_core+0x86/0xa0
> [ 404.894300] process_backlog+0xa3/0x150
> [ 404.895287] __napi_poll+0x24/0x160
> [ 404.896249] net_rx_action+0x291/0x350
> [ 404.897198] __do_softirq+0xb3/0x28c
> [ 404.898183] do_softirq+0x52/0x70
> [ 404.899064] </IRQ>
> [ 404.899678] <TASK>
> [ 404.900310] __local_bh_enable_ip+0x5f/0x70
> [ 404.901457] ip_finish_output2+0x179/0x500
> [ 404.902572] ip_output+0x71/0x110
> [ 404.903481] ? __ip_finish_output+0x2a0/0x2a0
> [ 404.904643] __ip_queue_xmit+0x174/0x3d0
> [ 404.905693] __tcp_transmit_skb+0xa38/0xb50
> [ 404.906764] ? __alloc_skb+0x89/0x1b0
> [ 404.907704] tcp_write_xmit+0x4dc/0x1160
> [ 404.908689] __tcp_push_pending_frames+0x2d/0xc0
> [ 404.909830] tcp_sendmsg_locked+0x291/0xbf0
> [ 404.910872] tcp_sendmsg+0x23/0x40
> [ 404.911739] sock_sendmsg+0x56/0x60
> [ 404.913378] sock_write_iter+0x92/0xf0
> [ 404.914352] vfs_write+0x356/0x3c0
> [ 404.915263] ksys_write+0xa6/0xe0
> [ 404.916298] do_syscall_64+0x38/0x90
> [ 404.917245] entry_SYSCALL_64_after_hwframe+0x63/0xcd
> [ 404.918560] RIP: 0033:0x7f98874d4077
> [ 404.919524] Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00
> 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f
> 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
> [ 404.923807] RSP: 002b:00007ffe1f2db018 EFLAGS: 00000246 ORIG_RAX:
> 0000000000000001
> [ 404.925817] RAX: ffffffffffffffda RBX: 0000000000000000 RCX:
> 00007f98874d4077
> [ 404.927706] RDX: 0000000000000008 RSI: 00007ffe1f2df080 RDI:
> 0000000000000003
> [ 404.929369] RBP: 00007ffe1f2db058 R08: 0000000000000004 R09:
> 0000000000000001
> [ 404.931039] R10: 0000000000000000 R11: 0000000000000246 R12:
> 0000000000000008
> [ 404.932717] R13: 00007ffe1f2df080 R14: 0000000000000003 R15:
> 0000000000000005
> [ 404.934448] </TASK>
> [ 404.935141] Modules linked in:
> [ 404.936005] ---[ end trace 0000000000000000 ]---
> [ 404.937206] RIP: 0010:__pskb_pull_tail+0x3fc/0x450
> [ 404.938475] Code: 24 08 be 01 00 00 00 e8 e2 d0 ff ff 48 83 c4 10 31
> c0 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc 48 8d 78 ff e9 96 fd ff
> ff <0f> 0b 48 c7 44 24 08 00 00 00 00 e9 9f fe ff ff 48 c7 44 24 08 00
> [ 404.943339] RSP: 0018:ffffa7aa800fca50 EFLAGS: 00010282
> [ 404.944681] RAX: 00000000fffffff2 RBX: 0000000000000008 RCX:
> 0000000000000008
> [ 404.946518] RDX: ffffa09f464b815d RSI: 00000000000002c0 RDI:
> 0000000000000000
> [ 404.948302] RBP: ffffa7aa800fca88 R08: 00000000ffffff25 R09:
> 7fffffffffffffff
> [ 404.950269] R10: 0000000000000008 R11: ffffa09f44d9fc40 R12:
> 0000000000000008
> [ 404.951691] R13: ffffa09f44d9fc00 R14: ffffa09f44d9fc00 R15:
> ffffa09f44d9f700
> [ 404.953292] FS: 00007f98873c3740(0000) GS:ffffa0a0b7d00000(0000)
> knlGS:0000000000000000
> [ 404.955084] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 404.956425] CR2: 00007ffe1f2db090 CR3: 00000001033c8002 CR4:
> 0000000000370ee0
> [ 404.958010] Kernel panic - not syncing: Fatal exception in interrupt
> [ 404.959636] Kernel Offset: 0x29200000 from 0xffffffff81000000
> (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
> [ 404.961972] ---[ end Kernel panic - not syncing: Fatal exception in
> interrupt ]---
> import socket
> import time
> import argparse
> def _send(sock, data):
> print(f'[Sending "{data}"]')
> sock.send(data.encode())
> def _recv(sock):
> try:
> resp = sock.recv(2048)
> return resp.decode()
> except:
> print('[No data received]')
> return None
> def parse_args():
> parser = argparse.ArgumentParser()
> parser.add_argument('-k', help='Kernel crash scenario',
> action='store_true')
> parser.add_argument('--ip', default='localhost')
> parser.add_argument('--port', default=8080, type=int)
> args = parser.parse_args()
> return args
> def main():
> args = parse_args()
> s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
> s.connect((args.ip, args.port))
> s.settimeout(1)
> # What request to send?
> req = ['hello world 1', 'hello world 2 END']
> if args.k:
> req = ['hello world 1', 'hello world 2', 'hello world 3',
> 'hello world 4', 'hello world 5 END']
> for r in req:
> _send(s, r)
> resp = _recv(s)
> if resp:
> print(resp)
> s.close()
> if __name__ == '__main__':
> main()
> #include <sys/types.h>
> #include <sys/socket.h>
> #include <linux/tcp.h>
> #include <linux/bpf.h>
> #include <bpf/bpf_helpers.h>
> #include <bpf/bpf_endian.h>
> /* Put state of each socket in this struct (This will be used in
> sockops.h as
> * part of per socket metadata) */
> struct connection_state { };
> #include "my_bpf/sockops.h"
> #define OFFSET_MASK 0x0fff
> /* struct arg { */
> /* int test; */
> /* }; */
> /* struct { */
> /* __uint(type, BPF_MAP_TYPE_ARRAY); */
> /* /1* __uint(map_flags, BPF_F_MMAPABLE); *1/ */
> /* __type(key, __u32); */
> /* __type(value, struct arg); */
> /* __uint(max_entries, 1); */
> /* } arg_map SEC(".maps"); */
> SEC("sk_skb/stream_parser")
> int parser(struct __sk_buff *skb)
> {
> void *data;
> void *data_end;
> __u16 len;
> /* Pull message data so that we can access it */
> if (bpf_skb_pull_data(skb, skb->len) != 0) {
> bpf_printk("Parser: Failed to load message data\n");
> return 0;
> }
> data = (void *)(long)skb->data;
> data_end = (void *)(long)skb->data_end;
> len = skb->len;
> char *ptr = data + ((len - 3) & 0x7fff);
> if ((void *)ptr < data || ((void *)ptr + 3 > data_end)) {
> bpf_printk("Parser: Not enough data!");
> return 0;
> }
> if (ptr[0] == 'E' && ptr[1] == 'N' && ptr[2] == 'D') {
> /* Found the end of request */
> return skb->len;
> }
> bpf_printk("@%d\n%s", (long)ptr - (long)data, ptr);
> return 0;
> }
> SEC("sk_skb/stream_verdict")
> int verdict(struct __sk_buff *skb)
> {
> return SK_PASS;
> }
> char _license[] SEC("license") = "GPL";
Powered by blists - more mailing lists