lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Z1ve5Mvzv4+Qyn+H@perf>
Date: Fri, 13 Dec 2024 16:14:56 +0900
From: Youngmin Nam <youngmin.nam@...sung.com>
To: Eric Dumazet <edumazet@...gle.com>
Cc: Youngmin Nam <youngmin.nam@...sung.com>, Neal Cardwell
	<ncardwell@...gle.com>, davem@...emloft.net, dsahern@...nel.org,
	kuba@...nel.org, pabeni@...hat.com, horms@...nel.org,
	dujeong.lee@...sung.com, guo88.liu@...sung.com, yiwang.cai@...sung.com,
	netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
	joonki.min@...sung.com, hajun.sung@...sung.com, d7271.choe@...sung.com,
	sw.ju@...sung.com
Subject: Re: [PATCH] tcp: check socket state before calling WARN_ON

On Wed, Dec 04, 2024 at 12:08:59PM +0900, Youngmin Nam wrote:
> Hi Eric.
> Thanks for looking at this issue.
> 
> On Tue, Dec 03, 2024 at 12:07:05PM +0100, Eric Dumazet wrote:
> > On Tue, Dec 3, 2024 at 9:10 AM Youngmin Nam <youngmin.nam@...sung.com> wrote:
> > >
> > > We encountered the following WARNINGs
> > > in tcp_sacktag_write_queue()/tcp_fastretrans_alert()
> > > which triggered a kernel panic due to panic_on_warn.
> > >
> > > case 1.
> > > ------------[ cut here ]------------
> > > WARNING: CPU: 4 PID: 453 at net/ipv4/tcp_input.c:2026
> > > Call trace:
> > >  tcp_sacktag_write_queue+0xae8/0xb60
> > >  tcp_ack+0x4ec/0x12b8
> > >  tcp_rcv_state_process+0x22c/0xd38
> > >  tcp_v4_do_rcv+0x220/0x300
> > >  tcp_v4_rcv+0xa5c/0xbb4
> > >  ip_protocol_deliver_rcu+0x198/0x34c
> > >  ip_local_deliver_finish+0x94/0xc4
> > >  ip_local_deliver+0x74/0x10c
> > >  ip_rcv+0xa0/0x13c
> > > Kernel panic - not syncing: kernel: panic_on_warn set ...
> > >
> > > case 2.
> > > ------------[ cut here ]------------
> > > WARNING: CPU: 0 PID: 648 at net/ipv4/tcp_input.c:3004
> > > Call trace:
> > >  tcp_fastretrans_alert+0x8ac/0xa74
> > >  tcp_ack+0x904/0x12b8
> > >  tcp_rcv_state_process+0x22c/0xd38
> > >  tcp_v4_do_rcv+0x220/0x300
> > >  tcp_v4_rcv+0xa5c/0xbb4
> > >  ip_protocol_deliver_rcu+0x198/0x34c
> > >  ip_local_deliver_finish+0x94/0xc4
> > >  ip_local_deliver+0x74/0x10c
> > >  ip_rcv+0xa0/0x13c
> > > Kernel panic - not syncing: kernel: panic_on_warn set ...
> > >
> > 
> > I have not seen these warnings firing. Neal, have you seen this in the past ?
> > 
> > Please provide the kernel version (this must be a pristine LTS one).
> We are running an Android kernel on an Android mobile device; it is based on LTS kernel 6.6-30.
> However, we have been seeing this issue since the 5.15 LTS kernel.
> 
> > and symbolized stack traces using scripts/decode_stacktrace.sh
> Unfortunately, we don't have the matching vmlinux right now, so we need to rebuild and reproduce the issue.

Hi Eric.

We successfully reproduced this issue.
Here are the symbolized stack traces for both cases.

* Case 1
WARNING: CPU: 2 PID: 509 at net/ipv4/tcp_input.c:2026 tcp_sacktag_write_queue+0xae8/0xb60

panic+0x180                        mov w0, wzr (kernel/panic.c:369)
__warn+0x1d4                       adrp x0, #0xffffffd08256b000 <f_midi_longname+48857> (kernel/panic.c:240)
report_bug+0x174                   mov w19, #1 (lib/bug.c:201)
bug_handler+0x24                   cmp w0, #1 (arch/arm64/kernel/traps.c:1032)
brk_handler+0x94                   cbz w0, #0xffffffd081015eac <brk_handler+220> (arch/arm64/kernel/debug-monitors.c:330)
do_debug_exception+0xa4            cbz w0, #0xffffffd08103afe8 <do_debug_exception+200> (arch/arm64/mm/fault.c:965)
el1_dbg+0x58                       bl #0xffffffd08203994c <arm64_exit_el1_dbg> (arch/arm64/kernel/entry-common.c:443)
el1h_64_sync_handler+0x3c          b #0xffffffd082038884 <el1h_64_sync_handler+120> (arch/arm64/kernel/entry-common.c:482)
el1h_64_sync+0x68                  b #0xffffffd081012150 <ret_to_kernel> (arch/arm64/kernel/entry.S:594)
tcp_sacktag_write_queue+0xae8      brk #0x800 (net/ipv4/tcp_input.c:2029)
tcp_ack+0x494                      orr w21, w0, w21 (net/ipv4/tcp_input.c:3914)
tcp_rcv_state_process+0x224        ldrb w8, [x19, #0x12] (net/ipv4/tcp_input.c:6635)
tcp_v4_do_rcv+0x1ec                cbz w0, #0xffffffd081eb0628 <tcp_v4_do_rcv+520> (net/ipv4/tcp_ipv4.c:1757)
tcp_v4_rcv+0x984                   mov x0, x20 (include/linux/spinlock.h:391)
ip_protocol_deliver_rcu+0x194      tbz w0, #0x1f, #0xffffffd081e7cd00 <ip_protocol_deliver_rcu+496> (net/ipv4/ip_input.c:207)
ip_local_deliver+0xe4              bl #0xffffffd081166910 <__rcu_read_unlock> (include/linux/rcupdate.h:818)
ip_rcv+0x90                        mov w21, w0 (include/net/dst.h:468)
__netif_receive_skb_core+0xdc4     mov x23, x27 (net/core/dev.c:2241)
__netif_receive_skb_list_core+0xb8  ldr x26, [sp, #8] (net/core/dev.c:5648)
netif_receive_skb_list_internal+0x228  tbz w21, #0, #0xffffffd081d819dc <netif_receive_skb_list_internal+576> (net/core/dev.c:5716)
napi_complete_done+0xb4            str x22, [x19, #0x108] (include/linux/list.h:37)
slsi_rx_netif_napi_poll+0x22c      mov w0, w20 (../exynos/soc-series/s-android15/drivers/net/wireless/pcie_scsc/netif.c:1722)
__napi_poll+0x5c                   mov w19, w0 (net/core/dev.c:6575)
napi_threaded_poll+0x110           strb wzr, [x28, #0x39] (net/core/dev.c:6721)
kthread+0x114                      sxtw x0, w0 (kernel/kthread.c:390)
ret_from_fork+0x10                 mrs x28, sp_el0 (arch/arm64/kernel/entry.S:862)

* Case 2
WARNING: CPU: 7 PID: 2099 at net/ipv4/tcp_input.c:3030 tcp_fastretrans_alert+0x860/0x910

panic+0x180                        mov w0, wzr (kernel/panic.c:369)
__warn+0x1d4                       adrp x0, #0xffffffd08256b000 <f_midi_longname+48857> (kernel/panic.c:240)
report_bug+0x174                   mov w19, #1 (lib/bug.c:201)
bug_handler+0x24                   cmp w0, #1 (arch/arm64/kernel/traps.c:1032)
brk_handler+0x94                   cbz w0, #0xffffffd081015eac <brk_handler+220> (arch/arm64/kernel/debug-monitors.c:330)
do_debug_exception+0xa4            cbz w0, #0xffffffd08103afe8 <do_debug_exception+200> (arch/arm64/mm/fault.c:965)
el1_dbg+0x58                       bl #0xffffffd08203994c <arm64_exit_el1_dbg> (arch/arm64/kernel/entry-common.c:443)
el1h_64_sync_handler+0x3c          b #0xffffffd082038884 <el1h_64_sync_handler+120> (arch/arm64/kernel/entry-common.c:482)
el1h_64_sync+0x68                  b #0xffffffd081012150 <ret_to_kernel> (arch/arm64/kernel/entry.S:594)
tcp_fastretrans_alert+0x860        brk #0x800 (net/ipv4/tcp_input.c:2723)
tcp_ack+0x8a4                      ldur w21, [x29, #-0x20] (net/ipv4/tcp_input.c:3991)
tcp_rcv_state_process+0x224        ldrb w8, [x19, #0x12] (net/ipv4/tcp_input.c:6635)
tcp_v4_do_rcv+0x1ec                cbz w0, #0xffffffd081eb0628 <tcp_v4_do_rcv+520> (net/ipv4/tcp_ipv4.c:1757)
tcp_v4_rcv+0x984                   mov x0, x20 (include/linux/spinlock.h:391)
ip_protocol_deliver_rcu+0x194      tbz w0, #0x1f, #0xffffffd081e7cd00 <ip_protocol_deliver_rcu+496> (net/ipv4/ip_input.c:207)
ip_local_deliver+0xe4              bl #0xffffffd081166910 <__rcu_read_unlock> (include/linux/rcupdate.h:818)
ip_rcv+0x90                        mov w21, w0 (include/net/dst.h:468)
__netif_receive_skb_core+0xdc4     mov x23, x27 (net/core/dev.c:2241)
__netif_receive_skb+0x40           ldr x2, [sp, #8] (net/core/dev.c:5570)
netif_receive_skb+0x3c             mov w19, w0 (net/core/dev.c:5771)
slsi_rx_data_deliver_skb+0xbe0     cmp w0, #1 (../exynos/soc-series/s-android15/drivers/net/wireless/pcie_scsc/sap_ma.c:1104)
slsi_ba_process_complete+0x70      mov x0, x21 (include/linux/spinlock.h:356)
slsi_ba_aging_timeout_handler+0x324  mov x0, x21 (include/linux/spinlock.h:396)
call_timer_fn+0x4c                 nop (arch/arm64/include/asm/jump_label.h:22)
__run_timers+0x1c4                 mov x0, x19 (kernel/time/timer.c:1755)
run_timer_softirq+0x24             mov w9, #0x1280 (kernel/time/timer.c:2038)
handle_softirqs+0x124              nop (arch/arm64/include/asm/jump_label.h:22)
__do_softirq+0x14                  ldp x29, x30, [sp], #0x10 (kernel/softirq.c:634)
____do_softirq+0x10                ldp x29, x30, [sp], #0x10 (arch/arm64/kernel/irq.c:82)
call_on_irq_stack+0x3c             mov sp, x29 (arch/arm64/kernel/entry.S:896)
do_softirq_own_stack+0x1c          ldp x29, x30, [sp], #0x10 (arch/arm64/kernel/irq.c:87)
__irq_exit_rcu+0x54                adrp x9, #0xffffffd083064000 <this_cpu_vector> (kernel/softirq.c:662)
irq_exit_rcu+0x10                  ldp x29, x30, [sp], #0x10 (kernel/softirq.c:697)
el0_interrupt+0x54                 bl #0xffffffd0810197b4 <local_daif_mask> (arch/arm64/kernel/entry-common.c:136)
__el0_irq_handler_common+0x18      ldp x29, x30, [sp], #0x10 (arch/arm64/kernel/entry-common.c:774)
el0t_64_irq_handler+0x10           ldp x29, x30, [sp], #0x10 (arch/arm64/kernel/entry-common.c:779)
el0t_64_irq+0x1a8                  b #0xffffffd0810121b8 <ret_to_user> (arch/arm64/kernel/entry.S:600)


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ