[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Z1ve5Mvzv4+Qyn+H@perf>
Date: Fri, 13 Dec 2024 16:14:56 +0900
From: Youngmin Nam <youngmin.nam@...sung.com>
To: Eric Dumazet <edumazet@...gle.com>
Cc: Youngmin Nam <youngmin.nam@...sung.com>, Neal Cardwell
<ncardwell@...gle.com>, davem@...emloft.net, dsahern@...nel.org,
kuba@...nel.org, pabeni@...hat.com, horms@...nel.org,
dujeong.lee@...sung.com, guo88.liu@...sung.com, yiwang.cai@...sung.com,
netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
joonki.min@...sung.com, hajun.sung@...sung.com, d7271.choe@...sung.com,
sw.ju@...sung.com
Subject: Re: [PATCH] tcp: check socket state before calling WARN_ON
On Wed, Dec 04, 2024 at 12:08:59PM +0900, Youngmin Nam wrote:
> Hi Eric.
> Thanks for looking at this issue.
>
> On Tue, Dec 03, 2024 at 12:07:05PM +0100, Eric Dumazet wrote:
> > On Tue, Dec 3, 2024 at 9:10 AM Youngmin Nam <youngmin.nam@...sung.com> wrote:
> > >
> > > We encountered the following WARNINGs
> > > in tcp_sacktag_write_queue()/tcp_fastretrans_alert()
> > > which triggered a kernel panic due to panic_on_warn.
> > >
> > > case 1.
> > > ------------[ cut here ]------------
> > > WARNING: CPU: 4 PID: 453 at net/ipv4/tcp_input.c:2026
> > > Call trace:
> > > tcp_sacktag_write_queue+0xae8/0xb60
> > > tcp_ack+0x4ec/0x12b8
> > > tcp_rcv_state_process+0x22c/0xd38
> > > tcp_v4_do_rcv+0x220/0x300
> > > tcp_v4_rcv+0xa5c/0xbb4
> > > ip_protocol_deliver_rcu+0x198/0x34c
> > > ip_local_deliver_finish+0x94/0xc4
> > > ip_local_deliver+0x74/0x10c
> > > ip_rcv+0xa0/0x13c
> > > Kernel panic - not syncing: kernel: panic_on_warn set ...
> > >
> > > case 2.
> > > ------------[ cut here ]------------
> > > WARNING: CPU: 0 PID: 648 at net/ipv4/tcp_input.c:3004
> > > Call trace:
> > > tcp_fastretrans_alert+0x8ac/0xa74
> > > tcp_ack+0x904/0x12b8
> > > tcp_rcv_state_process+0x22c/0xd38
> > > tcp_v4_do_rcv+0x220/0x300
> > > tcp_v4_rcv+0xa5c/0xbb4
> > > ip_protocol_deliver_rcu+0x198/0x34c
> > > ip_local_deliver_finish+0x94/0xc4
> > > ip_local_deliver+0x74/0x10c
> > > ip_rcv+0xa0/0x13c
> > > Kernel panic - not syncing: kernel: panic_on_warn set ...
> > >
> >
> > I have not seen these warnings firing. Neal, have you seen this in the past?
> >
> > Please provide the kernel version (this must be a pristine LTS one).
> We are running an Android kernel for an Android mobile device, which is based on LTS kernel 6.6-30.
> But we've seen this issue since kernel 5.15 LTS.
>
> > and symbolized stack traces using scripts/decode_stacktrace.sh
> Unfortunately, we don't have the matching vmlinux right now, so we need to rebuild and reproduce the issue.
Hi Eric.
We successfully reproduced this issue.
Here are the symbolized stack traces.
* Case 1
WARNING: CPU: 2 PID: 509 at net/ipv4/tcp_input.c:2026 tcp_sacktag_write_queue+0xae8/0xb60
panic+0x180 mov w0, wzr (kernel/panic.c:369)
__warn+0x1d4 adrp x0, #0xffffffd08256b000 <f_midi_longname+48857> (kernel/panic.c:240)
report_bug+0x174 mov w19, #1 (lib/bug.c:201)
bug_handler+0x24 cmp w0, #1 (arch/arm64/kernel/traps.c:1032)
brk_handler+0x94 cbz w0, #0xffffffd081015eac <brk_handler+220> (arch/arm64/kernel/debug-monitors.c:330)
do_debug_exception+0xa4 cbz w0, #0xffffffd08103afe8 <do_debug_exception+200> (arch/arm64/mm/fault.c:965)
el1_dbg+0x58 bl #0xffffffd08203994c <arm64_exit_el1_dbg> (arch/arm64/kernel/entry-common.c:443)
el1h_64_sync_handler+0x3c b #0xffffffd082038884 <el1h_64_sync_handler+120> (arch/arm64/kernel/entry-common.c:482)
el1h_64_sync+0x68 b #0xffffffd081012150 <ret_to_kernel> (arch/arm64/kernel/entry.S:594)
tcp_sacktag_write_queue+0xae8 brk #0x800 (net/ipv4/tcp_input.c:2029)
tcp_ack+0x494 orr w21, w0, w21 (net/ipv4/tcp_input.c:3914)
tcp_rcv_state_process+0x224 ldrb w8, [x19, #0x12] (net/ipv4/tcp_input.c:6635)
tcp_v4_do_rcv+0x1ec cbz w0, #0xffffffd081eb0628 <tcp_v4_do_rcv+520> (net/ipv4/tcp_ipv4.c:1757)
tcp_v4_rcv+0x984 mov x0, x20 (include/linux/spinlock.h:391)
ip_protocol_deliver_rcu+0x194 tbz w0, #0x1f, #0xffffffd081e7cd00 <ip_protocol_deliver_rcu+496> (net/ipv4/ip_input.c:207)
ip_local_deliver+0xe4 bl #0xffffffd081166910 <__rcu_read_unlock> (include/linux/rcupdate.h:818)
ip_rcv+0x90 mov w21, w0 (include/net/dst.h:468)
__netif_receive_skb_core+0xdc4 mov x23, x27 (net/core/dev.c:2241)
__netif_receive_skb_list_core+0xb8 ldr x26, [sp, #8] (net/core/dev.c:5648)
netif_receive_skb_list_internal+0x228 tbz w21, #0, #0xffffffd081d819dc <netif_receive_skb_list_internal+576> (net/core/dev.c:5716)
napi_complete_done+0xb4 str x22, [x19, #0x108] (include/linux/list.h:37)
slsi_rx_netif_napi_poll+0x22c mov w0, w20 (../exynos/soc-series/s-android15/drivers/net/wireless/pcie_scsc/netif.c:1722)
__napi_poll+0x5c mov w19, w0 (net/core/dev.c:6575)
napi_threaded_poll+0x110 strb wzr, [x28, #0x39] (net/core/dev.c:6721)
kthread+0x114 sxtw x0, w0 (kernel/kthread.c:390)
ret_from_fork+0x10 mrs x28, sp_el0 (arch/arm64/kernel/entry.S:862)
* Case 2
WARNING: CPU: 7 PID: 2099 at net/ipv4/tcp_input.c:3030 tcp_fastretrans_alert+0x860/0x910
panic+0x180 mov w0, wzr (kernel/panic.c:369)
__warn+0x1d4 adrp x0, #0xffffffd08256b000 <f_midi_longname+48857> (kernel/panic.c:240)
report_bug+0x174 mov w19, #1 (lib/bug.c:201)
bug_handler+0x24 cmp w0, #1 (arch/arm64/kernel/traps.c:1032)
brk_handler+0x94 cbz w0, #0xffffffd081015eac <brk_handler+220> (arch/arm64/kernel/debug-monitors.c:330)
do_debug_exception+0xa4 cbz w0, #0xffffffd08103afe8 <do_debug_exception+200> (arch/arm64/mm/fault.c:965)
el1_dbg+0x58 bl #0xffffffd08203994c <arm64_exit_el1_dbg> (arch/arm64/kernel/entry-common.c:443)
el1h_64_sync_handler+0x3c b #0xffffffd082038884 <el1h_64_sync_handler+120> (arch/arm64/kernel/entry-common.c:482)
el1h_64_sync+0x68 b #0xffffffd081012150 <ret_to_kernel> (arch/arm64/kernel/entry.S:594)
tcp_fastretrans_alert+0x860 brk #0x800 (net/ipv4/tcp_input.c:2723)
tcp_ack+0x8a4 ldur w21, [x29, #-0x20] (net/ipv4/tcp_input.c:3991)
tcp_rcv_state_process+0x224 ldrb w8, [x19, #0x12] (net/ipv4/tcp_input.c:6635)
tcp_v4_do_rcv+0x1ec cbz w0, #0xffffffd081eb0628 <tcp_v4_do_rcv+520> (net/ipv4/tcp_ipv4.c:1757)
tcp_v4_rcv+0x984 mov x0, x20 (include/linux/spinlock.h:391)
ip_protocol_deliver_rcu+0x194 tbz w0, #0x1f, #0xffffffd081e7cd00 <ip_protocol_deliver_rcu+496> (net/ipv4/ip_input.c:207)
ip_local_deliver+0xe4 bl #0xffffffd081166910 <__rcu_read_unlock> (include/linux/rcupdate.h:818)
ip_rcv+0x90 mov w21, w0 (include/net/dst.h:468)
__netif_receive_skb_core+0xdc4 mov x23, x27 (net/core/dev.c:2241)
__netif_receive_skb+0x40 ldr x2, [sp, #8] (net/core/dev.c:5570)
netif_receive_skb+0x3c mov w19, w0 (net/core/dev.c:5771)
slsi_rx_data_deliver_skb+0xbe0 cmp w0, #1 (../exynos/soc-series/s-android15/drivers/net/wireless/pcie_scsc/sap_ma.c:1104)
slsi_ba_process_complete+0x70 mov x0, x21 (include/linux/spinlock.h:356)
slsi_ba_aging_timeout_handler+0x324 mov x0, x21 (include/linux/spinlock.h:396)
call_timer_fn+0x4c nop (arch/arm64/include/asm/jump_label.h:22)
__run_timers+0x1c4 mov x0, x19 (kernel/time/timer.c:1755)
run_timer_softirq+0x24 mov w9, #0x1280 (kernel/time/timer.c:2038)
handle_softirqs+0x124 nop (arch/arm64/include/asm/jump_label.h:22)
__do_softirq+0x14 ldp x29, x30, [sp], #0x10 (kernel/softirq.c:634)
____do_softirq+0x10 ldp x29, x30, [sp], #0x10 (arch/arm64/kernel/irq.c:82)
call_on_irq_stack+0x3c mov sp, x29 (arch/arm64/kernel/entry.S:896)
do_softirq_own_stack+0x1c ldp x29, x30, [sp], #0x10 (arch/arm64/kernel/irq.c:87)
__irq_exit_rcu+0x54 adrp x9, #0xffffffd083064000 <this_cpu_vector> (kernel/softirq.c:662)
irq_exit_rcu+0x10 ldp x29, x30, [sp], #0x10 (kernel/softirq.c:697)
el0_interrupt+0x54 bl #0xffffffd0810197b4 <local_daif_mask> (arch/arm64/kernel/entry-common.c:136)
__el0_irq_handler_common+0x18 ldp x29, x30, [sp], #0x10 (arch/arm64/kernel/entry-common.c:774)
el0t_64_irq_handler+0x10 ldp x29, x30, [sp], #0x10 (arch/arm64/kernel/entry-common.c:779)
el0t_64_irq+0x1a8 b #0xffffffd0810121b8 <ret_to_user> (arch/arm64/kernel/entry.S:600)
Powered by blists - more mailing lists