[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250520121908.1805732-1-edumazet@google.com>
Date: Tue, 20 May 2025 12:19:08 +0000
From: Eric Dumazet <edumazet@...gle.com>
To: "David S . Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>
Cc: Simon Horman <horms@...nel.org>, netdev@...r.kernel.org, eric.dumazet@...il.com,
Eric Dumazet <edumazet@...gle.com>
Subject: [PATCH net-next] net: add debug checks in ____napi_schedule() and napi_poll()
While tracking an IDPF bug, I found that idpf_vport_splitq_napi_poll()
was not following NAPI rules.
It can indeed return @budget after napi_complete() has been called.
Add two debug conditions in the networking core to hopefully catch
this kind of bug sooner.
The IDPF bug will be fixed in a separate patch.
[ 72.441242] repoll requested for device eth1 idpf_vport_splitq_napi_poll [idpf] but napi is not scheduled.
[ 72.446291] list_del corruption. next->prev should be ff31783d93b14040, but was ff31783d93b10080. (next=ff31783d93b10080)
[ 72.446659] kernel BUG at lib/list_debug.c:67!
[ 72.446816] Oops: invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC NOPTI
[ 72.447031] CPU: 156 UID: 0 PID: 16258 Comm: ip Tainted: G W 6.15.0-dbg-DEV #1944 NONE
[ 72.447340] Tainted: [W]=WARN
[ 72.447702] RIP: 0010:__list_del_entry_valid_or_report (lib/list_debug.c:65)
[ 72.450630] Call Trace:
[ 72.450720] <IRQ>
[ 72.450797] net_rx_action (include/linux/list.h:215 include/linux/list.h:287 net/core/dev.c:7385 net/core/dev.c:7516)
[ 72.450928] ? lock_release (kernel/locking/lockdep.c:?)
[ 72.451059] ? clockevents_program_event (kernel/time/clockevents.c:?)
[ 72.451222] handle_softirqs (kernel/softirq.c:579)
[ 72.451356] ? do_softirq (kernel/softirq.c:480)
[ 72.451480] ? idpf_vc_xn_exec (drivers/net/ethernet/intel/idpf/idpf_virtchnl.c:462) idpf
[ 72.451635] do_softirq (kernel/softirq.c:480)
[ 72.451750] </IRQ>
[ 72.451828] <TASK>
[ 72.451905] __local_bh_enable_ip (kernel/softirq.c:?)
[ 72.452051] idpf_vc_xn_exec (drivers/net/ethernet/intel/idpf/idpf_virtchnl.c:462) idpf
[ 72.452210] idpf_send_delete_queues_msg (drivers/net/ethernet/intel/idpf/idpf_virtchnl.c:2083) idpf
[ 72.452390] idpf_vport_stop (drivers/net/ethernet/intel/idpf/idpf_lib.c:837 drivers/net/ethernet/intel/idpf/idpf_lib.c:868) idpf
[ 72.452541] ? idpf_vport_stop (include/linux/bottom_half.h:? include/linux/netdevice.h:4762 drivers/net/ethernet/intel/idpf/idpf_lib.c:855) idpf
[ 72.452695] idpf_initiate_soft_reset (drivers/net/ethernet/intel/idpf/idpf_lib.c:?) idpf
[ 72.452867] idpf_change_mtu (drivers/net/ethernet/intel/idpf/idpf_lib.c:2189) idpf
[ 72.453015] netif_set_mtu_ext (net/core/dev.c:9437)
[ 72.453157] ? packet_notifier (include/linux/rcupdate.h:331 include/linux/rcupdate.h:841 net/packet/af_packet.c:4240)
[ 72.453292] netif_set_mtu (net/core/dev.c:9515)
[ 72.453416] dev_set_mtu (net/core/dev_api.c:?)
[ 72.453534] bond_change_mtu (drivers/net/bonding/bond_main.c:4833)
[ 72.453666] netif_set_mtu_ext (net/core/dev.c:9437)
[ 72.453803] do_setlink (net/core/rtnetlink.c:3116)
[ 72.453925] ? rtnl_newlink (net/core/rtnetlink.c:3901)
[ 72.454055] ? rtnl_newlink (net/core/rtnetlink.c:3901)
[ 72.454185] ? rtnl_newlink (net/core/rtnetlink.c:3901)
[ 72.454314] ? trace_contention_end (include/trace/events/lock.h:122)
[ 72.454467] ? __mutex_lock (arch/x86/include/asm/preempt.h:85 kernel/locking/mutex.c:611 kernel/locking/mutex.c:746)
[ 72.454597] ? cap_capable (include/trace/events/capability.h:26)
[ 72.454721] ? security_capable (security/security.c:?)
[ 72.454857] rtnl_newlink (net/core/rtnetlink.c:?)
[ 72.454982] ? lock_is_held_type (kernel/locking/lockdep.c:5599 kernel/locking/lockdep.c:5938)
[ 72.455121] ? __lock_acquire (kernel/locking/lockdep.c:?)
[ 72.455256] ? __change_page_attr_set_clr (arch/x86/mm/pat/set_memory.c:685)
[ 72.455438] ? __lock_acquire (kernel/locking/lockdep.c:?)
[ 72.455582] ? rtnetlink_rcv_msg (include/linux/rcupdate.h:331 include/linux/rcupdate.h:841 net/core/rtnetlink.c:6885)
[ 72.455721] ? lock_acquire (kernel/locking/lockdep.c:5866)
[ 72.455848] ? rtnetlink_rcv_msg (include/linux/rcupdate.h:331 include/linux/rcupdate.h:841 net/core/rtnetlink.c:6885)
[ 72.455987] ? lock_release (kernel/locking/lockdep.c:?)
[ 72.456117] ? rcu_read_unlock (include/linux/rcupdate.h:341 include/linux/rcupdate.h:871)
[ 72.456249] ? __pfx_rtnl_newlink (net/core/rtnetlink.c:3956)
[ 72.456388] rtnetlink_rcv_msg (net/core/rtnetlink.c:6955)
[ 72.456526] ? rtnetlink_rcv_msg (include/linux/rcupdate.h:331 include/linux/rcupdate.h:841 net/core/rtnetlink.c:6885)
[ 72.456671] ? lock_acquire (kernel/locking/lockdep.c:5866)
[ 72.456802] ? net_generic (include/linux/rcupdate.h:331 include/linux/rcupdate.h:841 include/net/netns/generic.h:45)
[ 72.456929] ? __pfx_rtnetlink_rcv_msg (net/core/rtnetlink.c:6858)
[ 72.457082] netlink_rcv_skb (net/netlink/af_netlink.c:2534)
[ 72.457212] netlink_unicast (net/netlink/af_netlink.c:1313)
[ 72.457344] netlink_sendmsg (net/netlink/af_netlink.c:1883)
[ 72.457476] __sock_sendmsg (net/socket.c:712)
[ 72.457602] ____sys_sendmsg (net/socket.c:?)
[ 72.457735] ? _copy_from_user (arch/x86/include/asm/uaccess_64.h:126 arch/x86/include/asm/uaccess_64.h:134 arch/x86/include/asm/uaccess_64.h:141 include/linux/uaccess.h:178 lib/usercopy.c:18)
[ 72.457875] ___sys_sendmsg (net/socket.c:2620)
[ 72.458042] ? __call_rcu_common (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:119 arch/x86/include/asm/irqflags.h:159 kernel/rcu/tree.c:3107)
[ 72.458185] ? mntput_no_expire (include/linux/rcupdate.h:331 include/linux/rcupdate.h:841 fs/namespace.c:1457)
[ 72.458324] ? lock_acquire (kernel/locking/lockdep.c:5866)
[ 72.458451] ? mntput_no_expire (include/linux/rcupdate.h:331 include/linux/rcupdate.h:841 fs/namespace.c:1457)
[ 72.458588] ? lock_release (kernel/locking/lockdep.c:?)
[ 72.458718] ? mntput_no_expire (include/linux/rcupdate.h:331 include/linux/rcupdate.h:841 fs/namespace.c:1457)
[ 72.458856] __x64_sys_sendmsg (net/socket.c:2652)
[ 72.458997] ? do_syscall_64 (arch/x86/include/asm/irqflags.h:42 arch/x86/include/asm/irqflags.h:119 include/linux/entry-common.h:198 arch/x86/entry/syscall_64.c:90)
[ 72.459136] do_syscall_64 (arch/x86/entry/syscall_64.c:?)
[ 72.459259] ? exc_page_fault (arch/x86/mm/fault.c:1542)
[ 72.459387] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
[ 72.459555] RIP: 0033:0x7fd15f17cbd0
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
---
net/core/dev.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index fccf2167b2352f0cba80651c8245c9d5260ac205..e2d6ce96a8897066e03a6c8754c861983bbe4ceb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4815,6 +4815,7 @@ static inline void ____napi_schedule(struct softnet_data *sd,
}
use_local_napi:
+ DEBUG_NET_WARN_ON_ONCE(!list_empty(&napi->poll_list));
list_add_tail(&napi->poll_list, &sd->poll_list);
WRITE_ONCE(napi->list_owner, smp_processor_id());
/* If not called from net_rx_action()
@@ -7476,9 +7477,14 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
work = __napi_poll(n, &do_repoll);
- if (do_repoll)
+ if (do_repoll) {
+#if defined(CONFIG_DEBUG_NET)
+ if (unlikely(!napi_is_scheduled(n)))
+ pr_crit("repoll requested for device %s %ps but napi is not scheduled.\n",
+ n->dev->name, n->poll);
+#endif
list_add_tail(&n->poll_list, repoll);
-
+ }
netpoll_poll_unlock(have);
return work;
--
2.49.0.1101.gccaa498523-goog
Powered by blists - more mailing lists