[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ba7de40b-81d5-426d-bad0-29ebe9161dd1@I-love.SAKURA.ne.jp>
Date: Wed, 19 Nov 2025 23:00:23 +0900
From: Tetsuo Handa <penguin-kernel@...ove.SAKURA.ne.jp>
To: syzbot <syzbot+881d65229ca4f9ae8c84@...kaller.appspotmail.com>
Cc: linux-kernel@...r.kernel.org
Subject: Re: unregister_netdevice: waiting for DEV to become free (8)
Too timing-dependent to trigger using a reproducer?
Or, a reproducer for an already-fixed bug is used?
#syz test
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d1a687444b27..798d60b3e2ad 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2084,6 +2084,8 @@ enum netdev_reg_state {
*
* FIXME: cleanup struct net_device such that network protocol info
* moves out.
+ *
+ * @netdev_trace_buffer_list: Linked list for debugging refcount leak.
*/
struct net_device {
@@ -2238,6 +2240,9 @@ struct net_device {
#if IS_ENABLED(CONFIG_TLS_DEVICE)
const struct tlsdev_ops *tlsdev_ops;
#endif
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+ struct list_head netdev_trace_buffer_list;
+#endif
unsigned int operstate;
unsigned char link_mode;
@@ -3166,6 +3171,7 @@ enum netdev_cmd {
NETDEV_OFFLOAD_XSTATS_REPORT_USED,
NETDEV_OFFLOAD_XSTATS_REPORT_DELTA,
NETDEV_XDP_FEAT_CHANGE,
+ NETDEV_DEBUG_UNREGISTER,
};
const char *netdev_cmd_to_name(enum netdev_cmd cmd);
@@ -4345,9 +4351,15 @@ static inline bool dev_nit_active(const struct net_device *dev)
void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
+void save_netdev_trace_buffer(struct net_device *dev, int delta);
+int trim_netdev_trace(unsigned long *entries, int nr_entries);
+
static inline void __dev_put(struct net_device *dev)
{
if (dev) {
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+ save_netdev_trace_buffer(dev, -1);
+#endif
#ifdef CONFIG_PCPU_DEV_REFCNT
this_cpu_dec(*dev->pcpu_refcnt);
#else
@@ -4359,6 +4371,9 @@ static inline void __dev_put(struct net_device *dev)
static inline void __dev_hold(struct net_device *dev)
{
if (dev) {
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+ save_netdev_trace_buffer(dev, 1);
+#endif
#ifdef CONFIG_PCPU_DEV_REFCNT
this_cpu_inc(*dev->pcpu_refcnt);
#else
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 77198911b8dd..5f435c1e48d8 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -576,6 +576,10 @@ static inline bool lockdep_softirq_start(void) { return false; }
static inline void lockdep_softirq_end(bool in_hardirq) { }
#endif
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+static noinline void handle_softirqs(bool ksirqd);
+#endif
+
static void handle_softirqs(bool ksirqd)
{
unsigned long end = jiffies + MAX_SOFTIRQ_TIME;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 45320e27a16c..e9c654a9d0bb 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3145,6 +3145,10 @@ static bool manage_workers(struct worker *worker)
return true;
}
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+static noinline void process_one_work(struct worker *worker, struct work_struct *work);
+#endif
+
/**
* process_one_work - process single work
* @worker: self
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index a93af55df5fd..66e6624abfa3 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -124,6 +124,16 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data)
static DEFINE_MUTEX(j1939_netdev_lock);
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+static void dump_priv_trace_buffer(const struct net_device *ndev);
+static void erase_priv_trace_buffer(struct j1939_priv *priv);
+static noinline void save_priv_trace_buffer(struct j1939_priv *priv, int delta);
+#else
+static inline void dump_priv_trace_buffer(const struct net_device *ndev) { };
+static inline void erase_priv_trace_buffer(struct j1939_priv *priv) { };
+static inline void save_priv_trace_buffer(struct j1939_priv *priv, int delta) { };
+#endif
+
static struct j1939_priv *j1939_priv_create(struct net_device *ndev)
{
struct j1939_priv *priv;
@@ -137,6 +147,7 @@ static struct j1939_priv *j1939_priv_create(struct net_device *ndev)
priv->ndev = ndev;
kref_init(&priv->kref);
kref_init(&priv->rx_kref);
+ save_priv_trace_buffer(priv, 1);
dev_hold(ndev);
netdev_dbg(priv->ndev, "%s : 0x%p\n", __func__, priv);
@@ -164,17 +175,20 @@ static void __j1939_priv_release(struct kref *kref)
WARN_ON_ONCE(!list_empty(&priv->j1939_socks));
dev_put(ndev);
+ erase_priv_trace_buffer(priv);
kfree(priv);
}
void j1939_priv_put(struct j1939_priv *priv)
{
+ save_priv_trace_buffer(priv, -1);
kref_put(&priv->kref, __j1939_priv_release);
}
void j1939_priv_get(struct j1939_priv *priv)
{
kref_get(&priv->kref);
+ save_priv_trace_buffer(priv, 1);
}
static int j1939_can_rx_register(struct j1939_priv *priv)
@@ -282,6 +296,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
kref_get(&priv_new->rx_kref);
mutex_unlock(&j1939_netdev_lock);
dev_put(ndev);
+ erase_priv_trace_buffer(priv);
kfree(priv);
return priv_new;
}
@@ -299,6 +314,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
mutex_unlock(&j1939_netdev_lock);
dev_put(ndev);
+ erase_priv_trace_buffer(priv);
kfree(priv);
return ERR_PTR(ret);
@@ -364,6 +380,9 @@ static int j1939_netdev_notify(struct notifier_block *nb,
struct can_ml_priv *can_ml = can_get_ml_priv(ndev);
struct j1939_priv *priv;
+ if (msg == NETDEV_DEBUG_UNREGISTER)
+ dump_priv_trace_buffer(ndev);
+
if (!can_ml)
goto notify_done;
@@ -428,3 +447,79 @@ static __exit void j1939_module_exit(void)
module_init(j1939_module_init);
module_exit(j1939_module_exit);
+
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+
+/*
+ * Fixed-size table of j1939_priv refcount-event records, one slot per
+ * unique (priv, call trace) pair.  Slots are claimed with cmpxchg() in
+ * save_priv_trace_buffer() and recycled by NULLing ->priv in
+ * erase_priv_trace_buffer().
+ */
+#define PRIV_TRACE_BUFFER_SIZE 1024
+static struct priv_trace_buffer {
+	struct j1939_priv *priv; // no-ref
+	struct net_device *ndev; // no-ref
+	atomic_t count; /* net get/put balance seen for this trace */
+	int nr_entries;
+	unsigned long entries[20]; /* trimmed stack trace keying this slot */
+} priv_trace_buffer[PRIV_TRACE_BUFFER_SIZE];
+/* Set when all slots are in use; reported balances are then unreliable. */
+static bool priv_trace_buffer_exhausted;
+
+/*
+ * Dump every recorded j1939_priv refcount trace attached to @ndev and
+ * report the summed balance (it should reach 0 once all references are
+ * dropped; a stuck non-zero balance points at the leaking call site).
+ *
+ * Note: the two "balance" format strings were mangled by the list
+ * archive's address obfuscation ("%s@...39_priv"); restored here to
+ * the intended "%s@j1939_priv".
+ */
+static void dump_priv_trace_buffer(const struct net_device *ndev)
+{
+	struct priv_trace_buffer *ptr;
+	int count, balance = 0;
+	int i;
+
+	for (i = 0; i < PRIV_TRACE_BUFFER_SIZE; i++) {
+		ptr = &priv_trace_buffer[i];
+		if (!ptr->priv || ptr->ndev != ndev)
+			continue;
+		count = atomic_read(&ptr->count);
+		balance += count;
+		pr_info("Call trace for %s@%p %+d at\n", ndev->name, ptr->priv, count);
+		stack_trace_print(ptr->entries, ptr->nr_entries, 4);
+	}
+	/* If slots ran out, unrecorded events make the sum meaningless. */
+	if (!priv_trace_buffer_exhausted)
+		pr_info("balance for %s@j1939_priv is %d\n", ndev->name, balance);
+	else
+		pr_info("balance for %s@j1939_priv is unknown\n", ndev->name);
+}
+
+/*
+ * Release every table slot owned by @priv so it can be reused for a
+ * later j1939_priv; called just before the priv itself is kfree()d.
+ */
+static void erase_priv_trace_buffer(struct j1939_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < PRIV_TRACE_BUFFER_SIZE; i++)
+		if (priv_trace_buffer[i].priv == priv)
+			priv_trace_buffer[i].priv = NULL;
+}
+
+/*
+ * Record one j1939_priv refcount event (@delta is +1 for a get, -1 for
+ * a put) keyed by the current call trace.  Events from the same call
+ * site accumulate into a single slot's counter.  noinline so that the
+ * one frame skipped by stack_trace_save(..., 1) is deterministic.
+ */
+static noinline void save_priv_trace_buffer(struct j1939_priv *priv, int delta)
+{
+	struct priv_trace_buffer *ptr;
+	unsigned long entries[ARRAY_SIZE(ptr->entries)];
+	unsigned long nr_entries;
+	int i;
+
+	/* NOTE(review): presumably trace saving/cmpxchg is unsafe from NMI. */
+	if (in_nmi())
+		return;
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(ptr->entries), 1);
+	/* Cut below the softirq/workqueue/syscall dispatcher for stable keys. */
+	nr_entries = trim_netdev_trace(entries, nr_entries);
+	/* Fast path: a slot for this (priv, trace) pair already exists. */
+	for (i = 0; i < PRIV_TRACE_BUFFER_SIZE; i++) {
+		ptr = &priv_trace_buffer[i];
+		if (ptr->priv == priv && ptr->nr_entries == nr_entries &&
+		    !memcmp(ptr->entries, entries, nr_entries * sizeof(unsigned long))) {
+			atomic_add(delta, &ptr->count);
+			return;
+		}
+	}
+	/* Slow path: claim a free slot with cmpxchg() and fill it in. */
+	for (i = 0; i < PRIV_TRACE_BUFFER_SIZE; i++) {
+		ptr = &priv_trace_buffer[i];
+		if (!ptr->priv && !cmpxchg(&ptr->priv, NULL, priv)) {
+			ptr->ndev = priv->ndev;
+			atomic_set(&ptr->count, delta);
+			ptr->nr_entries = nr_entries;
+			memmove(ptr->entries, entries, nr_entries * sizeof(unsigned long));
+			return;
+		}
+	}
+	/* No free slot left; events for new traces are silently dropped. */
+	priv_trace_buffer_exhausted = true;
+}
+
+#endif
diff --git a/net/core/dev.c b/net/core/dev.c
index 2acfa44927da..c3a62c16fa15 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1854,6 +1854,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE)
N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA)
N(XDP_FEAT_CHANGE)
+ N(DEBUG_UNREGISTER)
}
#undef N
return "UNKNOWN_NETDEV_EVENT";
@@ -11429,6 +11430,14 @@ int netdev_refcnt_read(const struct net_device *dev)
}
EXPORT_SYMBOL(netdev_refcnt_read);
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+static void dump_netdev_trace_buffer(const struct net_device *dev);
+static void erase_netdev_trace_buffer(const struct net_device *dev);
+#else
+static inline void dump_netdev_trace_buffer(const struct net_device *dev) { }
+static inline void erase_netdev_trace_buffer(const struct net_device *dev) { }
+#endif
+
int netdev_unregister_timeout_secs __read_mostly = 10;
#define WAIT_REFS_MIN_MSECS 1
@@ -11502,11 +11511,16 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list)
if (time_after(jiffies, warning_time +
READ_ONCE(netdev_unregister_timeout_secs) * HZ)) {
+ rtnl_lock();
list_for_each_entry(dev, list, todo_list) {
pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
dev->name, netdev_refcnt_read(dev));
ref_tracker_dir_print(&dev->refcnt_tracker, 10);
+ call_netdevice_notifiers(NETDEV_DEBUG_UNREGISTER, dev);
+ dump_netdev_trace_buffer(dev);
}
+ __rtnl_unlock();
+ rcu_barrier();
warning_time = jiffies;
}
@@ -11904,6 +11918,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->priv_len = sizeof_priv;
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+ INIT_LIST_HEAD(&dev->netdev_trace_buffer_list);
+#endif
ref_tracker_dir_init(&dev->refcnt_tracker, 128, "netdev");
#ifdef CONFIG_PCPU_DEV_REFCNT
dev->pcpu_refcnt = alloc_percpu(int);
@@ -12076,6 +12093,8 @@ void free_netdev(struct net_device *dev)
mutex_destroy(&dev->lock);
+ erase_netdev_trace_buffer(dev);
+
/* Compatibility with error handling in drivers */
if (dev->reg_state == NETREG_UNINITIALIZED ||
dev->reg_state == NETREG_DUMMY) {
@@ -13090,3 +13109,180 @@ static int __init net_dev_init(void)
}
subsys_initcall(net_dev_init);
+
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+
+/*
+ * Pool of net_device refcount-event records.  Unused slots sit on
+ * netdev_trace_buffer_list (the free list, guarded by
+ * netdev_trace_buffer_lock); in-use slots are moved onto the owning
+ * device's ->netdev_trace_buffer_list.
+ */
+#define NETDEV_TRACE_BUFFER_SIZE 32768
+static struct netdev_trace_buffer {
+	struct list_head list;
+	int prev_count; /* counter value as of the previous dump */
+	atomic_t count; /* net hold/put balance seen for this trace */
+	int nr_entries;
+	unsigned long entries[20]; /* trimmed stack trace keying this slot */
+} netdev_trace_buffer[NETDEV_TRACE_BUFFER_SIZE];
+static LIST_HEAD(netdev_trace_buffer_list);
+static DEFINE_SPINLOCK(netdev_trace_buffer_lock);
+/* Set when the free list ran empty; reported balances are then unreliable. */
+static bool netdev_trace_buffer_exhausted;
+
+/*
+ * Seed the global free list with every slot.  pure_initcall runs early
+ * in boot, before any net_device can take references.
+ */
+static int netdev_trace_buffer_init(void)
+{
+	int i;
+
+	for (i = 0; i < NETDEV_TRACE_BUFFER_SIZE; i++)
+		list_add_tail(&netdev_trace_buffer[i].list, &netdev_trace_buffer_list);
+	return 0;
+}
+pure_initcall(netdev_trace_buffer_init);
+
+/*
+ * Print every refcount trace recorded for @dev whose counter changed
+ * since the previous dump, then report the summed balance.  A stuck
+ * non-zero balance (with the pool not exhausted) identifies the leak.
+ */
+static void dump_netdev_trace_buffer(const struct net_device *dev)
+{
+	struct netdev_trace_buffer *ptr;
+	int count, balance = 0, pos = 0;
+
+	list_for_each_entry_rcu(ptr, &dev->netdev_trace_buffer_list, list,
+				/* list elements can't go away. */ 1) {
+		pos++;
+		count = atomic_read(&ptr->count);
+		balance += count;
+		/* Only re-print traces whose counter moved since last time. */
+		if (ptr->prev_count == count)
+			continue;
+		ptr->prev_count = count;
+		pr_info("Call trace for %s[%d] %+d at\n", dev->name, pos, count);
+		stack_trace_print(ptr->entries, ptr->nr_entries, 4);
+		cond_resched();
+	}
+	if (!netdev_trace_buffer_exhausted)
+		pr_info("balance as of %s[%d] is %d\n", dev->name, pos, balance);
+}
+
+/*
+ * Return all slots on @dev's per-device list to the global free list;
+ * called from free_netdev() when the device is going away.
+ */
+static void erase_netdev_trace_buffer(const struct net_device *dev)
+{
+	struct netdev_trace_buffer *ptr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&netdev_trace_buffer_lock, flags);
+	while (!list_empty(&dev->netdev_trace_buffer_list)) {
+		ptr = list_first_entry(&dev->netdev_trace_buffer_list, typeof(*ptr), list);
+		list_del(&ptr->list);
+		list_add_tail(&ptr->list, &netdev_trace_buffer_list);
+	}
+	spin_unlock_irqrestore(&netdev_trace_buffer_lock, flags);
+}
+
+#ifdef CONFIG_KALLSYMS
+/*
+ * Scan @entries for a frame whose resolved symbol name is exactly
+ * @name (i.e. the "+0x..." offset suffix follows immediately) and
+ * return that frame's address, or 0 when no such frame is present.
+ */
+static noinline unsigned long __find_trim(unsigned long *entries, int nr_entries, const char *name)
+{
+	int i;
+	char buffer[KSYM_SYMBOL_LEN];
+	const int len = strlen(name);
+
+	for (i = 0; i < nr_entries; i++) {
+		snprintf(buffer, sizeof(buffer), "%pS", (void *)entries[i]);
+		if (!strncmp(buffer, name, len) && buffer[len] == '+')
+			return entries[i];
+	}
+	return 0;
+}
+
+/* Cached addresses of the dispatcher frames trim_netdev_trace() cuts at. */
+static unsigned long caller_handle_softirqs;
+static unsigned long caller_process_one_work;
+static unsigned long caller_ksys_unshare;
+static unsigned long caller___sys_bind;
+static unsigned long caller___sock_sendmsg;
+
+/*
+ * Pre-set a cache entry to -1 when the corresponding symbol does not
+ * exist in this kernel, so trim_netdev_trace() never keeps retrying
+ * __find_trim() for a symbol that can never match.
+ */
+static int __init net_check_symbols(void)
+{
+	if (!kallsyms_lookup_name("handle_softirqs"))
+		caller_handle_softirqs = -1;
+	if (!kallsyms_lookup_name("process_one_work"))
+		caller_process_one_work = -1;
+	if (!kallsyms_lookup_name("ksys_unshare"))
+		caller_ksys_unshare = -1;
+	if (!kallsyms_lookup_name("__sys_bind"))
+		caller___sys_bind = -1;
+	if (!kallsyms_lookup_name("sock_sendmsg_nosec") &&
+	    !kallsyms_lookup_name("__sock_sendmsg"))
+		caller___sock_sendmsg = -1;
+	return 0;
+}
+late_initcall(net_check_symbols);
+#endif
+
+/*
+ * Truncate a saved stack trace at the nearest well-known dispatcher
+ * frame (softirq handler, workqueue worker, or one of a few syscall
+ * entry points) so that traces from the same logical caller compare
+ * equal regardless of what lies below the dispatcher.  Returns the
+ * (possibly reduced) number of entries.
+ */
+int trim_netdev_trace(unsigned long *entries, int nr_entries)
+{
+#ifdef CONFIG_KALLSYMS
+	int i;
+
+	if (in_softirq()) {
+		/* Lazily resolve the dispatcher's address on first sighting. */
+		if (unlikely(!caller_handle_softirqs))
+			caller_handle_softirqs = __find_trim(entries, nr_entries,
+							     "handle_softirqs");
+		for (i = 0; i < nr_entries; i++)
+			if (entries[i] == caller_handle_softirqs)
+				return i + 1;
+	} else if (current->flags & PF_WQ_WORKER) {
+		if (unlikely(!caller_process_one_work))
+			caller_process_one_work = __find_trim(entries, nr_entries,
+							      "process_one_work");
+		for (i = 0; i < nr_entries; i++)
+			if (entries[i] == caller_process_one_work)
+				return i + 1;
+	} else {
+		if (unlikely(!caller_ksys_unshare))
+			caller_ksys_unshare = __find_trim(entries, nr_entries, "ksys_unshare");
+		if (unlikely(!caller___sys_bind))
+			caller___sys_bind = __find_trim(entries, nr_entries, "__sys_bind");
+		/* Older kernels have sock_sendmsg_nosec, newer __sock_sendmsg. */
+		if (unlikely(!caller___sock_sendmsg)) {
+			caller___sock_sendmsg = __find_trim(entries, nr_entries,
+							    "sock_sendmsg_nosec");
+			if (!caller___sock_sendmsg)
+				caller___sock_sendmsg = __find_trim(entries, nr_entries,
+								    "__sock_sendmsg");
+		}
+		for (i = 0; i < nr_entries; i++)
+			if (entries[i] == caller_ksys_unshare ||
+			    entries[i] == caller___sys_bind ||
+			    entries[i] == caller___sock_sendmsg)
+				return i + 1;
+	}
+#endif
+	return nr_entries;
+}
+EXPORT_SYMBOL(trim_netdev_trace);
+
+/*
+ * Record one net_device refcount event (@delta is +1 for __dev_hold(),
+ * -1 for __dev_put()) keyed by the trimmed call trace.  In-use slots
+ * live on @dev->netdev_trace_buffer_list; lookup is a lockless RCU
+ * list walk, allocation from the free list takes the spinlock.
+ */
+void save_netdev_trace_buffer(struct net_device *dev, int delta)
+{
+	struct netdev_trace_buffer *ptr;
+	unsigned long entries[ARRAY_SIZE(ptr->entries)];
+	unsigned long nr_entries;
+	unsigned long flags;
+
+	/* NOTE(review): presumably trace saving/locking is unsafe from NMI. */
+	if (in_nmi())
+		return;
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(ptr->entries), 1);
+	nr_entries = trim_netdev_trace(entries, nr_entries);
+	/* Fast path: this call trace was already recorded for @dev. */
+	list_for_each_entry_rcu(ptr, &dev->netdev_trace_buffer_list, list,
+				/* list elements can't go away. */ 1) {
+		if (ptr->nr_entries == nr_entries &&
+		    !memcmp(ptr->entries, entries, nr_entries * sizeof(unsigned long))) {
+			atomic_add(delta, &ptr->count);
+			return;
+		}
+	}
+	/* Slow path: take a slot from the global free list, if any is left. */
+	spin_lock_irqsave(&netdev_trace_buffer_lock, flags);
+	if (!list_empty(&netdev_trace_buffer_list)) {
+		ptr = list_first_entry(&netdev_trace_buffer_list, typeof(*ptr), list);
+		list_del(&ptr->list);
+		ptr->prev_count = 0;
+		atomic_set(&ptr->count, delta);
+		ptr->nr_entries = nr_entries;
+		memmove(ptr->entries, entries, nr_entries * sizeof(unsigned long));
+		list_add_tail_rcu(&ptr->list, &dev->netdev_trace_buffer_list);
+	} else {
+		/* Pool exhausted; balances reported later are unreliable. */
+		netdev_trace_buffer_exhausted = true;
+	}
+	spin_unlock_irqrestore(&netdev_trace_buffer_lock, flags);
+}
+EXPORT_SYMBOL(save_netdev_trace_buffer);
+
+#endif
diff --git a/net/core/lock_debug.c b/net/core/lock_debug.c
index 9e9fb25314b9..78d611bb6d1c 100644
--- a/net/core/lock_debug.c
+++ b/net/core/lock_debug.c
@@ -29,6 +29,7 @@ int netdev_debug_event(struct notifier_block *nb, unsigned long event,
case NETDEV_DOWN:
case NETDEV_REBOOT:
case NETDEV_UNREGISTER:
+ case NETDEV_DEBUG_UNREGISTER:
case NETDEV_CHANGEMTU:
case NETDEV_CHANGEADDR:
case NETDEV_PRE_CHANGEADDR:
diff --git a/net/socket.c b/net/socket.c
index e8892b218708..fce536d2d8b9 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -734,6 +734,10 @@ static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
return ret;
}
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+static noinline int __sock_sendmsg(struct socket *sock, struct msghdr *msg);
+#endif
+
static int __sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
int err = security_socket_sendmsg(sock, msg,
Powered by blists - more mailing lists