[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20211202032139.3156411-4-eric.dumazet@gmail.com>
Date: Wed, 1 Dec 2021 19:21:23 -0800
From: Eric Dumazet <eric.dumazet@...il.com>
To: "David S . Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>
Cc: netdev <netdev@...r.kernel.org>,
Dmitry Vyukov <dvyukov@...gle.com>,
Eric Dumazet <edumazet@...gle.com>,
Eric Dumazet <eric.dumazet@...il.com>
Subject: [PATCH net-next 03/19] net: add dev_hold_track() and dev_put_track() helpers
From: Eric Dumazet <edumazet@...gle.com>
They should replace dev_hold() and dev_put().
To use these helpers, each data structure owning a refcount
should also use a "netdevice_tracker" to pair the hold and put.
Whenever a leak happens, we will get precise stack traces
of the point dev_hold_track() happened, at device dismantle phase.
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
---
include/linux/netdevice.h | 32 ++++++++++++++++++++++++++++++++
net/Kconfig | 8 ++++++++
net/core/dev.c | 3 +++
3 files changed, 43 insertions(+)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index db3bff1ae7fdf5ff0b0546cbd0102b86f04fa144..3ddced0fa20f5c7a4548c340788214ea909a265f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -48,6 +48,7 @@
#include <uapi/linux/pkt_cls.h>
#include <linux/hashtable.h>
#include <linux/rbtree.h>
+#include <linux/ref_tracker.h>
struct netpoll_info;
struct device;
@@ -300,6 +301,12 @@ enum netdev_state_t {
};
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+typedef struct ref_tracker *netdevice_tracker;
+#else
+typedef struct {} netdevice_tracker;
+#endif
+
struct gro_list {
struct list_head list;
int count;
@@ -2178,6 +2185,7 @@ struct net_device {
#else
refcount_t dev_refcnt;
#endif
+ struct ref_tracker_dir refcnt_tracker;
struct list_head link_watch_list;
@@ -3805,6 +3813,7 @@ void netdev_run_todo(void);
* @dev: network device
*
* Release reference to device to allow it to be freed.
+ * Try using dev_put_track() instead.
*/
static inline void dev_put(struct net_device *dev)
{
@@ -3822,6 +3831,7 @@ static inline void dev_put(struct net_device *dev)
* @dev: network device
*
* Hold reference to device to keep it from being freed.
+ * Try using dev_hold_track() instead.
*/
static inline void dev_hold(struct net_device *dev)
{
@@ -3834,6 +3844,28 @@ static inline void dev_hold(struct net_device *dev)
}
}
+static inline void dev_hold_track(struct net_device *dev,
+ netdevice_tracker *tracker, gfp_t gfp)
+{
+ if (dev) {
+ dev_hold(dev);
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+ ref_tracker_alloc(&dev->refcnt_tracker, tracker, gfp);
+#endif
+ }
+}
+
+static inline void dev_put_track(struct net_device *dev,
+ netdevice_tracker *tracker)
+{
+ if (dev) {
+#ifdef CONFIG_NET_DEV_REFCNT_TRACKER
+ ref_tracker_free(&dev->refcnt_tracker, tracker);
+#endif
+ dev_put(dev);
+ }
+}
+
/* Carrier loss detection, dial on demand. The functions netif_carrier_on
* and _off may be called from IRQ context, but it is caller
* who is responsible for serialization of these calls.
diff --git a/net/Kconfig b/net/Kconfig
index 8a1f9d0287de3c32040eee03b60114c6e6d150bc..0b665e60b53490f44eeda1e5506d4e125ef4c53a 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -253,6 +253,14 @@ config PCPU_DEV_REFCNT
network device refcount are using per cpu variables if this option is set.
This can be forced to N to detect underflows (with a performance drop).
+config NET_DEV_REFCNT_TRACKER
+ bool "Enable tracking in dev_put_track() and dev_hold_track()"
+ select REF_TRACKER
+ default n
+ help
+ Enable debugging feature to track leaked device references.
+ This adds memory and cpu costs.
+
config RPS
bool
depends on SMP && SYSFS
diff --git a/net/core/dev.c b/net/core/dev.c
index d30adecc2bb2b744b283dbda89d9488b8eb6d47e..bdfbfa4e291858f990f5e2c99c4ce0ee9ed687cd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9861,6 +9861,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
netdev_unregister_timeout_secs * HZ)) {
pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
dev->name, refcnt);
+ ref_tracker_dir_print(&dev->refcnt_tracker, 10);
warning_time = jiffies;
}
}
@@ -10151,6 +10152,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev = PTR_ALIGN(p, NETDEV_ALIGN);
dev->padded = (char *)dev - (char *)p;
+ ref_tracker_dir_init(&dev->refcnt_tracker, 128);
#ifdef CONFIG_PCPU_DEV_REFCNT
dev->pcpu_refcnt = alloc_percpu(int);
if (!dev->pcpu_refcnt)
@@ -10267,6 +10269,7 @@ void free_netdev(struct net_device *dev)
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);
+ ref_tracker_dir_exit(&dev->refcnt_tracker);
#ifdef CONFIG_PCPU_DEV_REFCNT
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
--
2.34.0.rc2.393.gf8c9666880-goog
Powered by blists - more mailing lists