lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <13bccadd7dcc66283898cde11520918670e942db.1716202430.git.pabeni@redhat.com>
Date: Mon, 20 May 2024 12:59:14 +0200
From: Paolo Abeni <pabeni@...hat.com>
To: netdev@...r.kernel.org
Cc: "David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <edumazet@...gle.com>,
	Jakub Kicinski <kuba@...nel.org>,
	David Ahern <dsahern@...nel.org>,
	Sabrina Dubroca <sd@...asysnail.net>
Subject: [RFC PATCH] net: flush dst_cache on device removal

Eric reported that dst_cache don't cope correctly with device removal,
keeping the cached dst unmodified even when the underlining device is
deleted and the dst itself is not uncached.

The above causes the infamous 'unregistering netdevice' hangup.

Address the issue implementing explicit book-keeping of all the
initialized dst_caches. At network device unregistration time, traverse
them, looking for relevant dst and eventually replace the dst reference
with a blackhole one.

Use an xarray to store the dst_cache references, to avoid blocking the
BH during the traversal for a possibly unbounded time.

Reported-by: Eric Dumazet <edumazet@...gle.com>
Fixes: 911362c70df5 ("net: add dst_cache support")
Signed-off-by: Paolo Abeni <pabeni@...hat.com>
---
I can't reproduce the issue locally, I hope somebody able to observe it
could step-in and give this patch a shot.
---
 include/net/dst_cache.h |   1 +
 net/core/dst_cache.c    | 100 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 100 insertions(+), 1 deletion(-)

diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h
index b4a55d2d5e71..4d0302e143b4 100644
--- a/include/net/dst_cache.h
+++ b/include/net/dst_cache.h
@@ -11,6 +11,7 @@
 struct dst_cache {
 	struct dst_cache_pcpu __percpu *cache;
 	unsigned long reset_ts;
+	u32 id;
 };
 
 /**
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 6a0482e676d3..22921cc9d42c 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -7,6 +7,8 @@
 
 #include <linux/kernel.h>
 #include <linux/percpu.h>
+#include <linux/xarray.h>
+#include <linux/rcupdate_wait.h>
 #include <net/dst_cache.h>
 #include <net/route.h>
 #if IS_ENABLED(CONFIG_IPV6)
@@ -14,6 +16,13 @@
 #endif
 #include <uapi/linux/in.h>
 
+static DEFINE_XARRAY_FLAGS(dst_caches, XA_FLAGS_ALLOC);
+
+struct dst_cache_entry {
+	struct dst_cache_pcpu __percpu *cache;
+	struct rcu_head rcu;
+};
+
 struct dst_cache_pcpu {
 	unsigned long refresh_ts;
 	struct dst_entry *dst;
@@ -140,27 +149,60 @@ EXPORT_SYMBOL_GPL(dst_cache_get_ip6);
 
 int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
 {
+	struct dst_cache_entry *entry;
+	int last_id, ret = -ENOMEM;
+
 	dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
 					    gfp | __GFP_ZERO);
 	if (!dst_cache->cache)
 		return -ENOMEM;
 
+	entry = kmalloc(sizeof(*entry), gfp | __GFP_ZERO);
+	if (!entry)
+		goto free_cache;
+
+	ret = xa_alloc_cyclic_bh(&dst_caches, &dst_cache->id, entry,
+				 xa_limit_32b, &last_id, gfp);
+	if (ret < 0)
+		goto free_entry;
+
+	entry->cache = dst_cache->cache;
 	dst_cache_reset(dst_cache);
 	return 0;
+
+free_entry:
+	kfree(entry);
+
+free_cache:
+	free_percpu(dst_cache->cache);
+	dst_cache->cache = NULL;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(dst_cache_init);
 
+static void dst_cache_entry_free(struct rcu_head *rcu)
+{
+	struct dst_cache_entry *entry = container_of(rcu, struct dst_cache_entry, rcu);
+
+	free_percpu(entry->cache);
+	kfree(entry);
+}
+
 void dst_cache_destroy(struct dst_cache *dst_cache)
 {
+	struct dst_cache_entry *entry;
 	int i;
 
 	if (!dst_cache->cache)
 		return;
 
+	entry = xa_erase_bh(&dst_caches, dst_cache->id);
+
 	for_each_possible_cpu(i)
 		dst_release(per_cpu_ptr(dst_cache->cache, i)->dst);
 
-	free_percpu(dst_cache->cache);
+	if (!WARN_ON_ONCE(!entry))
+		call_rcu(&entry->rcu, dst_cache_entry_free);
 }
 EXPORT_SYMBOL_GPL(dst_cache_destroy);
 
@@ -182,3 +224,59 @@ void dst_cache_reset_now(struct dst_cache *dst_cache)
 	}
 }
 EXPORT_SYMBOL_GPL(dst_cache_reset_now);
+
+static void dst_cache_flush_dev(struct dst_cache_entry *entry,
+				struct net_device *dev)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct dst_cache_pcpu *idst = per_cpu_ptr(entry->cache, i);
+		struct dst_entry *dst = READ_ONCE(idst->dst);
+
+		if (!dst || !dst_hold_safe(dst))
+			continue;
+
+		if (!list_empty(&dst->rt_uncached) || dst->dev != dev)
+			goto release;
+
+		dst->dev = blackhole_netdev;
+		netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker,
+				   GFP_ATOMIC);
+
+release:
+		dst_release(dst);
+	}
+}
+
+static int dst_cache_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct dst_cache_entry *entry;
+	XA_STATE(xas, &dst_caches, 0);
+
+	if (event == NETDEV_UNREGISTER) {
+		rcu_read_lock();
+		xas_for_each(&xas, entry, UINT_MAX) {
+			dst_cache_flush_dev(entry, dev);
+			if (need_resched()) {
+				xas_pause(&xas);
+				cond_resched_rcu();
+			}
+		}
+		rcu_read_unlock();
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block dst_cache_notifier = {
+	.notifier_call = dst_cache_netdev_event,
+};
+
+static int __init dst_cache_notifier_init(void)
+{
+	return register_netdevice_notifier(&dst_cache_notifier);
+}
+
+subsys_initcall(dst_cache_notifier_init);
-- 
2.43.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ