[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <13bccadd7dcc66283898cde11520918670e942db.1716202430.git.pabeni@redhat.com>
Date: Mon, 20 May 2024 12:59:14 +0200
From: Paolo Abeni <pabeni@...hat.com>
To: netdev@...r.kernel.org
Cc: "David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
David Ahern <dsahern@...nel.org>,
Sabrina Dubroca <sd@...asysnail.net>
Subject: [RFC PATCH] net: flush dst_cache on device removal
Eric reported that dst_cache don't cope correctly with device removal,
keeping the cached dst unmodified even when the underlining device is
deleted and the dst itself is not uncached.
The above causes the infamous 'unregistering netdevice' hangup.
Address the issue implementing explicit book-keeping of all the
initialized dst_caches. At network device unregistration time, traverse
them, looking for relevant dst and eventually replace the dst reference
with a blackhole one.
Use an xarray to store the dst_cache references, to avoid blocking the
BH during the traversal for a possibly unbounded time.
Reported-by: Eric Dumazet <edumazet@...gle.com>
Fixes: 911362c70df5 ("net: add dst_cache support")
Signed-off-by: Paolo Abeni <pabeni@...hat.com>
---
I can't reproduce the issue locally, I hope somebody able to observe it
could step-in and give this patch a shot.
---
include/net/dst_cache.h | 1 +
net/core/dst_cache.c | 100 +++++++++++++++++++++++++++++++++++++++-
2 files changed, 100 insertions(+), 1 deletion(-)
diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h
index b4a55d2d5e71..4d0302e143b4 100644
--- a/include/net/dst_cache.h
+++ b/include/net/dst_cache.h
@@ -11,6 +11,7 @@
struct dst_cache {
struct dst_cache_pcpu __percpu *cache;
unsigned long reset_ts;
+ u32 id;
};
/**
diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c
index 6a0482e676d3..22921cc9d42c 100644
--- a/net/core/dst_cache.c
+++ b/net/core/dst_cache.c
@@ -7,6 +7,8 @@
#include <linux/kernel.h>
#include <linux/percpu.h>
+#include <linux/xarray.h>
+#include <linux/rcupdate_wait.h>
#include <net/dst_cache.h>
#include <net/route.h>
#if IS_ENABLED(CONFIG_IPV6)
@@ -14,6 +16,13 @@
#endif
#include <uapi/linux/in.h>
+static DEFINE_XARRAY_FLAGS(dst_caches, XA_FLAGS_ALLOC);
+
+struct dst_cache_entry {
+ struct dst_cache_pcpu __percpu *cache;
+ struct rcu_head rcu;
+};
+
struct dst_cache_pcpu {
unsigned long refresh_ts;
struct dst_entry *dst;
@@ -140,27 +149,60 @@ EXPORT_SYMBOL_GPL(dst_cache_get_ip6);
int dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
{
+ struct dst_cache_entry *entry;
+ int last_id, ret = -ENOMEM;
+
dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
gfp | __GFP_ZERO);
if (!dst_cache->cache)
return -ENOMEM;
+ entry = kmalloc(sizeof(*entry), gfp | __GFP_ZERO);
+ if (!entry)
+ goto free_cache;
+
+ ret = xa_alloc_cyclic_bh(&dst_caches, &dst_cache->id, entry,
+ xa_limit_32b, &last_id, gfp);
+ if (ret < 0)
+ goto free_entry;
+
+ entry->cache = dst_cache->cache;
dst_cache_reset(dst_cache);
return 0;
+
+free_entry:
+ kfree(entry);
+
+free_cache:
+ free_percpu(dst_cache->cache);
+ dst_cache->cache = NULL;
+ return ret;
}
EXPORT_SYMBOL_GPL(dst_cache_init);
+static void dst_cache_entry_free(struct rcu_head *rcu)
+{
+ struct dst_cache_entry *entry = container_of(rcu, struct dst_cache_entry, rcu);
+
+ free_percpu(entry->cache);
+ kfree(entry);
+}
+
void dst_cache_destroy(struct dst_cache *dst_cache)
{
+ struct dst_cache_entry *entry;
int i;
if (!dst_cache->cache)
return;
+ entry = xa_erase_bh(&dst_caches, dst_cache->id);
+
for_each_possible_cpu(i)
dst_release(per_cpu_ptr(dst_cache->cache, i)->dst);
- free_percpu(dst_cache->cache);
+ if (!WARN_ON_ONCE(!entry))
+ call_rcu(&entry->rcu, dst_cache_entry_free);
}
EXPORT_SYMBOL_GPL(dst_cache_destroy);
@@ -182,3 +224,59 @@ void dst_cache_reset_now(struct dst_cache *dst_cache)
}
}
EXPORT_SYMBOL_GPL(dst_cache_reset_now);
+
+static void dst_cache_flush_dev(struct dst_cache_entry *entry,
+ struct net_device *dev)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ struct dst_cache_pcpu *idst = per_cpu_ptr(entry->cache, i);
+ struct dst_entry *dst = READ_ONCE(idst->dst);
+
+ if (!dst || !dst_hold_safe(dst))
+ continue;
+
+ if (!list_empty(&dst->rt_uncached) || dst->dev != dev)
+ goto release;
+
+ dst->dev = blackhole_netdev;
+ netdev_ref_replace(dev, blackhole_netdev, &dst->dev_tracker,
+ GFP_ATOMIC);
+
+release:
+ dst_release(dst);
+ }
+}
+
+static int dst_cache_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct dst_cache_entry *entry;
+ XA_STATE(xas, &dst_caches, 0);
+
+ if (event == NETDEV_UNREGISTER) {
+ rcu_read_lock();
+ xas_for_each(&xas, entry, UINT_MAX) {
+ dst_cache_flush_dev(entry, dev);
+ if (need_resched()) {
+ xas_pause(&xas);
+ cond_resched_rcu();
+ }
+ }
+ rcu_read_unlock();
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block dst_cache_notifier = {
+ .notifier_call = dst_cache_netdev_event,
+};
+
+static int __init dst_cache_notifier_init(void)
+{
+ return register_netdevice_notifier(&dst_cache_notifier);
+}
+
+subsys_initcall(dst_cache_notifier_init);
--
2.43.2
Powered by blists - more mailing lists