Message-ID: <20250716221221.442239-8-kuniyu@google.com>
Date: Wed, 16 Jul 2025 22:08:12 +0000
From: Kuniyuki Iwashima <kuniyu@...gle.com>
To: "David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, David Ahern <dsahern@...nel.org>
Cc: Simon Horman <horms@...nel.org>, Kuniyuki Iwashima <kuniyu@...gle.com>,
Kuniyuki Iwashima <kuni1840@...il.com>, netdev@...r.kernel.org
Subject: [PATCH v3 net-next 07/15] neighbour: Free pneigh_entry after RCU
grace period.

We will convert RTM_GETNEIGH to RCU.

neigh_get() looks up pneigh_entry by pneigh_lookup() and passes it to
pneigh_fill_info().

Then, we must ensure that the entry stays alive until pneigh_fill_info()
completes, but read_lock_bh(&tbl->lock) in pneigh_lookup() does not
guarantee that.
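
For reference, the reader side we are heading towards looks roughly
like the sketch below (a simplified sketch only; the actual neigh_get()
conversion comes later in the series, and pid/seq stand in for values
neigh_get() already has):

	rcu_read_lock();
	pn = pneigh_lookup(tbl, net, pkey, dev, 0);
	if (pn)
		err = pneigh_fill_info(skb, pn, pid, seq,
				       RTM_NEWNEIGH, 0, tbl);
	rcu_read_unlock();

Holding rcu_read_lock() across both calls is what must keep the entry
alive until the fill completes.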

Also, we will convert all readers of tbl->phash_buckets[] to RCU.

Let's use call_rcu() to free pneigh_entry and update phash_buckets[]
and ->next by rcu_assign_pointer().
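
With both in place, a lockless lookup can walk a bucket with
rcu_dereference() paired against the writer's rcu_assign_pointer(),
roughly like this (illustrative only, mirroring the pneigh_lookup()
loop; hash_val and key_len stand for the values it computes):

	rcu_read_lock();
	for (n = rcu_dereference(tbl->phash_buckets[hash_val]); n;
	     n = rcu_dereference(n->next)) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net))
			break;	/* safe to use until rcu_read_unlock() */
	}
	rcu_read_unlock();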

pneigh_ifdown_and_unlock() collects dead entries on a local list_head
instead of reusing ->next, so that concurrent RCU iterators are not
redirected onto the free list.
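
To illustrate the hazard the list_head avoids (a hypothetical
interleaving, not a real trace): if the free list were still chained
through ->next, a reader that already fetched an entry could follow
the overwritten pointer onto the free list:

	reader (RCU)			writer (old freelist scheme)
	------------			----------------------------
	n = rcu_dereference(...);
					rcu_assign_pointer(*np, n->next);
					rcu_assign_pointer(n->next, freelist);
	n = rcu_dereference(n->next);	/* now walking the free list */

With a dedicated free_node, ->next keeps pointing into the hash chain
until the grace period ends.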

pndisc_destructor() (only IPv6 ndisc uses this hook) takes a mutex, so
it is still called synchronously rather than deferred to the call_rcu()
callback, where we cannot sleep.  This is fine because the mcast code
works with RCU and ipv6_dev_mc_dec() frees mcast objects after an RCU
grace period.
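
The resulting order on the removal path (restating the hunks below,
shown for the pneigh_delete() case):

	rcu_assign_pointer(*np, n->next);	/* unlink from the bucket */
	write_unlock_bh(&tbl->lock);
	if (tbl->pdestructor)
		tbl->pdestructor(n);	/* may sleep, so not in call_rcu() */
	call_rcu(&n->rcu, pneigh_destroy);	/* netdev_put() + kfree() later */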

While at it, we change the return type of pneigh_ifdown_and_unlock()
to void.

Signed-off-by: Kuniyuki Iwashima <kuniyu@...gle.com>
---
 include/net/neighbour.h |  4 ++++
 net/core/neighbour.c    | 45 +++++++++++++++++++++++++----------------
 2 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 1ddc44a042000..6d7f9aa53a7a9 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -180,6 +180,10 @@ struct pneigh_entry {
 	possible_net_t		net;
 	struct net_device	*dev;
 	netdevice_tracker	dev_tracker;
+	union {
+		struct list_head	free_node;
+		struct rcu_head		rcu;
+	};
 	u32			flags;
 	u8			protocol;
 	bool			permanent;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8bbf3ec592906..f5690e5904cba 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -54,9 +54,9 @@ static void neigh_timer_handler(struct timer_list *t);
 static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
 static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
-static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
-				    struct net_device *dev,
-				    bool skip_perm);
+static void pneigh_ifdown_and_unlock(struct neigh_table *tbl,
+				     struct net_device *dev,
+				     bool skip_perm);
 
 #ifdef CONFIG_PROC_FS
 static const struct seq_operations neigh_stat_seq_ops;
@@ -810,6 +810,14 @@ struct pneigh_entry *pneigh_create(struct neigh_table *tbl,
 	return n;
 }
 
+static void pneigh_destroy(struct rcu_head *rcu)
+{
+	struct pneigh_entry *n = container_of(rcu, struct pneigh_entry, rcu);
+
+	netdev_put(n->dev, &n->dev_tracker);
+	kfree(n);
+}
+
 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
 {
@@ -828,10 +836,11 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		    net_eq(pneigh_net(n), net)) {
			rcu_assign_pointer(*np, n->next);
			write_unlock_bh(&tbl->lock);
+
			if (tbl->pdestructor)
				tbl->pdestructor(n);
-			netdev_put(n->dev, &n->dev_tracker);
-			kfree(n);
+
+			call_rcu(&n->rcu, pneigh_destroy);
			return 0;
		}
	}
@@ -839,11 +848,12 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
	return -ENOENT;
 }
 
-static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
-				    struct net_device *dev,
-				    bool skip_perm)
+static void pneigh_ifdown_and_unlock(struct neigh_table *tbl,
+				     struct net_device *dev,
+				     bool skip_perm)
 {
-	struct pneigh_entry *n, __rcu **np, *freelist = NULL;
+	struct pneigh_entry *n, __rcu **np;
+	LIST_HEAD(head);
	u32 h;
 
	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
@@ -853,24 +863,25 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				goto skip;
			if (!dev || n->dev == dev) {
				rcu_assign_pointer(*np, n->next);
-				rcu_assign_pointer(n->next, freelist);
-				freelist = n;
+				list_add(&n->free_node, &head);
				continue;
			}
 skip:
			np = &n->next;
		}
	}
+
	write_unlock_bh(&tbl->lock);
-	while ((n = freelist)) {
-		freelist = rcu_dereference_protected(n->next, 1);
-		n->next = NULL;
+
+	while (!list_empty(&head)) {
+		n = list_first_entry(&head, typeof(*n), free_node);
+		list_del(&n->free_node);
+
		if (tbl->pdestructor)
			tbl->pdestructor(n);
-		netdev_put(n->dev, &n->dev_tracker);
-		kfree(n);
+
+		call_rcu(&n->rcu, pneigh_destroy);
	}
-	return -ENOENT;
 }
 
 static inline void neigh_parms_put(struct neigh_parms *parms)
--
2.50.0.727.gbf7dc18ff4-goog