[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250711191007.3591938-14-kuniyu@google.com>
Date: Fri, 11 Jul 2025 19:06:18 +0000
From: Kuniyuki Iwashima <kuniyu@...gle.com>
To: "David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, David Ahern <dsahern@...nel.org>
Cc: Simon Horman <horms@...nel.org>, Kuniyuki Iwashima <kuniyu@...gle.com>,
Kuniyuki Iwashima <kuni1840@...il.com>, netdev@...r.kernel.org
Subject: [PATCH v1 net-next 13/14] neighbour: Protect tbl->phash_buckets[]
with a dedicated mutex.
tbl->phash_buckets[] is only modified in the slow path by pneigh_create()
and pneigh_delete() under the table lock.
Both of them are called under RTNL, so no extra lock is needed, but we
will remove RTNL from the paths.
pneigh_create() looks up a pneigh_entry, and this part can be lockless,
but it would complicate the logic like
1. lookup
2. allocate a pneigh_entry with GFP_KERNEL
3. lookup again but under lock
4. if found, return it after freeing the allocated memory
5. else, return the new one
Instead, let's add a per-table mutex and run lookup and allocation
under it.
Even though RTNL is removed, the neigh table is a per-protocol one, so this
locking granularity is fine until we make the table per-netns.
Note that the pneigh_entry-updating part in neigh_add() is still protected
by RTNL and will be moved to pneigh_create() in the next patch.
Signed-off-by: Kuniyuki Iwashima <kuniyu@...gle.com>
---
include/net/neighbour.h | 1 +
net/core/neighbour.c | 39 +++++++++++++++++++++------------------
2 files changed, 22 insertions(+), 18 deletions(-)
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 1670e2a388556..af6fe50703041 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -240,6 +240,7 @@ struct neigh_table {
unsigned long last_rand;
struct neigh_statistics __percpu *stats;
struct neigh_hash_table __rcu *nht;
+ struct mutex phash_lock;
struct pneigh_entry **phash_buckets;
};
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 9d716852e0e7d..78f2457a101c4 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -54,9 +54,8 @@ static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
-static void pneigh_ifdown_and_unlock(struct neigh_table *tbl,
- struct net_device *dev,
- bool skip_perm);
+static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+ bool skip_perm);
#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
@@ -437,7 +436,9 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
{
write_lock_bh(&tbl->lock);
neigh_flush_dev(tbl, dev, skip_perm);
- pneigh_ifdown_and_unlock(tbl, dev, skip_perm);
+ write_unlock_bh(&tbl->lock);
+
+ pneigh_ifdown(tbl, dev, skip_perm);
pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL,
tbl->family);
if (skb_queue_empty_lockless(&tbl->proxy_queue))
@@ -731,7 +732,7 @@ struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
key_len = tbl->key_len;
hash_val = pneigh_hash(pkey, key_len);
n = rcu_dereference_check(tbl->phash_buckets[hash_val],
- lockdep_is_held(&tbl->lock));
+ lockdep_is_held(&tbl->phash_lock));
while (n) {
if (!memcmp(n->key, pkey, key_len) &&
@@ -739,7 +740,7 @@ struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
(n->dev == dev || !n->dev))
return n;
- n = rcu_dereference_check(n->next, lockdep_is_held(&tbl->lock));
+ n = rcu_dereference_check(n->next, lockdep_is_held(&tbl->phash_lock));
}
return NULL;
@@ -754,11 +755,9 @@ struct pneigh_entry *pneigh_create(struct neigh_table *tbl,
unsigned int key_len;
u32 hash_val;
- ASSERT_RTNL();
+ mutex_lock(&tbl->phash_lock);
- read_lock_bh(&tbl->lock);
n = pneigh_lookup(tbl, net, pkey, dev);
- read_unlock_bh(&tbl->lock);
if (n)
goto out;
@@ -780,11 +779,10 @@ struct pneigh_entry *pneigh_create(struct neigh_table *tbl,
}
hash_val = pneigh_hash(pkey, key_len);
- write_lock_bh(&tbl->lock);
n->next = tbl->phash_buckets[hash_val];
rcu_assign_pointer(tbl->phash_buckets[hash_val], n);
- write_unlock_bh(&tbl->lock);
out:
+ mutex_unlock(&tbl->phash_lock);
return n;
}
@@ -803,13 +801,15 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
unsigned int key_len = tbl->key_len;
u32 hash_val = pneigh_hash(pkey, key_len);
- write_lock_bh(&tbl->lock);
+ mutex_lock(&tbl->phash_lock);
+
for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
np = &n->next) {
if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
net_eq(pneigh_net(n), net)) {
rcu_assign_pointer(*np, n->next);
- write_unlock_bh(&tbl->lock);
+
+ mutex_unlock(&tbl->phash_lock);
if (tbl->pdestructor)
tbl->pdestructor(n);
@@ -818,18 +818,20 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
return 0;
}
}
- write_unlock_bh(&tbl->lock);
+
+ mutex_unlock(&tbl->phash_lock);
return -ENOENT;
}
-static void pneigh_ifdown_and_unlock(struct neigh_table *tbl,
- struct net_device *dev,
- bool skip_perm)
+static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
+ bool skip_perm)
{
struct pneigh_entry *n, **np;
LIST_HEAD(head);
u32 h;
+ mutex_lock(&tbl->phash_lock);
+
for (h = 0; h <= PNEIGH_HASHMASK; h++) {
np = &tbl->phash_buckets[h];
while ((n = *np) != NULL) {
@@ -845,7 +847,7 @@ static void pneigh_ifdown_and_unlock(struct neigh_table *tbl,
}
}
- write_unlock_bh(&tbl->lock);
+ mutex_unlock(&tbl->phash_lock);
while (!list_empty(&head)) {
n = list_first_entry(&head, typeof(*n), free_node);
@@ -1792,6 +1794,7 @@ void neigh_table_init(int index, struct neigh_table *tbl)
WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
rwlock_init(&tbl->lock);
+ mutex_init(&tbl->phash_lock);
INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
--
2.50.0.727.gbf7dc18ff4-goog
Powered by blists - more mailing lists