[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d440f35aab4aadb49dabc086e45f865cf0eeaa5e.1768225160.git.petrm@nvidia.com>
Date: Wed, 14 Jan 2026 10:54:45 +0100
From: Petr Machata <petrm@...dia.com>
To: "David S. Miller" <davem@...emloft.net>, Eric Dumazet
<edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>, Paolo Abeni
<pabeni@...hat.com>, Simon Horman <horms@...nel.org>,
<netdev@...r.kernel.org>
CC: Ido Schimmel <idosch@...dia.com>, Kuniyuki Iwashima <kuniyu@...gle.com>,
Breno Leitao <leitao@...ian.org>, Andy Roulin <aroulin@...dia.com>,
"Francesco Ruggeri" <fruggeri@...sta.com>, Stephen Hemminger
<stephen@...workplumber.org>, Petr Machata <petrm@...dia.com>,
<mlxsw@...dia.com>
Subject: [PATCH net-next 2/8] net: core: neighbour: Call __neigh_notify() under a lock
Andy Roulin has described an issue with the current neighbor notification
scheme as follows. This was also presented publicly at the link below.
neigh_update sends a rtnl notification if an update, e.g.,
nud_state change, was done but there is no guarantee of
ordering of the rtnl notifications. Consider the following
scenario:
userspace thread kernel thread
================ =============
neigh_update
write_lock_bh(n->lock)
n->nud_state = STALE
write_unlock_bh(n->lock)
neigh_notify
neigh_fill_info
read_lock_bh(n->lock)
ndm->nud_state = STALE
read_unlock_bh(n->lock)
-------------------------->
neigh:update
write_lock_bh(n->lock)
n->nud_state = REACHABLE
write_unlock_bh(n->lock)
neigh_notify
neigh_fill_info
read_lock_bh(n->lock)
ndm->nud_state = REACHABLE
read_unlock_bh(n->lock)
rtnl_nofify
RTNL REACHABLE sent
<--------
rtnl_notify
RTNL STALE sent
In this scenario, the kernel neigh is updated first to STALE and
then REACHABLE but the netlink notifications are sent out of order,
first REACHABLE and then STALE.
The solution is to send the netlink message inside the same critical
section that formats the message. That way both the contents and ordering
of the message reflect the same state, and we cannot see the abovementioned
out-of-order delivery.
Even with this patch, an issue remains that the contents of the message may
not reflect the changes made to the neighbor. A kernel thread might still
interrupt a userspace thread after the change is done, but before
formatting and sending the message. Then what we would see is two messages
with the same contents. The following patches will attempt to address that
issue.
To support those future patches, convert __neigh_notify() to a helper that
assumes that the neighbor lock is already taken by having it call
__neigh_fill_info() instead of neigh_fill_info(). Add a new helper,
neigh_notify(), which takes the lock before calling __neigh_notify().
Migrate all callers to use the latter.
Link: https://lore.kernel.org/netdev/ed6768c1-80b8-aee2-e545-b51661d49336@nvidia.com/
Signed-off-by: Petr Machata <petrm@...dia.com>
Reviewed-by: Ido Schimmel <idosch@...dia.com>
---
net/core/neighbour.c | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6cdd93dfa3ea..5a56b787b5ec 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -51,8 +51,7 @@ do { \
#define PNEIGH_HASHMASK 0xF
static void neigh_timer_handler(struct timer_list *t);
-static void __neigh_notify(struct neighbour *n, int type, int flags,
- u32 pid);
+static void neigh_notify(struct neighbour *n, int type, int flags, u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static void pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
bool skip_perm);
@@ -117,7 +116,7 @@ static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
trace_neigh_cleanup_and_release(neigh, 0);
- __neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
+ neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
neigh_release(neigh);
}
@@ -2740,7 +2739,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
- __neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
+ neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}
static bool neigh_master_filtered(struct net_device *dev, int master_idx)
@@ -3555,7 +3554,7 @@ static void __neigh_notify(struct neighbour *n, int type, int flags,
if (skb == NULL)
goto errout;
- err = neigh_fill_info(skb, n, pid, 0, type, flags);
+ err = __neigh_fill_info(skb, n, pid, 0, type, flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
@@ -3570,9 +3569,18 @@ static void __neigh_notify(struct neighbour *n, int type, int flags,
rcu_read_unlock();
}
+static void neigh_notify(struct neighbour *neigh, int type, int flags, u32 pid)
+ __releases(neigh->lock)
+ __acquires(neigh->lock)
+{
+ read_lock_bh(&neigh->lock);
+ __neigh_notify(neigh, type, flags, pid);
+ read_unlock_bh(&neigh->lock);
+}
+
void neigh_app_ns(struct neighbour *n)
{
- __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
+ neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);
--
2.51.1
Powered by blists - more mailing lists