[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ed6768c1-80b8-aee2-e545-b51661d49336@nvidia.com>
Date: Mon, 6 Jun 2022 19:07:04 -0700
From: Andy Roulin <aroulin@...dia.com>
To: fruggeri@...sta.com
Cc: netdev@...r.kernel.org
Subject: Re: neighbour netlink notifications delivered in wrong order
Below is the patch I have been using and it has worked for me. I didn't
get a chance yet to test all cases or with net-next but I am planning to
send upstream.
----
neigh_update sends a rtnl notification if an update, e.g.,
nud_state change, was done but there is no guarantee of
ordering of the rtnl notifications. Consider the following
scenario:
userspace thread kernel thread
================ =============
neigh_update
write_lock_bh(n->lock)
n->nud_state = STALE
write_unlock_bh(n->lock)
neigh_notify
neigh_fill_info
read_lock_bh(n->lock)
ndm->nud_state = STALE
read_unlock_bh(n->lock)
-------------------------->
neigh:update
write_lock_bh(n->lock)
n->nud_state = REACHABLE
write_unlock_bh(n->lock)
neigh_notify
neigh_fill_info
read_lock_bh(n->lock)
ndm->nud_state = REACHABLE
read_unlock_bh(n->lock)
rtnl_nofify
RTNL REACHABLE sent
<--------
rtnl_notify
RTNL STALE sent
In this scenario, the kernel neigh is updated first to STALE and
then REACHABLE but the netlink notifications are sent out of order,
first REACHABLE and then STALE.
To fix this ordering, use read_lock_bh(n->lock) for both reading the
neigh state (neigh_fill_info) __and__ sending the netlink notification
(rtnl_notify).
Signed-off-by: Andy Roulin <aroulin@...dia.com>
---
net/core/neighbour.c | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 54625287ee5b..a91dfcbfc01c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2531,23 +2531,19 @@ static int neigh_fill_info(struct sk_buff *skb,
struct neighbour *neigh,
if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
goto nla_put_failure;
- read_lock_bh(&neigh->lock);
ndm->ndm_state = neigh->nud_state;
if (neigh->nud_state & NUD_VALID) {
char haddr[MAX_ADDR_LEN];
neigh_ha_snapshot(haddr, neigh, neigh->dev);
- if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
- read_unlock_bh(&neigh->lock);
+ if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0)
goto nla_put_failure;
- }
}
ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
- read_unlock_bh(&neigh->lock);
if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
@@ -2674,10 +2670,15 @@ static int neigh_dump_table(struct neigh_table
*tbl, struct sk_buff *skb,
if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
neigh_master_filtered(n->dev, filter->master_idx))
goto next;
- if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH,
- flags) < 0) {
+
+ read_lock_bh(&n->lock);
+ rc = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ RTM_NEWNEIGH,
+ flags);
+ read_unlock_bh(&n->lock);
+
+ if (rc < 0) {
rc = -1;
goto out;
}
@@ -2926,7 +2927,10 @@ static int neigh_get_reply(struct net *net,
struct neighbour *neigh,
if (!skb)
return -ENOBUFS;
+ read_lock_bh(&neigh->lock);
err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
+ read_unlock_bh(&neigh->lock);
+
if (err) {
kfree_skb(skb);
goto errout;
@@ -3460,14 +3464,17 @@ static void __neigh_notify(struct neighbour *n,
int type, int flags,
if (skb == NULL)
goto errout;
+ read_lock_bh(&n->lock);
err = neigh_fill_info(skb, n, pid, 0, type, flags);
if (err < 0) {
/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
WARN_ON(err == -EMSGSIZE);
+ read_unlock_bh(&n->lock);
kfree_skb(skb);
goto errout;
}
rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+ read_unlock_bh(&n->lock);
return;
errout:
if (err < 0)
--
2.20.1
Powered by blists - more mailing lists