[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251110091337.43517-1-tonghao@bamaicloud.com>
Date: Mon, 10 Nov 2025 17:13:37 +0800
From: Tonghao Zhang <tonghao@...aicloud.com>
To: netdev@...r.kernel.org
Cc: Tonghao Zhang <tonghao@...aicloud.com>,
Jay Vosburgh <jv@...sburgh.net>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
Simon Horman <horms@...nel.org>,
Jonathan Corbet <corbet@....net>,
Andrew Lunn <andrew+netdev@...n.ch>,
Nikolay Aleksandrov <razor@...ckwall.org>,
Hangbin Liu <liuhangbin@...il.com>
Subject: [PATCH net-next v4] net: bonding: use atomic instead of rtnl_mutex, to make sure peer notify updated
Using atomic to protect the send_peer_notif instead of rtnl_mutex.
This approach allows safe updates in both interrupt and process
contexts, while avoiding code complexity.
In lacp mode, the rtnl might be locked, preventing ad_cond_set_peer_notif()
from acquiring the lock and updating send_peer_notif. This patch addresses
the issue by using a atomic. Since updating send_peer_notif does not
require high real-time performance, such atomic updates are acceptable.
By the way, send_peer_notif is reset atomically. For avoiding peer notify
event loss, we should invoke bond_should_notify_peers() to check whether
to send peer notify in bond_mii_monitor() and bond_activebackup_arp_mon().
More importantly, send_peer_notif-- should be placed with send_peer_notif
*if* block [2]. Otherwise [1], if atomic_dec is executed immediately after
resetting the send_peer_notif, it is very likely that the event will be lost.
- [1]:
if (send_peer_notif) {
...
}
// if reset and then atomic_dec, event will be lost.
atomic_dec()
- [2] should be changed:
if (send_peer_notif) {
...
// if reset and then atomic_dec, event may be lost.
// but we are already in notify context.
atomic_dec()
}
Cc: Jay Vosburgh <jv@...sburgh.net>
Cc: "David S. Miller" <davem@...emloft.net>
Cc: Eric Dumazet <edumazet@...gle.com>
Cc: Jakub Kicinski <kuba@...nel.org>
Cc: Paolo Abeni <pabeni@...hat.com>
Cc: Simon Horman <horms@...nel.org>
Cc: Jonathan Corbet <corbet@....net>
Cc: Andrew Lunn <andrew+netdev@...n.ch>
Cc: Nikolay Aleksandrov <razor@...ckwall.org>
Cc: Hangbin Liu <liuhangbin@...il.com>
Suggested-by: Jay Vosburgh <jv@...sburgh.net>
Signed-off-by: Tonghao Zhang <tonghao@...aicloud.com>
---
v4:
- fix race in bond_activebackup_arp_mon()
- check send_peer_notif instead of should_notify_peers.
- move send_peer_notif-- in if block.
v3:
- add the comment, *_dec_if_positive is safe.
v2:
- refine the codes
- check bond_should_notify_peers again in bond_mii_monitor(), to avoid event loss.
v1:
- https://patchwork.kernel.org/project/netdevbpf/patch/20251026095614.48833-1-tonghao@bamaicloud.com/
---
drivers/net/bonding/bond_3ad.c | 7 +---
drivers/net/bonding/bond_main.c | 71 +++++++++++++++++----------------
include/net/bonding.h | 9 ++++-
3 files changed, 47 insertions(+), 40 deletions(-)
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 49717b7b82a2..05c573e45450 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -999,11 +999,8 @@ static void ad_cond_set_peer_notif(struct port *port)
{
struct bonding *bond = port->slave->bond;
- if (bond->params.broadcast_neighbor && rtnl_trylock()) {
- bond->send_peer_notif = bond->params.num_peer_notif *
- max(1, bond->params.peer_notif_delay);
- rtnl_unlock();
- }
+ if (bond->params.broadcast_neighbor)
+ bond_peer_notify_reset(bond);
}
/**
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8e592f37c28b..386efac3bc64 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1167,10 +1167,11 @@ static bool bond_should_notify_peers(struct bonding *bond)
{
struct bond_up_slave *usable;
struct slave *slave = NULL;
+ int send_peer_notif;
- if (!bond->send_peer_notif ||
- bond->send_peer_notif %
- max(1, bond->params.peer_notif_delay) != 0 ||
+ send_peer_notif = atomic_read(&bond->send_peer_notif);
+ if (!send_peer_notif ||
+ send_peer_notif % max(1, bond->params.peer_notif_delay) != 0 ||
!netif_carrier_ok(bond->dev))
return false;
@@ -1270,8 +1271,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
BOND_SLAVE_NOTIFY_NOW);
if (new_active) {
- bool should_notify_peers = false;
-
bond_set_slave_active_flags(new_active,
BOND_SLAVE_NOTIFY_NOW);
@@ -1280,19 +1279,17 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
old_active);
if (netif_running(bond->dev)) {
- bond->send_peer_notif =
- bond->params.num_peer_notif *
- max(1, bond->params.peer_notif_delay);
- should_notify_peers =
- bond_should_notify_peers(bond);
+ bond_peer_notify_reset(bond);
+
+ if (bond_should_notify_peers(bond)) {
+ atomic_dec(&bond->send_peer_notif);
+ call_netdevice_notifiers(
+ NETDEV_NOTIFY_PEERS,
+ bond->dev);
+ }
}
call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
- if (should_notify_peers) {
- bond->send_peer_notif--;
- call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
- bond->dev);
- }
}
}
@@ -2783,11 +2780,11 @@ static void bond_mii_monitor(struct work_struct *work)
{
struct bonding *bond = container_of(work, struct bonding,
mii_work.work);
- bool should_notify_peers;
- bool commit;
- unsigned long delay;
- struct slave *slave;
struct list_head *iter;
+ struct slave *slave;
+ unsigned long delay;
+ int send_peer_notif;
+ bool commit;
delay = msecs_to_jiffies(bond->params.miimon);
@@ -2796,12 +2793,12 @@ static void bond_mii_monitor(struct work_struct *work)
rcu_read_lock();
- should_notify_peers = bond_should_notify_peers(bond);
+ send_peer_notif = atomic_read(&bond->send_peer_notif);
commit = !!bond_miimon_inspect(bond);
rcu_read_unlock();
- if (commit || bond->send_peer_notif) {
+ if (commit || send_peer_notif) {
/* Race avoidance with bond_close cancel of workqueue */
if (!rtnl_trylock()) {
delay = 1;
@@ -2816,11 +2813,13 @@ static void bond_mii_monitor(struct work_struct *work)
bond_miimon_commit(bond);
}
- if (bond->send_peer_notif) {
- bond->send_peer_notif--;
- if (should_notify_peers)
+ if (send_peer_notif) {
+ if (bond_should_notify_peers(bond))
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
bond->dev);
+
+ /* It's safe even when send_peer_notif was reset. */
+ atomic_dec_if_positive(&bond->send_peer_notif);
}
rtnl_unlock(); /* might sleep, hold no other locks */
@@ -3732,8 +3731,8 @@ static bool bond_ab_arp_probe(struct bonding *bond)
static void bond_activebackup_arp_mon(struct bonding *bond)
{
- bool should_notify_peers = false;
bool should_notify_rtnl = false;
+ int send_peer_notif = 0;
int delta_in_ticks;
delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
@@ -3743,15 +3742,12 @@ static void bond_activebackup_arp_mon(struct bonding *bond)
rcu_read_lock();
- should_notify_peers = bond_should_notify_peers(bond);
-
if (bond_ab_arp_inspect(bond)) {
rcu_read_unlock();
/* Race avoidance with bond_close flush of workqueue */
if (!rtnl_trylock()) {
delta_in_ticks = 1;
- should_notify_peers = false;
goto re_arm;
}
@@ -3762,21 +3758,26 @@ static void bond_activebackup_arp_mon(struct bonding *bond)
}
should_notify_rtnl = bond_ab_arp_probe(bond);
+ send_peer_notif = atomic_read(&bond->send_peer_notif);
+
rcu_read_unlock();
re_arm:
if (bond->params.arp_interval)
queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
- if (should_notify_peers || should_notify_rtnl) {
+ if (send_peer_notif || should_notify_rtnl) {
if (!rtnl_trylock())
return;
- if (should_notify_peers) {
- bond->send_peer_notif--;
- call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
- bond->dev);
+ if (send_peer_notif) {
+ if (bond_should_notify_peers(bond))
+ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
+ bond->dev);
+ /* It's safe even when send_peer_notif was reset. */
+ atomic_dec_if_positive(&bond->send_peer_notif);
}
+
if (should_notify_rtnl) {
bond_slave_state_notify(bond);
bond_slave_link_notify(bond);
@@ -4267,6 +4268,8 @@ static int bond_open(struct net_device *bond_dev)
queue_delayed_work(bond->wq, &bond->alb_work, 0);
}
+ atomic_set(&bond->send_peer_notif, 0);
+
if (bond->params.miimon) /* link check interval, in milliseconds. */
queue_delayed_work(bond->wq, &bond->mii_work, 0);
@@ -4300,7 +4303,7 @@ static int bond_close(struct net_device *bond_dev)
struct slave *slave;
bond_work_cancel_all(bond);
- bond->send_peer_notif = 0;
+ atomic_set(&bond->send_peer_notif, 0);
if (bond_is_lb(bond))
bond_alb_deinitialize(bond);
bond->recv_probe = NULL;
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 49edc7da0586..afdfcb5bfaf0 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -236,7 +236,7 @@ struct bonding {
*/
spinlock_t mode_lock;
spinlock_t stats_lock;
- u32 send_peer_notif;
+ atomic_t send_peer_notif;
u8 igmp_retrans;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc_entry;
@@ -814,4 +814,11 @@ static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *s
return NET_XMIT_DROP;
}
+static inline void bond_peer_notify_reset(struct bonding *bond)
+{
+ atomic_set(&bond->send_peer_notif,
+ bond->params.num_peer_notif *
+ max(1, bond->params.peer_notif_delay));
+}
+
#endif /* _NET_BONDING_H */
--
2.34.1
Powered by blists - more mailing lists