lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251110091337.43517-1-tonghao@bamaicloud.com>
Date: Mon, 10 Nov 2025 17:13:37 +0800
From: Tonghao Zhang <tonghao@...aicloud.com>
To: netdev@...r.kernel.org
Cc: Tonghao Zhang <tonghao@...aicloud.com>,
	Jay Vosburgh <jv@...sburgh.net>,
	"David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <edumazet@...gle.com>,
	Jakub Kicinski <kuba@...nel.org>,
	Paolo Abeni <pabeni@...hat.com>,
	Simon Horman <horms@...nel.org>,
	Jonathan Corbet <corbet@....net>,
	Andrew Lunn <andrew+netdev@...n.ch>,
	Nikolay Aleksandrov <razor@...ckwall.org>,
	Hangbin Liu <liuhangbin@...il.com>
Subject: [PATCH net-next v4] net: bonding: use atomic instead of rtnl_mutex, to make sure peer notify updated

Using atomic to protect the send_peer_notif instead of rtnl_mutex.
This approach allows safe updates in both interrupt and process
contexts, while avoiding code complexity.

In lacp mode, the rtnl might be locked, preventing ad_cond_set_peer_notif()
from acquiring the lock and updating send_peer_notif. This patch addresses
the issue by using a atomic. Since updating send_peer_notif does not
require high real-time performance, such atomic updates are acceptable.

By the way, send_peer_notif is reset atomically. For avoiding peer notify
event loss, we should invoke bond_should_notify_peers() to check whether
to send peer notify in bond_mii_monitor() and bond_activebackup_arp_mon().

More importantly, send_peer_notif-- should be placed with send_peer_notif
*if* block [2]. Otherwise [1], if atomic_dec is executed immediately after
resetting the send_peer_notif, it is very likely that the event will be lost.
- [1]:
	if (send_peer_notif) {
		...
	}
	// if reset and then atomic_dec, event will be lost.
	atomic_dec()

- [2] should be changed:
	if (send_peer_notif) {
		...
		// if reset and then atomic_dec, event may be lost.
		// but we are already in notify context.
		atomic_dec()
	}

Cc: Jay Vosburgh <jv@...sburgh.net>
Cc: "David S. Miller" <davem@...emloft.net>
Cc: Eric Dumazet <edumazet@...gle.com>
Cc: Jakub Kicinski <kuba@...nel.org>
Cc: Paolo Abeni <pabeni@...hat.com>
Cc: Simon Horman <horms@...nel.org>
Cc: Jonathan Corbet <corbet@....net>
Cc: Andrew Lunn <andrew+netdev@...n.ch>
Cc: Nikolay Aleksandrov <razor@...ckwall.org>
Cc: Hangbin Liu <liuhangbin@...il.com>
Suggested-by: Jay Vosburgh <jv@...sburgh.net>
Signed-off-by: Tonghao Zhang <tonghao@...aicloud.com>
---
v4:
- fix race in bond_activebackup_arp_mon()
- check send_peer_notif instead of should_notify_peers.
- move send_peer_notif-- in if block.
v3:
- add the comment, *_dec_if_positive is safe.
v2:
- refine the codes
- check bond_should_notify_peers again in bond_mii_monitor(), to avoid event loss.
v1:
- https://patchwork.kernel.org/project/netdevbpf/patch/20251026095614.48833-1-tonghao@bamaicloud.com/
---
 drivers/net/bonding/bond_3ad.c  |  7 +---
 drivers/net/bonding/bond_main.c | 71 +++++++++++++++++----------------
 include/net/bonding.h           |  9 ++++-
 3 files changed, 47 insertions(+), 40 deletions(-)

diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index 49717b7b82a2..05c573e45450 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -999,11 +999,8 @@ static void ad_cond_set_peer_notif(struct port *port)
 {
 	struct bonding *bond = port->slave->bond;
 
-	if (bond->params.broadcast_neighbor && rtnl_trylock()) {
-		bond->send_peer_notif = bond->params.num_peer_notif *
-			max(1, bond->params.peer_notif_delay);
-		rtnl_unlock();
-	}
+	if (bond->params.broadcast_neighbor)
+		bond_peer_notify_reset(bond);
 }
 
 /**
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8e592f37c28b..386efac3bc64 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1167,10 +1167,11 @@ static bool bond_should_notify_peers(struct bonding *bond)
 {
 	struct bond_up_slave *usable;
 	struct slave *slave = NULL;
+	int send_peer_notif;
 
-	if (!bond->send_peer_notif ||
-	    bond->send_peer_notif %
-	    max(1, bond->params.peer_notif_delay) != 0 ||
+	send_peer_notif = atomic_read(&bond->send_peer_notif);
+	if (!send_peer_notif ||
+	    send_peer_notif % max(1, bond->params.peer_notif_delay) != 0 ||
 	    !netif_carrier_ok(bond->dev))
 		return false;
 
@@ -1270,8 +1271,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 						      BOND_SLAVE_NOTIFY_NOW);
 
 		if (new_active) {
-			bool should_notify_peers = false;
-
 			bond_set_slave_active_flags(new_active,
 						    BOND_SLAVE_NOTIFY_NOW);
 
@@ -1280,19 +1279,17 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
 						      old_active);
 
 			if (netif_running(bond->dev)) {
-				bond->send_peer_notif =
-					bond->params.num_peer_notif *
-					max(1, bond->params.peer_notif_delay);
-				should_notify_peers =
-					bond_should_notify_peers(bond);
+				bond_peer_notify_reset(bond);
+
+				if (bond_should_notify_peers(bond)) {
+					atomic_dec(&bond->send_peer_notif);
+					call_netdevice_notifiers(
+							NETDEV_NOTIFY_PEERS,
+							bond->dev);
+				}
 			}
 
 			call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
-			if (should_notify_peers) {
-				bond->send_peer_notif--;
-				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
-							 bond->dev);
-			}
 		}
 	}
 
@@ -2783,11 +2780,11 @@ static void bond_mii_monitor(struct work_struct *work)
 {
 	struct bonding *bond = container_of(work, struct bonding,
 					    mii_work.work);
-	bool should_notify_peers;
-	bool commit;
-	unsigned long delay;
-	struct slave *slave;
 	struct list_head *iter;
+	struct slave *slave;
+	unsigned long delay;
+	int send_peer_notif;
+	bool commit;
 
 	delay = msecs_to_jiffies(bond->params.miimon);
 
@@ -2796,12 +2793,12 @@ static void bond_mii_monitor(struct work_struct *work)
 
 	rcu_read_lock();
 
-	should_notify_peers = bond_should_notify_peers(bond);
+	send_peer_notif = atomic_read(&bond->send_peer_notif);
 	commit = !!bond_miimon_inspect(bond);
 
 	rcu_read_unlock();
 
-	if (commit || bond->send_peer_notif) {
+	if (commit || send_peer_notif) {
 		/* Race avoidance with bond_close cancel of workqueue */
 		if (!rtnl_trylock()) {
 			delay = 1;
@@ -2816,11 +2813,13 @@ static void bond_mii_monitor(struct work_struct *work)
 			bond_miimon_commit(bond);
 		}
 
-		if (bond->send_peer_notif) {
-			bond->send_peer_notif--;
-			if (should_notify_peers)
+		if (send_peer_notif) {
+			if (bond_should_notify_peers(bond))
 				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
 							 bond->dev);
+
+			/* It's safe even when send_peer_notif was reset. */
+			atomic_dec_if_positive(&bond->send_peer_notif);
 		}
 
 		rtnl_unlock();	/* might sleep, hold no other locks */
@@ -3732,8 +3731,8 @@ static bool bond_ab_arp_probe(struct bonding *bond)
 
 static void bond_activebackup_arp_mon(struct bonding *bond)
 {
-	bool should_notify_peers = false;
 	bool should_notify_rtnl = false;
+	int send_peer_notif = 0;
 	int delta_in_ticks;
 
 	delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
@@ -3743,15 +3742,12 @@ static void bond_activebackup_arp_mon(struct bonding *bond)
 
 	rcu_read_lock();
 
-	should_notify_peers = bond_should_notify_peers(bond);
-
 	if (bond_ab_arp_inspect(bond)) {
 		rcu_read_unlock();
 
 		/* Race avoidance with bond_close flush of workqueue */
 		if (!rtnl_trylock()) {
 			delta_in_ticks = 1;
-			should_notify_peers = false;
 			goto re_arm;
 		}
 
@@ -3762,21 +3758,26 @@ static void bond_activebackup_arp_mon(struct bonding *bond)
 	}
 
 	should_notify_rtnl = bond_ab_arp_probe(bond);
+	send_peer_notif = atomic_read(&bond->send_peer_notif);
+
 	rcu_read_unlock();
 
 re_arm:
 	if (bond->params.arp_interval)
 		queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks);
 
-	if (should_notify_peers || should_notify_rtnl) {
+	if (send_peer_notif || should_notify_rtnl) {
 		if (!rtnl_trylock())
 			return;
 
-		if (should_notify_peers) {
-			bond->send_peer_notif--;
-			call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
-						 bond->dev);
+		if (send_peer_notif) {
+			if (bond_should_notify_peers(bond))
+				call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
+							 bond->dev);
+			/* It's safe even when send_peer_notif was reset. */
+			atomic_dec_if_positive(&bond->send_peer_notif);
 		}
+
 		if (should_notify_rtnl) {
 			bond_slave_state_notify(bond);
 			bond_slave_link_notify(bond);
@@ -4267,6 +4268,8 @@ static int bond_open(struct net_device *bond_dev)
 			queue_delayed_work(bond->wq, &bond->alb_work, 0);
 	}
 
+	atomic_set(&bond->send_peer_notif, 0);
+
 	if (bond->params.miimon)  /* link check interval, in milliseconds. */
 		queue_delayed_work(bond->wq, &bond->mii_work, 0);
 
@@ -4300,7 +4303,7 @@ static int bond_close(struct net_device *bond_dev)
 	struct slave *slave;
 
 	bond_work_cancel_all(bond);
-	bond->send_peer_notif = 0;
+	atomic_set(&bond->send_peer_notif, 0);
 	if (bond_is_lb(bond))
 		bond_alb_deinitialize(bond);
 	bond->recv_probe = NULL;
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 49edc7da0586..afdfcb5bfaf0 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -236,7 +236,7 @@ struct bonding {
 	 */
 	spinlock_t mode_lock;
 	spinlock_t stats_lock;
-	u32	 send_peer_notif;
+	atomic_t send_peer_notif;
 	u8       igmp_retrans;
 #ifdef CONFIG_PROC_FS
 	struct   proc_dir_entry *proc_entry;
@@ -814,4 +814,11 @@ static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *s
 	return NET_XMIT_DROP;
 }
 
+static inline void bond_peer_notify_reset(struct bonding *bond)
+{
+	atomic_set(&bond->send_peer_notif,
+		bond->params.num_peer_notif *
+		max(1, bond->params.peer_notif_delay));
+}
+
 #endif /* _NET_BONDING_H */
-- 
2.34.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ