lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <B6F453DD-3BDA-4618-AD2A-5B317C32048A@bamaicloud.com>
Date: Tue, 4 Nov 2025 22:48:04 +0800
From: Tonghao Zhang <tonghao@...aicloud.com>
To: Jay Vosburgh <jv@...sburgh.net>
Cc: netdev@...r.kernel.org,
 "David S. Miller" <davem@...emloft.net>,
 Eric Dumazet <edumazet@...gle.com>,
 Jakub Kicinski <kuba@...nel.org>,
 Paolo Abeni <pabeni@...hat.com>,
 Simon Horman <horms@...nel.org>,
 Jonathan Corbet <corbet@....net>,
 Andrew Lunn <andrew+netdev@...n.ch>,
 Nikolay Aleksandrov <razor@...ckwall.org>,
 Hangbin Liu <liuhangbin@...il.com>
Subject: Re: [PATCH v2] net: bonding: use atomic instead of rtnl_mutex, to
 make sure peer notify updated



> On Nov 4, 2025, at 05:48, Jay Vosburgh <jv@...sburgh.net> wrote:
> 
> Tonghao Zhang <tonghao@...aicloud.com> wrote:
> 
>> Using atomic to protect the send_peer_notif instead of rtnl_mutex.
>> This approach allows safe updates in both interrupt and process
>> contexts, while avoiding code complexity.
>> 
>> In lacp mode, the rtnl might be locked, preventing ad_cond_set_peer_notif()
>> from acquiring the lock and updating send_peer_notif. This patch addresses
>> the issue by using an atomic. Since updating send_peer_notif does not
>> require high real-time performance, such atomic updates are acceptable.
>> 
>> After converting the rtnl lock for send_peer_notif to atomic, in bond_mii_monitor(),
>> we should check the should_notify_peers (rtnllock required) instead of
>> send_peer_notif. By the way, to avoid peer notify event loss, we check
>> again whether to send peer notify, such as active-backup mode failover.
>> 
>> Cc: Jay Vosburgh <jv@...sburgh.net>
>> Cc: "David S. Miller" <davem@...emloft.net>
>> Cc: Eric Dumazet <edumazet@...gle.com>
>> Cc: Jakub Kicinski <kuba@...nel.org>
>> Cc: Paolo Abeni <pabeni@...hat.com>
>> Cc: Simon Horman <horms@...nel.org>
>> Cc: Jonathan Corbet <corbet@....net>
>> Cc: Andrew Lunn <andrew+netdev@...n.ch>
>> Cc: Nikolay Aleksandrov <razor@...ckwall.org>
>> Cc: Hangbin Liu <liuhangbin@...il.com>
>> Suggested-by: Jay Vosburgh <jv@...sburgh.net>
>> Signed-off-by: Tonghao Zhang <tonghao@...aicloud.com>
>> ---
>> v2:
>> - refine the codes
>> - check bond_should_notify_peers again in bond_mii_monitor(), to avoid
>> event loss. 
>> - v1 https://patchwork.kernel.org/project/netdevbpf/patch/20251026095614.48833-1-tonghao@bamaicloud.com/
>> ---
>> drivers/net/bonding/bond_3ad.c  |  7 ++---
>> drivers/net/bonding/bond_main.c | 46 ++++++++++++++++-----------------
>> include/net/bonding.h           |  9 ++++++-
>> 3 files changed, 32 insertions(+), 30 deletions(-)
>> 
>> diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>> index 49717b7b82a2..05c573e45450 100644
>> --- a/drivers/net/bonding/bond_3ad.c
>> +++ b/drivers/net/bonding/bond_3ad.c
>> @@ -999,11 +999,8 @@ static void ad_cond_set_peer_notif(struct port *port)
>> {
>> struct bonding *bond = port->slave->bond;
>> 
>> - if (bond->params.broadcast_neighbor && rtnl_trylock()) {
>> - bond->send_peer_notif = bond->params.num_peer_notif *
>> - max(1, bond->params.peer_notif_delay);
>> - rtnl_unlock();
>> - }
>> + if (bond->params.broadcast_neighbor)
>> + bond_peer_notify_reset(bond);
>> }
>> 
>> /**
>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>> index 8e592f37c28b..ae90221838d4 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -1167,10 +1167,11 @@ static bool bond_should_notify_peers(struct bonding *bond)
>> {
>> struct bond_up_slave *usable;
>> struct slave *slave = NULL;
>> + int send_peer_notif;
>> 
>> - if (!bond->send_peer_notif ||
>> -     bond->send_peer_notif %
>> -     max(1, bond->params.peer_notif_delay) != 0 ||
>> + send_peer_notif = atomic_read(&bond->send_peer_notif);
>> + if (!send_peer_notif ||
>> +     send_peer_notif % max(1, bond->params.peer_notif_delay) != 0 ||
>>     !netif_carrier_ok(bond->dev))
>> return false;
>> 
>> @@ -1270,8 +1271,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
>>       BOND_SLAVE_NOTIFY_NOW);
>> 
>> if (new_active) {
>> - bool should_notify_peers = false;
>> -
>> bond_set_slave_active_flags(new_active,
>>     BOND_SLAVE_NOTIFY_NOW);
>> 
>> @@ -1280,19 +1279,17 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
>>       old_active);
>> 
>> if (netif_running(bond->dev)) {
>> - bond->send_peer_notif =
>> - bond->params.num_peer_notif *
>> - max(1, bond->params.peer_notif_delay);
>> - should_notify_peers =
>> - bond_should_notify_peers(bond);
>> + bond_peer_notify_reset(bond);
>> +
>> + if (bond_should_notify_peers(bond)) {
>> + atomic_dec(&bond->send_peer_notif);
>> + call_netdevice_notifiers(
>> + NETDEV_NOTIFY_PEERS,
>> + bond->dev);
>> + }
>> }
>> 
>> call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
>> - if (should_notify_peers) {
>> - bond->send_peer_notif--;
>> - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
>> -  bond->dev);
>> - }
>> }
>> }
>> 
>> @@ -2801,7 +2798,7 @@ static void bond_mii_monitor(struct work_struct *work)
>> 
>> rcu_read_unlock();
>> 
>> - if (commit || bond->send_peer_notif) {
>> + if (commit || should_notify_peers) {
>> /* Race avoidance with bond_close cancel of workqueue */
>> if (!rtnl_trylock()) {
>> delay = 1;
>> @@ -2816,16 +2813,15 @@ static void bond_mii_monitor(struct work_struct *work)
>> bond_miimon_commit(bond);
>> }
>> 
>> - if (bond->send_peer_notif) {
>> - bond->send_peer_notif--;
>> - if (should_notify_peers)
>> - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
>> -  bond->dev);
>> - }
>> + /* check again to avoid send_peer_notif has been changed. */
>> + if (bond_should_notify_peers(bond))
>> + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
> 
> Is the risk here that user space may have set send_peer_notify
> to zero?
If user space has set send_peer_notif to 0, bond_should_notify_peers() returns false, so NETDEV_NOTIFY_PEERS is not sent and there is no peer notification.
> 
> 
>> 
>> rtnl_unlock(); /* might sleep, hold no other locks */
>> }
>> 
>> + atomic_dec_if_positive(&bond->send_peer_notif);
>> +
> 
> Also, it's a bit subtle, but I think this has to be outside of
> the if block, or peer_notif_delay may be unreliable.  I'm not sure it
> needs a comment, but could you confirm that's why this line is where it
> is?
Yes, that is why this line is here. I will add a comment in the next version.
- Whether or not there is a commit/peer notify, send_peer_notif must be decremented on every loop iteration, so the decrement has to be placed outside of the if block.
- The decrement must come after the commit/peer-notify processing, so that send_peer_notif is not decremented (send_peer_notif--) in the case where rtnl_trylock() failed.
- Regardless of whether send_peer_notif is set, atomic_dec_if_positive() is expected to run on every iteration, and the counter will never drop below 0. [I will add a comment noting that this is safe.]
> 
> -J
> 
>> re_arm:
>> if (bond->params.miimon)
>> queue_delayed_work(bond->wq, &bond->mii_work, delay);
>> @@ -3773,7 +3769,7 @@ static void bond_activebackup_arp_mon(struct bonding *bond)
>> return;
>> 
>> if (should_notify_peers) {
>> - bond->send_peer_notif--;
>> + atomic_dec(&bond->send_peer_notif);
>> call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
>>  bond->dev);
>> }
>> @@ -4267,6 +4263,8 @@ static int bond_open(struct net_device *bond_dev)
>> queue_delayed_work(bond->wq, &bond->alb_work, 0);
>> }
>> 
>> + atomic_set(&bond->send_peer_notif, 0);
>> +
>> if (bond->params.miimon)  /* link check interval, in milliseconds. */
>> queue_delayed_work(bond->wq, &bond->mii_work, 0);
>> 
>> @@ -4300,7 +4298,7 @@ static int bond_close(struct net_device *bond_dev)
>> struct slave *slave;
>> 
>> bond_work_cancel_all(bond);
>> - bond->send_peer_notif = 0;
>> + atomic_set(&bond->send_peer_notif, 0);
>> if (bond_is_lb(bond))
>> bond_alb_deinitialize(bond);
>> bond->recv_probe = NULL;
>> diff --git a/include/net/bonding.h b/include/net/bonding.h
>> index 49edc7da0586..afdfcb5bfaf0 100644
>> --- a/include/net/bonding.h
>> +++ b/include/net/bonding.h
>> @@ -236,7 +236,7 @@ struct bonding {
>>  */
>> spinlock_t mode_lock;
>> spinlock_t stats_lock;
>> - u32  send_peer_notif;
>> + atomic_t send_peer_notif;
>> u8       igmp_retrans;
>> #ifdef CONFIG_PROC_FS
>> struct   proc_dir_entry *proc_entry;
>> @@ -814,4 +814,11 @@ static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *s
>> return NET_XMIT_DROP;
>> }
>> 
>> +static inline void bond_peer_notify_reset(struct bonding *bond)
>> +{
>> + atomic_set(&bond->send_peer_notif,
>> + bond->params.num_peer_notif *
>> + max(1, bond->params.peer_notif_delay));
>> +}
>> +
>> #endif /* _NET_BONDING_H */
>> -- 
>> 2.34.1
>> 
> 
> ---
> -Jay Vosburgh, jv@...sburgh.net



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ