lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <DC343C80-8B0C-4A7A-A532-FCB2860062AC@bamaicloud.com>
Date: Mon, 10 Nov 2025 16:51:43 +0800
From: Tonghao Zhang <tonghao@...aicloud.com>
To: Tonghao Zhang <tonghao@...aicloud.com>
Cc: Jay Vosburgh <jv@...sburgh.net>,
 netdev@...r.kernel.org,
 "David S. Miller" <davem@...emloft.net>,
 Eric Dumazet <edumazet@...gle.com>,
 Jakub Kicinski <kuba@...nel.org>,
 Paolo Abeni <pabeni@...hat.com>,
 Simon Horman <horms@...nel.org>,
 Jonathan Corbet <corbet@....net>,
 Andrew Lunn <andrew+netdev@...n.ch>,
 Nikolay Aleksandrov <razor@...ckwall.org>,
 Hangbin Liu <liuhangbin@...il.com>
Subject: Re: [PATCH v2] net: bonding: use atomic instead of rtnl_mutex, to
 make sure peer notify updated



> On Nov 4, 2025, at 22:48, Tonghao Zhang <tonghao@...aicloud.com> wrote:
> 
> 
> 
>> On Nov 4, 2025, at 05:48, Jay Vosburgh <jv@...sburgh.net> wrote:
>> 
>> Tonghao Zhang <tonghao@...aicloud.com> wrote:
>> 
>>> Using atomic to protect the send_peer_notif instead of rtnl_mutex.
>>> This approach allows safe updates in both interrupt and process
>>> contexts, while avoiding code complexity.
>>> 
>>> In lacp mode, the rtnl might be locked, preventing ad_cond_set_peer_notif()
>>> from acquiring the lock and updating send_peer_notif. This patch addresses
>>> the issue by using an atomic. Since updating send_peer_notif does not
>>> require high real-time performance, such atomic updates are acceptable.
>>> 
>>> After converting the rtnl lock for send_peer_notif to atomic, in bond_mii_monitor(),
>>> we should check the should_notify_peers (rtnllock required) instead of
>>> send_peer_notif. By the way, to avoid peer notify event loss, we check
>>> again whether to send peer notify, such as active-backup mode failover.
>>> 
>>> Cc: Jay Vosburgh <jv@...sburgh.net>
>>> Cc: "David S. Miller" <davem@...emloft.net>
>>> Cc: Eric Dumazet <edumazet@...gle.com>
>>> Cc: Jakub Kicinski <kuba@...nel.org>
>>> Cc: Paolo Abeni <pabeni@...hat.com>
>>> Cc: Simon Horman <horms@...nel.org>
>>> Cc: Jonathan Corbet <corbet@....net>
>>> Cc: Andrew Lunn <andrew+netdev@...n.ch>
>>> Cc: Nikolay Aleksandrov <razor@...ckwall.org>
>>> Cc: Hangbin Liu <liuhangbin@...il.com>
>>> Suggested-by: Jay Vosburgh <jv@...sburgh.net>
>>> Signed-off-by: Tonghao Zhang <tonghao@...aicloud.com>
>>> ---
>>> v2:
>>> - refine the codes
>>> - check bond_should_notify_peers again in bond_mii_monitor(), to avoid
>>> event loss. 
>>> - v1 https://patchwork.kernel.org/project/netdevbpf/patch/20251026095614.48833-1-tonghao@bamaicloud.com/
>>> ---
>>> drivers/net/bonding/bond_3ad.c  |  7 ++---
>>> drivers/net/bonding/bond_main.c | 46 ++++++++++++++++-----------------
>>> include/net/bonding.h           |  9 ++++++-
>>> 3 files changed, 32 insertions(+), 30 deletions(-)
>>> 
>>> diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
>>> index 49717b7b82a2..05c573e45450 100644
>>> --- a/drivers/net/bonding/bond_3ad.c
>>> +++ b/drivers/net/bonding/bond_3ad.c
>>> @@ -999,11 +999,8 @@ static void ad_cond_set_peer_notif(struct port *port)
>>> {
>>> struct bonding *bond = port->slave->bond;
>>> 
>>> - if (bond->params.broadcast_neighbor && rtnl_trylock()) {
>>> - bond->send_peer_notif = bond->params.num_peer_notif *
>>> - max(1, bond->params.peer_notif_delay);
>>> - rtnl_unlock();
>>> - }
>>> + if (bond->params.broadcast_neighbor)
>>> + bond_peer_notify_reset(bond);
>>> }
>>> 
>>> /**
>>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>>> index 8e592f37c28b..ae90221838d4 100644
>>> --- a/drivers/net/bonding/bond_main.c
>>> +++ b/drivers/net/bonding/bond_main.c
>>> @@ -1167,10 +1167,11 @@ static bool bond_should_notify_peers(struct bonding *bond)
>>> {
>>> struct bond_up_slave *usable;
>>> struct slave *slave = NULL;
>>> + int send_peer_notif;
>>> 
>>> - if (!bond->send_peer_notif ||
>>> -     bond->send_peer_notif %
>>> -     max(1, bond->params.peer_notif_delay) != 0 ||
>>> + send_peer_notif = atomic_read(&bond->send_peer_notif);
>>> + if (!send_peer_notif ||
>>> +     send_peer_notif % max(1, bond->params.peer_notif_delay) != 0 ||
>>>    !netif_carrier_ok(bond->dev))
>>> return false;
>>> 
>>> @@ -1270,8 +1271,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
>>>      BOND_SLAVE_NOTIFY_NOW);
>>> 
>>> if (new_active) {
>>> - bool should_notify_peers = false;
>>> -
>>> bond_set_slave_active_flags(new_active,
>>>    BOND_SLAVE_NOTIFY_NOW);
>>> 
>>> @@ -1280,19 +1279,17 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
>>>      old_active);
>>> 
>>> if (netif_running(bond->dev)) {
>>> - bond->send_peer_notif =
>>> - bond->params.num_peer_notif *
>>> - max(1, bond->params.peer_notif_delay);
>>> - should_notify_peers =
>>> - bond_should_notify_peers(bond);
>>> + bond_peer_notify_reset(bond);
>>> +
>>> + if (bond_should_notify_peers(bond)) {
>>> + atomic_dec(&bond->send_peer_notif);
>>> + call_netdevice_notifiers(
>>> + NETDEV_NOTIFY_PEERS,
>>> + bond->dev);
>>> + }
>>> }
>>> 
>>> call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev);
>>> - if (should_notify_peers) {
>>> - bond->send_peer_notif--;
>>> - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
>>> -  bond->dev);
>>> - }
>>> }
>>> }
>>> 
>>> @@ -2801,7 +2798,7 @@ static void bond_mii_monitor(struct work_struct *work)
>>> 
>>> rcu_read_unlock();
>>> 
>>> - if (commit || bond->send_peer_notif) {
>>> + if (commit || should_notify_peers) {
>>> /* Race avoidance with bond_close cancel of workqueue */
>>> if (!rtnl_trylock()) {
>>> delay = 1;
>>> @@ -2816,16 +2813,15 @@ static void bond_mii_monitor(struct work_struct *work)
>>> bond_miimon_commit(bond);
>>> }
>>> 
>>> - if (bond->send_peer_notif) {
>>> - bond->send_peer_notif--;
>>> - if (should_notify_peers)
>>> - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
>>> -  bond->dev);
>>> - }
>>> + /* check again to avoid send_peer_notif has been changed. */
>>> + if (bond_should_notify_peers(bond))
>>> + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev);
>> 
>> Is the risk here that user space may have set send_peer_notify
>> to zero?
> If user space sets send_peer_notif to 0, bond_should_notify_peers() returns false. So NETDEV_NOTIFY_PEERS is disabled and no peer notification is sent.
>> 
>> 
>>> 
>>> rtnl_unlock(); /* might sleep, hold no other locks */
>>> }
>>> 
>>> + atomic_dec_if_positive(&bond->send_peer_notif);
>>> +
>> 
>> Also, it's a bit subtle, but I think this has to be outside of
>> the if block, or peer_notif_delay may be unreliable.  I'm not sure it
>> needs a comment, but could you confirm that's why this line is where it
>> is?
> Yes, I will add a comment in the next version. That is why this line is here:
> - whether or not there is a commit/peer notify, send_peer_notif is decremented in each loop iteration; therefore it should be placed outside of the if block.
> - make sure send_peer_notif-- happens after the commit or peer notify processing, to avoid the case where send_peer_notif is decremented but rtnl_trylock() failed.
> - regardless of whether send_peer_notif is set, atomic_dec_if_positive() can always be executed safely, since the value will never drop below 0. [A comment will note that this is safe.]
Although I have explained a lot above, on reflection it is still more appropriate to put it inside the if block.
Please help me review the next version.
>> 
>> -J
>> 
>>> re_arm:
>>> if (bond->params.miimon)
>>> queue_delayed_work(bond->wq, &bond->mii_work, delay);
>>> @@ -3773,7 +3769,7 @@ static void bond_activebackup_arp_mon(struct bonding *bond)
>>> return;
>>> 
>>> if (should_notify_peers) {
>>> - bond->send_peer_notif--;
>>> + atomic_dec(&bond->send_peer_notif);
>>> call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
>>> bond->dev);
>>> }
>>> @@ -4267,6 +4263,8 @@ static int bond_open(struct net_device *bond_dev)
>>> queue_delayed_work(bond->wq, &bond->alb_work, 0);
>>> }
>>> 
>>> + atomic_set(&bond->send_peer_notif, 0);
>>> +
>>> if (bond->params.miimon)  /* link check interval, in milliseconds. */
>>> queue_delayed_work(bond->wq, &bond->mii_work, 0);
>>> 
>>> @@ -4300,7 +4298,7 @@ static int bond_close(struct net_device *bond_dev)
>>> struct slave *slave;
>>> 
>>> bond_work_cancel_all(bond);
>>> - bond->send_peer_notif = 0;
>>> + atomic_set(&bond->send_peer_notif, 0);
>>> if (bond_is_lb(bond))
>>> bond_alb_deinitialize(bond);
>>> bond->recv_probe = NULL;
>>> diff --git a/include/net/bonding.h b/include/net/bonding.h
>>> index 49edc7da0586..afdfcb5bfaf0 100644
>>> --- a/include/net/bonding.h
>>> +++ b/include/net/bonding.h
>>> @@ -236,7 +236,7 @@ struct bonding {
>>> */
>>> spinlock_t mode_lock;
>>> spinlock_t stats_lock;
>>> - u32  send_peer_notif;
>>> + atomic_t send_peer_notif;
>>> u8       igmp_retrans;
>>> #ifdef CONFIG_PROC_FS
>>> struct   proc_dir_entry *proc_entry;
>>> @@ -814,4 +814,11 @@ static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *s
>>> return NET_XMIT_DROP;
>>> }
>>> 
>>> +static inline void bond_peer_notify_reset(struct bonding *bond)
>>> +{
>>> + atomic_set(&bond->send_peer_notif,
>>> + bond->params.num_peer_notif *
>>> + max(1, bond->params.peer_notif_delay));
>>> +}
>>> +
>>> #endif /* _NET_BONDING_H */
>>> -- 
>>> 2.34.1
>>> 
>> 
>> ---
>> -Jay Vosburgh, jv@...sburgh.net



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ