lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <7e0a0866-8e3c-4abd-8e4f-ac61cc04a69e@blackwall.org>
Date: Thu, 27 Jun 2024 11:29:21 +0300
From: Nikolay Aleksandrov <razor@...ckwall.org>
To: Hangbin Liu <liuhangbin@...il.com>,
 Jay Vosburgh <jay.vosburgh@...onical.com>
Cc: Jakub Kicinski <kuba@...nel.org>, netdev@...r.kernel.org,
 Andy Gospodarek <andy@...yhouse.net>, "David S. Miller"
 <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
 Paolo Abeni <pabeni@...hat.com>, Ido Schimmel <idosch@...dia.com>,
 Jiri Pirko <jiri@...nulli.us>, Amit Cohen <amcohen@...dia.com>
Subject: Re: [PATCHv3 net-next] bonding: 3ad: send ifinfo notify when mux
 state changed

On 27/06/2024 11:26, Hangbin Liu wrote:
> On Wed, Jun 26, 2024 at 05:06:00PM -0700, Jay Vosburgh wrote:
>>> Hits:
>>>
>>> RTNL: assertion failed at net/core/rtnetlink.c (1823)
> 
> Thanks for this hits...
> 
>>>
>>> On two selftests. Please run the selftests on a debug kernel..
> 
> OK, I will try run my tests on debug kernel in future.
> 
>>
>> 	Oh, I forgot about needing RTNL.
>>

+1 & facepalm, completely forgot it was running without rtnl

>> 	We cannot simply acquire RTNL in ad_mux_machine(), as the
>> bond->mode_lock is already held, and the lock ordering must be RTNL
>> first, then mode_lock, lest we deadlock.
>>
>> 	Hangbin, I'd suggest you look at how bond_netdev_notify_work()
>> complies with the lock ordering (basically, doing the actual work out of
>> line in a workqueue event), or how the "should_notify" flag is used in
>> bond_3ad_state_machine_handler().  The first is more complicated, but
>> won't skip events; the second may miss intermediate state transitions if
>> it cannot acquire RTNL and has to delay the notification.
> 
> I think the administer doesn't want to loose the state change info. So how
> about something like:
> 

You can (and will) miss events with the below code. It is kind of best effort,
but if the notification is not run before the next state change, you will
lose the intermediate changes.

> diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
> index c6807e473ab7..046f11c5c47a 100644
> --- a/drivers/net/bonding/bond_3ad.c
> +++ b/drivers/net/bonding/bond_3ad.c
> @@ -1185,6 +1185,8 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
>  		default:
>  			break;
>  		}
> +
> +		port->slave->should_notify_lacp = 1;
>  	}
>  }
>  
> @@ -2527,6 +2529,9 @@ void bond_3ad_state_machine_handler(struct work_struct *work)
>  		bond_slave_state_notify(bond);
>  		rtnl_unlock();
>  	}
> +
> +	/* Notify the mux state changes */
> +	bond_slave_link_notify(bond);
>  	queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks);
>  }
>  
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index 3c3fcce4acd4..db8f2fb613df 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -1748,11 +1748,19 @@ static void bond_netdev_notify_work(struct work_struct *_work)
>  					   notify_work.work);
>  
>  	if (rtnl_trylock()) {
> -		struct netdev_bonding_info binfo;
> +		if (slave->should_notify_link) {
> +			struct netdev_bonding_info binfo;
> +			bond_fill_ifslave(slave, &binfo.slave);
> +			bond_fill_ifbond(slave->bond, &binfo.master);
> +			netdev_bonding_info_change(slave->dev, &binfo);
> +			slave->should_notify_link = 0;
> +		}
> +
> +		if (slave->should_notify_lacp) {
> +			rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_KERNEL, 0, NULL);
> +			slave->should_notify_lacp = 0;
> +		}
>  
> -		bond_fill_ifslave(slave, &binfo.slave);
> -		bond_fill_ifbond(slave->bond, &binfo.master);
> -		netdev_bonding_info_change(slave->dev, &binfo);
>  		rtnl_unlock();
>  	} else {
>  		queue_delayed_work(slave->bond->wq, &slave->notify_work, 1);
> diff --git a/include/net/bonding.h b/include/net/bonding.h
> index b61fb1aa3a56..38d37ea2382c 100644
> --- a/include/net/bonding.h
> +++ b/include/net/bonding.h
> @@ -170,7 +170,8 @@ struct slave {
>  	       inactive:1, /* indicates inactive slave */
>  	       rx_disabled:1, /* indicates whether slave's Rx is disabled */
>  	       should_notify:1, /* indicates whether the state changed */
> -	       should_notify_link:1; /* indicates whether the link changed */
> +	       should_notify_link:1, /* indicates whether the link changed */
> +	       should_notify_lacp:1; /* indicates whether the lacp state changed */
>  	u8     duplex;
>  	u32    original_mtu;
>  	u32    link_failure_count;
> @@ -641,11 +642,10 @@ static inline void bond_slave_link_notify(struct bonding *bond)
>  	struct slave *tmp;
>  
>  	bond_for_each_slave(bond, tmp, iter) {
> -		if (tmp->should_notify_link) {
> +		if (tmp->should_notify_link || tmp->should_notify_lacp)
>  			bond_queue_slave_event(tmp);
> +		if (tmp->should_notify_link)
>  			bond_lower_state_changed(tmp);
> -			tmp->should_notify_link = 0;
> -		}
>  	}
>  }
>  
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index eabfc8290f5e..4507bb8d5264 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -4116,6 +4116,7 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change,
>  	rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags,
>  			   NULL, 0, portid, nlh);
>  }
> +EXPORT_SYMBOL(rtmsg_ifinfo);
>  
>  void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change,
>  			 gfp_t flags, int *new_nsid, int new_ifindex)


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ