lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4108bfd8-b19f-46ea-8820-47dd8fb9ee7c@blackwall.org>
Date: Wed, 5 Mar 2025 10:38:36 +0200
From: Nikolay Aleksandrov <razor@...ckwall.org>
To: Hangbin Liu <liuhangbin@...il.com>, netdev@...r.kernel.org
Cc: Jay Vosburgh <jv@...sburgh.net>, Andrew Lunn <andrew+netdev@...n.ch>,
 "David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
 Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
 Simon Horman <horms@...nel.org>, Shuah Khan <shuah@...nel.org>,
 Tariq Toukan <tariqt@...dia.com>, Jianbo Liu <jianbol@...dia.com>,
 Jarod Wilson <jarod@...hat.com>,
 Steffen Klassert <steffen.klassert@...unet.com>,
 Cosmin Ratiu <cratiu@...dia.com>, Petr Machata <petrm@...dia.com>,
 linux-kselftest@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCHv4 net 1/3] bonding: move IPsec deletion to
 bond_ipsec_free_sa

On 3/4/25 15:11, Hangbin Liu wrote:
> The fixed commit placed mutex_lock() inside spin_lock_bh(), which triggers
> a warning:
> 
>   BUG: sleeping function called from invalid context at...
> 
> Fix this by moving the IPsec deletion operation to bond_ipsec_free_sa(),
> which is not called with spin_lock_bh() held.
> 
> Additionally, delete the IPsec list entry in bond_ipsec_del_sa_all() when the
> XFRM state is DEAD to prevent xdo_dev_state_free() from being triggered
> again in bond_ipsec_free_sa().
> 
> For bond_ipsec_free_sa(), there are now three conditions:
> 
>   1. if (!slave): When no active device exists.
>   2. if (!xs->xso.real_dev): When xdo_dev_state_add() fails.
>   3. if (xs->xso.real_dev != real_dev): When an xs has already been freed
>      by bond_ipsec_del_sa_all() due to migration, and the active slave has
>      changed to a new device. At the same time, the xs is marked as DEAD
>      because the XFRM entry has been removed, triggering xfrm_state_gc_task() and
>      bond_ipsec_free_sa().
> 
> In all three cases, xdo_dev_state_free() should not be called; the xs
> should only be removed from the bond->ipsec_list.
> 
> At the same time, protect bond_ipsec_del_sa_all and bond_ipsec_add_sa_all
> with x->lock for each xs being processed. This prevents XFRM from
> concurrently initiating add/delete operations on the managed states.
> 
> Fixes: 2aeeef906d5a ("bonding: change ipsec_lock from spin lock to mutex")
> Reported-by: Jakub Kicinski <kuba@...nel.org>
> Closes: https://lore.kernel.org/netdev/20241212062734.182a0164@kernel.org
> Suggested-by: Cosmin Ratiu <cratiu@...dia.com>
> Signed-off-by: Hangbin Liu <liuhangbin@...il.com>
> ---
>  drivers/net/bonding/bond_main.c | 53 +++++++++++++++++++++++----------
>  1 file changed, 37 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> index e45bba240cbc..06b060d9b031 100644
> --- a/drivers/net/bonding/bond_main.c
> +++ b/drivers/net/bonding/bond_main.c
> @@ -537,15 +537,22 @@ static void bond_ipsec_add_sa_all(struct bonding *bond)
>  	}
>  
>  	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
> +		spin_lock_bh(&ipsec->xs->lock);
> +		/* Skip dead xfrm states, they'll be freed later. */
> +		if (ipsec->xs->km.state == XFRM_STATE_DEAD)
> +			goto next;
> +
>  		/* If new state is added before ipsec_lock acquired */
>  		if (ipsec->xs->xso.real_dev == real_dev)
> -			continue;
> +			goto next;
>  
>  		ipsec->xs->xso.real_dev = real_dev;
>  		if (real_dev->xfrmdev_ops->xdo_dev_state_add(ipsec->xs, NULL)) {
>  			slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__);
>  			ipsec->xs->xso.real_dev = NULL;
>  		}
> +next:
> +		spin_unlock_bh(&ipsec->xs->lock);
>  	}
>  out:
>  	mutex_unlock(&bond->ipsec_lock);
> @@ -560,7 +567,6 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
>  	struct net_device *bond_dev = xs->xso.dev;
>  	struct net_device *real_dev;
>  	netdevice_tracker tracker;
> -	struct bond_ipsec *ipsec;
>  	struct bonding *bond;
>  	struct slave *slave;
>  
> @@ -592,15 +598,6 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
>  	real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
>  out:
>  	netdev_put(real_dev, &tracker);
> -	mutex_lock(&bond->ipsec_lock);
> -	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
> -		if (ipsec->xs == xs) {
> -			list_del(&ipsec->list);
> -			kfree(ipsec);
> -			break;
> -		}
> -	}
> -	mutex_unlock(&bond->ipsec_lock);
>  }
>  
>  static void bond_ipsec_del_sa_all(struct bonding *bond)
> @@ -617,8 +614,18 @@ static void bond_ipsec_del_sa_all(struct bonding *bond)
>  
>  	mutex_lock(&bond->ipsec_lock);
>  	list_for_each_entry(ipsec, &bond->ipsec_list, list) {

Second time - you should use list_for_each_entry_safe if you're walking and deleting
elements from the list.

> +		spin_lock_bh(&ipsec->xs->lock);
>  		if (!ipsec->xs->xso.real_dev)
> -			continue;
> +			goto next;
> +
> +		if (ipsec->xs->km.state == XFRM_STATE_DEAD) {
> +			/* already dead no need to delete again */
> +			if (real_dev->xfrmdev_ops->xdo_dev_state_free)
> +				real_dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs);

Have you checked if .xdo_dev_state_free can sleep?
I see at least one that can: mlx5e_xfrm_free_state().

> +			list_del(&ipsec->list);
> +			kfree(ipsec);
> +			goto next;
> +		}
>  
>  		if (!real_dev->xfrmdev_ops ||
>  		    !real_dev->xfrmdev_ops->xdo_dev_state_delete ||
> @@ -631,6 +638,8 @@ static void bond_ipsec_del_sa_all(struct bonding *bond)
>  			if (real_dev->xfrmdev_ops->xdo_dev_state_free)
>  				real_dev->xfrmdev_ops->xdo_dev_state_free(ipsec->xs);
>  		}
> +next:
> +		spin_unlock_bh(&ipsec->xs->lock);
>  	}
>  	mutex_unlock(&bond->ipsec_lock);
>  }
> @@ -640,6 +649,7 @@ static void bond_ipsec_free_sa(struct xfrm_state *xs)
>  	struct net_device *bond_dev = xs->xso.dev;
>  	struct net_device *real_dev;
>  	netdevice_tracker tracker;
> +	struct bond_ipsec *ipsec;
>  	struct bonding *bond;
>  	struct slave *slave;
>  
> @@ -659,11 +669,22 @@ static void bond_ipsec_free_sa(struct xfrm_state *xs)
>  	if (!xs->xso.real_dev)
>  		goto out;
>  
> -	WARN_ON(xs->xso.real_dev != real_dev);
> +	mutex_lock(&bond->ipsec_lock);
> +	list_for_each_entry(ipsec, &bond->ipsec_list, list) {
> +		if (ipsec->xs == xs) {
> +			/* do xdo_dev_state_free if real_dev matches,
> +			 * otherwise only remove the list
> +			 */
> +			if (real_dev && real_dev->xfrmdev_ops &&
> +			    real_dev->xfrmdev_ops->xdo_dev_state_free)
> +				real_dev->xfrmdev_ops->xdo_dev_state_free(xs);
> +			list_del(&ipsec->list);
> +			kfree(ipsec);
> +			break;
> +		}
> +	}
> +	mutex_unlock(&bond->ipsec_lock);
>  
> -	if (real_dev && real_dev->xfrmdev_ops &&
> -	    real_dev->xfrmdev_ops->xdo_dev_state_free)
> -		real_dev->xfrmdev_ops->xdo_dev_state_free(xs);
>  out:
>  	netdev_put(real_dev, &tracker);
>  }


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ