[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <13cb4b16-51b0-4042-8435-6dac72586e55@blackwall.org>
Date: Thu, 27 Feb 2025 11:21:51 +0200
From: Nikolay Aleksandrov <razor@...ckwall.org>
To: Hangbin Liu <liuhangbin@...il.com>, netdev@...r.kernel.org
Cc: Jay Vosburgh <jv@...sburgh.net>, Andrew Lunn <andrew+netdev@...n.ch>,
"David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
Simon Horman <horms@...nel.org>, Shuah Khan <shuah@...nel.org>,
Tariq Toukan <tariqt@...dia.com>, Jianbo Liu <jianbol@...dia.com>,
Jarod Wilson <jarod@...hat.com>,
Steffen Klassert <steffen.klassert@...unet.com>,
Cosmin Ratiu <cratiu@...dia.com>, linux-kselftest@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCHv3 net 1/3] bonding: move IPsec deletion to
bond_ipsec_free_sa
On 2/27/25 10:50, Nikolay Aleksandrov wrote:
> On 2/27/25 10:37, Hangbin Liu wrote:
>> The fixed commit placed mutex_lock() inside spin_lock_bh(), which triggers
>> a warning:
>>
>> BUG: sleeping function called from invalid context at...
>>
>> Fix this by moving the IPsec deletion operation to bond_ipsec_free_sa,
>> which is not called with spin_lock_bh() held.
>>
>> Additionally, delete the IPsec list in bond_ipsec_del_sa_all() when the
>> XFRM state is DEAD to prevent xdo_dev_state_free() from being triggered
>> again in bond_ipsec_free_sa().
>>
>> For bond_ipsec_free_sa(), there are now three conditions:
>>
>> 1. if (!slave): When no active device exists.
>> 2. if (!xs->xso.real_dev): When xdo_dev_state_add() fails.
>> 3. if (xs->xso.real_dev != real_dev): When an xs has already been freed
>> by bond_ipsec_del_sa_all() due to migration, and the active slave has
>> changed to a new device. At the same time, the xs is marked as DEAD
>> because the XFRM entry was removed, triggering xfrm_state_gc_task() and
>> bond_ipsec_free_sa().
>>
>> In all three cases, xdo_dev_state_free() should not be called, only xs
>> should be removed from bond->ipsec list.
>>
>> Fixes: 2aeeef906d5a ("bonding: change ipsec_lock from spin lock to mutex")
>> Reported-by: Jakub Kicinski <kuba@...nel.org>
>> Closes: https://lore.kernel.org/netdev/20241212062734.182a0164@kernel.org
>> Suggested-by: Cosmin Ratiu <cratiu@...dia.com>
>> Signed-off-by: Hangbin Liu <liuhangbin@...il.com>
>> ---
>> drivers/net/bonding/bond_main.c | 34 ++++++++++++++++++++++-----------
>> 1 file changed, 23 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>> index e45bba240cbc..683bf1221caf 100644
>> --- a/drivers/net/bonding/bond_main.c
>> +++ b/drivers/net/bonding/bond_main.c
>> @@ -537,6 +537,10 @@ static void bond_ipsec_add_sa_all(struct bonding *bond)
>> }
>>
>> list_for_each_entry(ipsec, &bond->ipsec_list, list) {
>> + /* Skip dead xfrm states, they'll be freed later. */
>> + if (ipsec->xs->km.state == XFRM_STATE_DEAD)
>> + continue;
>> +
>> /* If new state is added before ipsec_lock acquired */
>> if (ipsec->xs->xso.real_dev == real_dev)
>> continue;
>> @@ -560,7 +564,6 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
>> struct net_device *bond_dev = xs->xso.dev;
>> struct net_device *real_dev;
>> netdevice_tracker tracker;
>> - struct bond_ipsec *ipsec;
>> struct bonding *bond;
>> struct slave *slave;
>>
>> @@ -592,15 +595,6 @@ static void bond_ipsec_del_sa(struct xfrm_state *xs)
>> real_dev->xfrmdev_ops->xdo_dev_state_delete(xs);
>> out:
>> netdev_put(real_dev, &tracker);
>> - mutex_lock(&bond->ipsec_lock);
>> - list_for_each_entry(ipsec, &bond->ipsec_list, list) {
>> - if (ipsec->xs == xs) {
>> - list_del(&ipsec->list);
>> - kfree(ipsec);
>> - break;
>> - }
>> - }
>> - mutex_unlock(&bond->ipsec_lock);
>> }
>>
>> static void bond_ipsec_del_sa_all(struct bonding *bond)
>> @@ -617,6 +611,12 @@ static void bond_ipsec_del_sa_all(struct bonding *bond)
>>
>> mutex_lock(&bond->ipsec_lock);
>> list_for_each_entry(ipsec, &bond->ipsec_list, list) {
>> + if (ipsec->xs->km.state == XFRM_STATE_DEAD) {
>> + list_del(&ipsec->list);
>
> To be able to do this here, you'll have to use list_for_each_entry_safe().
>
One more thing - note that I'm far from an xfrm expert, but it seems to me that
here you also have to call xdo_dev_state_free() with the old active slave dev.
Otherwise it will never get called with the original real_dev after the switch
to a new active slave (or, more accurately, it might if the GC runs between the
switching, but that is a race). Care must be taken wrt the sequence of events
because the XFRM GC may be running in parallel. That probably means that in
bond_ipsec_free_sa() you'll have to take the mutex before calling
xdo_dev_state_free() and check whether the entry is still linked in the bond's
ipsec list before calling the free_sa callback. If it isn't, then del_sa_all got
to it before the GC, and there's nothing to do if it also called the dev's
free_sa callback. The check for real_dev doesn't seem enough to protect against
this race.
Cheers,
Nik
>> + kfree(ipsec);
>> + continue;
>> + }
>> +
>> if (!ipsec->xs->xso.real_dev)
>> continue;
>>
>> @@ -640,6 +640,7 @@ static void bond_ipsec_free_sa(struct xfrm_state *xs)
>> struct net_device *bond_dev = xs->xso.dev;
>> struct net_device *real_dev;
>> netdevice_tracker tracker;
>> + struct bond_ipsec *ipsec;
>> struct bonding *bond;
>> struct slave *slave;
>>
>> @@ -659,13 +660,24 @@ static void bond_ipsec_free_sa(struct xfrm_state *xs)
>> if (!xs->xso.real_dev)
>> goto out;
>>
>> - WARN_ON(xs->xso.real_dev != real_dev);
>> + if (xs->xso.real_dev != real_dev)
>> + goto out;
>>
>> if (real_dev && real_dev->xfrmdev_ops &&
>> real_dev->xfrmdev_ops->xdo_dev_state_free)
>> real_dev->xfrmdev_ops->xdo_dev_state_free(xs);
>> out:
>> netdev_put(real_dev, &tracker);
>> +
>> + mutex_lock(&bond->ipsec_lock);
>> + list_for_each_entry(ipsec, &bond->ipsec_list, list) {
>> + if (ipsec->xs == xs) {
>> + list_del(&ipsec->list);
>> + kfree(ipsec);
>> + break;
>> + }
>> + }
>> + mutex_unlock(&bond->ipsec_lock);
>> }
>>
>> /**
>
Powered by blists - more mailing lists