[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251113104310.1243150-1-cratiu@nvidia.com>
Date: Thu, 13 Nov 2025 12:43:09 +0200
From: Cosmin Ratiu <cratiu@...dia.com>
To: <netdev@...r.kernel.org>
CC: Jay Vosburgh <jv@...sburgh.net>, Andrew Lunn <andrew+netdev@...n.ch>,
"David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, "Simon
Horman" <horms@...nel.org>, Taehee Yoo <ap420073@...il.com>, Jianbo Liu
<jianbol@...dia.com>, Steffen Klassert <steffen.klassert@...unet.com>,
Herbert Xu <herbert@...dor.apana.org.au>, Leon Romanovsky
<leonro@...dia.com>, Cosmin Ratiu <cratiu@...dia.com>
Subject: [PATCH ipsec v2 1/2] bond: Use xfrm_state_migrate to migrate SAs
The bonding driver manages offloaded SAs using the following strategy:
An xfrm_state offloaded on the bond device with bond_ipsec_add_sa() uses
'real_dev' on the xfrm_state xs to redirect the offload to the current
active slave. The corresponding bond_ipsec_del_sa() (called with the xs
spinlock held) redirects the unoffload call to real_dev. Finally,
cleanup happens in bond_ipsec_free_sa(), which removes the offload from
the device. Since the last call happens without the xs spinlock held,
that is where the real work to unoffload actually happens.
When the active slave changes to a new device a 3-step process is used
to migrate all xfrm states to the new device:
1. bond_ipsec_del_sa_all() unoffloads all states in bond->ipsec_list
from the previously active device.
2. The active slave is flipped to the new device.
3. bond_ipsec_add_sa_all() offloads all states in bond->ipsec_list to
the new device.
This patch closes a race that could happen between xfrm_state migration
and TX, which could result in unencrypted packets going out the wire:
CPU1 (xfrm_output) CPU2 (bond_change_active_slave)
bond_ipsec_offload_ok -> true
bond_ipsec_del_sa_all
bond_xmit_activebackup
bond_dev_queue_xmit
dev_queue_xmit on old_dev
bond->curr_active_slave = new_dev
bond_ipsec_add_sa_all
So the packet makes it out to old_dev after the offloaded xfrm_state is
deleted from it. The result: an unencrypted IPSec packet on the wire.
With the new approach, in-use states on old_dev will not be deleted
until in-flight packets are transmitted. It also makes for cleaner
bonding code, which no longer needs to care about xfrm_state management
so much.
Fixes: ("ec13009472f4 bonding: implement xdo_dev_state_free and call it after deletion")
Signed-off-by: Cosmin Ratiu <cratiu@...dia.com>
---
drivers/net/bonding/bond_main.c | 126 ++++++++++++--------------------
1 file changed, 45 insertions(+), 81 deletions(-)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 67fdcbdd2764..e45e89179236 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -513,19 +513,21 @@ static int bond_ipsec_add_sa(struct net_device *bond_dev,
return err;
}
-static void bond_ipsec_add_sa_all(struct bonding *bond)
+static void bond_ipsec_migrate_sa_all(struct bonding *bond)
{
+ struct slave *new_active = rtnl_dereference(bond->curr_active_slave);
struct net_device *bond_dev = bond->dev;
+ struct net *net = dev_net(bond_dev);
+ struct bond_ipsec *ipsec, *tmp;
+ struct xfrm_user_offload xuo;
struct net_device *real_dev;
- struct bond_ipsec *ipsec;
- struct slave *slave;
+ struct xfrm_migrate m = {};
+ LIST_HEAD(ipsec_list);
- slave = rtnl_dereference(bond->curr_active_slave);
- real_dev = slave ? slave->dev : NULL;
- if (!real_dev)
+ if (!new_active)
return;
- mutex_lock(&bond->ipsec_lock);
+ real_dev = new_active->dev;
if (!real_dev->xfrmdev_ops ||
!real_dev->xfrmdev_ops->xdo_dev_state_add ||
netif_is_bond_master(real_dev)) {
@@ -533,36 +535,42 @@ static void bond_ipsec_add_sa_all(struct bonding *bond)
slave_warn(bond_dev, real_dev,
"%s: no slave xdo_dev_state_add\n",
__func__);
- goto out;
+ return;
}
- list_for_each_entry(ipsec, &bond->ipsec_list, list) {
- /* If new state is added before ipsec_lock acquired */
- if (ipsec->xs->xso.real_dev == real_dev)
- continue;
+ /* Prepare the list of xfrm_states to be migrated. */
+ mutex_lock(&bond->ipsec_lock);
+ list_splice_init(&bond->ipsec_list, &ipsec_list);
+ /* Add back states already offloaded on the new device before the
+ * lock was acquired and hold all remaining states to avoid them
+ * getting deleted during the migration.
+ */
+ list_for_each_entry_safe(ipsec, tmp, &ipsec_list, list) {
+ if (unlikely(ipsec->xs->xso.real_dev == real_dev))
+ list_move_tail(&ipsec->list, &bond->ipsec_list);
+ else
+ xfrm_state_hold(ipsec->xs);
+ }
+ mutex_unlock(&bond->ipsec_lock);
- if (real_dev->xfrmdev_ops->xdo_dev_state_add(real_dev,
- ipsec->xs, NULL)) {
- slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__);
- continue;
- }
+ xuo.ifindex = bond_dev->ifindex;
+ list_for_each_entry_safe(ipsec, tmp, &ipsec_list, list) {
+ struct xfrm_state *x = ipsec->xs;
- spin_lock_bh(&ipsec->xs->lock);
- /* xs might have been killed by the user during the migration
- * to the new dev, but bond_ipsec_del_sa() should have done
- * nothing, as xso.real_dev is NULL.
- * Delete it from the device we just added it to. The pending
- * bond_ipsec_free_sa() call will do the rest of the cleanup.
- */
- if (ipsec->xs->km.state == XFRM_STATE_DEAD &&
- real_dev->xfrmdev_ops->xdo_dev_state_delete)
- real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev,
- ipsec->xs);
- ipsec->xs->xso.real_dev = real_dev;
- spin_unlock_bh(&ipsec->xs->lock);
+ m.new_family = x->props.family;
+ memcpy(&m.new_daddr, &x->id.daddr, sizeof(x->id.daddr));
+ memcpy(&m.new_saddr, &x->props.saddr, sizeof(x->props.saddr));
+
+ xuo.flags = x->xso.dir == XFRM_DEV_OFFLOAD_IN ?
+ XFRM_OFFLOAD_INBOUND : 0;
+
+ if (!xfrm_state_migrate(x, &m, NULL, net, &xuo, NULL))
+ slave_warn(bond_dev, real_dev,
+ "%s: xfrm_state_migrate failed\n", __func__);
+ xfrm_state_delete(x);
+ xfrm_state_put(x);
+ kfree(ipsec);
}
-out:
- mutex_unlock(&bond->ipsec_lock);
}
/**
@@ -590,47 +598,6 @@ static void bond_ipsec_del_sa(struct net_device *bond_dev,
real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev, xs);
}
-static void bond_ipsec_del_sa_all(struct bonding *bond)
-{
- struct net_device *bond_dev = bond->dev;
- struct net_device *real_dev;
- struct bond_ipsec *ipsec;
- struct slave *slave;
-
- slave = rtnl_dereference(bond->curr_active_slave);
- real_dev = slave ? slave->dev : NULL;
- if (!real_dev)
- return;
-
- mutex_lock(&bond->ipsec_lock);
- list_for_each_entry(ipsec, &bond->ipsec_list, list) {
- if (!ipsec->xs->xso.real_dev)
- continue;
-
- if (!real_dev->xfrmdev_ops ||
- !real_dev->xfrmdev_ops->xdo_dev_state_delete ||
- netif_is_bond_master(real_dev)) {
- slave_warn(bond_dev, real_dev,
- "%s: no slave xdo_dev_state_delete\n",
- __func__);
- continue;
- }
-
- spin_lock_bh(&ipsec->xs->lock);
- ipsec->xs->xso.real_dev = NULL;
- /* Don't double delete states killed by the user. */
- if (ipsec->xs->km.state != XFRM_STATE_DEAD)
- real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev,
- ipsec->xs);
- spin_unlock_bh(&ipsec->xs->lock);
-
- if (real_dev->xfrmdev_ops->xdo_dev_state_free)
- real_dev->xfrmdev_ops->xdo_dev_state_free(real_dev,
- ipsec->xs);
- }
- mutex_unlock(&bond->ipsec_lock);
-}
-
static void bond_ipsec_free_sa(struct net_device *bond_dev,
struct xfrm_state *xs)
{
@@ -1221,10 +1188,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
if (old_active == new_active)
return;
-#ifdef CONFIG_XFRM_OFFLOAD
- bond_ipsec_del_sa_all(bond);
-#endif /* CONFIG_XFRM_OFFLOAD */
-
if (new_active) {
new_active->last_link_up = jiffies;
@@ -1247,6 +1210,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
if (bond_uses_primary(bond))
slave_info(bond->dev, new_active->dev, "making interface the new active one\n");
}
+
}
if (bond_uses_primary(bond))
@@ -1264,6 +1228,10 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
rcu_assign_pointer(bond->curr_active_slave, new_active);
}
+#ifdef CONFIG_XFRM_OFFLOAD
+ bond_ipsec_migrate_sa_all(bond);
+#endif
+
if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) {
if (old_active)
bond_set_slave_inactive_flags(old_active,
@@ -1296,10 +1264,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
}
}
-#ifdef CONFIG_XFRM_OFFLOAD
- bond_ipsec_add_sa_all(bond);
-#endif /* CONFIG_XFRM_OFFLOAD */
-
/* resend IGMP joins since active slave has changed or
* all were sent on curr_active_slave.
* resend only if bond is brought up with the affected
--
2.45.0
Powered by blists - more mailing lists