Message-ID: <20251029-support-other-eswitch-v1-4-98bb707b5d57@nvidia.com>
Date: Wed, 29 Oct 2025 17:42:56 +0200
From: Edward Srouji <edwards@...dia.com>
To: Leon Romanovsky <leon@...nel.org>, Saeed Mahameed <saeedm@...dia.com>,
	Tariq Toukan <tariqt@...dia.com>, Mark Bloch <mbloch@...dia.com>,
	Andrew Lunn <andrew+netdev@...n.ch>, "David S. Miller" <davem@...emloft.net>,
	Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
	Paolo Abeni <pabeni@...hat.com>, Jason Gunthorpe <jgg@...pe.ca>
CC: <netdev@...r.kernel.org>, <linux-rdma@...r.kernel.org>,
	<linux-kernel@...r.kernel.org>, Patrisious Haddad <phaddad@...dia.com>,
	Leon Romanovsky <leonro@...dia.com>, Edward Srouji <edwards@...dia.com>
Subject: [PATCH rdma-next 4/7] RDMA/mlx5: Change default device for LAG slaves in RDMA TRANSPORT namespaces

From: Patrisious Haddad <phaddad@...dia.com>

In case of a LAG configuration, change the root namespace core device of
all LAG slaves to the core device of the LAG master for RDMA_TRANSPORT
namespaces, in order to ensure all tables are created through the master
device.
Once the LAG is disabled, revert back to the native core device.

Signed-off-by: Patrisious Haddad <phaddad@...dia.com>
Signed-off-by: Leon Romanovsky <leonro@...dia.com>
Signed-off-by: Edward Srouji <edwards@...dia.com>
---
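A note on the error handling in mlx5_ib_set_owner_transport(): the TX table
root is moved first and restored if the RX move fails, so a device never ends
up with split ownership. Below is a minimal standalone sketch of that
take/rollback pattern; set_root() and struct dev are hypothetical stand-ins
for mlx5_fs_set_root_dev() and struct mlx5_core_dev, not the actual mlx5 API:

/*
 * Standalone sketch of the two-step ownership transfer with rollback.
 * set_root() and struct dev are hypothetical stand-ins, not mlx5 code.
 */
#include <stdio.h>

struct dev { const char *name; int fail_rx; };

/* Pretend to repoint one table root; may fail for the RX table. */
static int set_root(struct dev *cur, struct dev *new, const char *table)
{
	if (cur->fail_rx && table[0] == 'R')	/* simulate an RX failure */
		return -1;
	printf("%s root of %s -> %s\n", table, cur->name, new->name);
	return 0;
}

/* Same shape as mlx5_ib_set_owner_transport(): TX first, undo on RX failure. */
static int set_owner(struct dev *cur, struct dev *new)
{
	int ret = set_root(cur, new, "TX");

	if (ret)
		return ret;
	ret = set_root(cur, new, "RX");
	if (ret)
		set_root(cur, cur, "TX");	/* roll back the TX move */
	return ret;
}

int main(void)
{
	struct dev master = { "master", 0 };
	struct dev slave = { "slave", 1 };

	/* Partial failure: TX moves, RX fails, TX is restored. */
	if (set_owner(&slave, &master))
		printf("transfer failed, slave kept its native root\n");
	return 0;
}

mlx5_ib_take_transport() applies the same unwind one level up: if any
per-peer transfer fails, mlx5_ib_release_transport() returns every peer to
its native root.
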
 drivers/infiniband/hw/mlx5/ib_rep.c | 74 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c
index cc8859d3c2f5..bbecca405171 100644
--- a/drivers/infiniband/hw/mlx5/ib_rep.c
+++ b/drivers/infiniband/hw/mlx5/ib_rep.c
@@ -44,6 +44,63 @@ static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
 	}
 }
 
+static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner,
+					struct mlx5_core_dev *new_owner)
+{
+	int ret;
+
+	if (!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) ||
+	    !MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support))
+		return 0;
+
+	if (!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) ||
+	    !MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager_other_eswitch))
+		return 0;
+
+	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
+				   FS_FT_RDMA_TRANSPORT_TX);
+	if (ret)
+		return ret;
+
+	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
+				   FS_FT_RDMA_TRANSPORT_RX);
+	if (ret) {
+		mlx5_fs_set_root_dev(cur_owner, cur_owner,
+				     FS_FT_RDMA_TRANSPORT_TX);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void mlx5_ib_release_transport(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_dev *peer_dev;
+	int i, ret;
+
+	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+		ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev);
+		WARN_ON_ONCE(ret);
+	}
+}
+
+static int mlx5_ib_take_transport(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_dev *peer_dev;
+	int ret;
+	int i;
+
+	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
+		ret = mlx5_ib_set_owner_transport(peer_dev, dev);
+		if (ret) {
+			mlx5_ib_release_transport(dev);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 static int
 mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 {
@@ -88,10 +145,18 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	else
 		return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);
 
+	if (mlx5_lag_is_shared_fdb(dev)) {
+		ret = mlx5_ib_take_transport(lag_master);
+		if (ret)
+			return ret;
+	}
+
 	ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
 					 mlx5_core_net(lag_master));
-	if (!ibdev)
-		return -ENOMEM;
+	if (!ibdev) {
+		ret = -ENOMEM;
+		goto release_transport;
+	}
 
 	ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
 			      GFP_KERNEL);
@@ -127,6 +192,10 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	kfree(ibdev->port);
 fail_port:
 	ib_dealloc_device(&ibdev->ib_dev);
+release_transport:
+	if (mlx5_lag_is_shared_fdb(lag_master))
+		mlx5_ib_release_transport(lag_master);
+
 	return ret;
 }
 
@@ -182,6 +251,7 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 				esw = peer_mdev->priv.eswitch;
 				mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
 			}
+			mlx5_ib_release_transport(mdev);
 		}
 		__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
 	}

-- 
2.47.1
