lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251218-vf-bw-lag-mode-v1-3-7d8ed4368bea@nvidia.com>
Date: Thu, 18 Dec 2025 17:58:13 +0200
From: Edward Srouji <edwards@...dia.com>
To: <edwards@...dia.com>, Leon Romanovsky <leon@...nel.org>, Saeed Mahameed
	<saeedm@...dia.com>, Tariq Toukan <tariqt@...dia.com>, Mark Bloch
	<mbloch@...dia.com>, Andrew Lunn <andrew+netdev@...n.ch>, "David S. Miller"
	<davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski
	<kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, Jason Gunthorpe
	<jgg@...pe.ca>
CC: <netdev@...r.kernel.org>, <linux-rdma@...r.kernel.org>,
	<linux-kernel@...r.kernel.org>, Or Har-Toov <ohartoov@...dia.com>, "Maher
 Sanalla" <msanalla@...dia.com>
Subject: [PATCH mlx5-next 03/10] net/mlx5: Handle port and vport speed change events in MPESW

From: Or Har-Toov <ohartoov@...dia.com>

Add port change event handling logic for MPESW LAG mode, ensuring
VFs are updated when the speed of LAG physical ports changes.
This triggers a speed update workflow when relevant port state changes
occur, enabling consistent and accurate reporting of VF bandwidth.

Signed-off-by: Or Har-Toov <ohartoov@...dia.com>
Reviewed-by: Maher Sanalla <msanalla@...dia.com>
Reviewed-by: Mark Bloch <mbloch@...dia.com>
Signed-off-by: Edward Srouji <edwards@...dia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c  | 38 ++++++++++++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h  |  2 ++
 .../net/ethernet/mellanox/mlx5/core/lag/mpesw.c    | 39 ++++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/lag/mpesw.h    | 14 ++++++++
 drivers/net/ethernet/mellanox/mlx5/core/vport.c    | 29 ++++++++++++++++
 include/linux/mlx5/driver.h                        |  1 +
 include/linux/mlx5/vport.h                         |  2 ++
 7 files changed, 121 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index a042612dcde6..0b931aaecef8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -233,14 +233,25 @@ static void mlx5_ldev_free(struct kref *ref)
 {
 	struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref);
 	struct net *net;
+	int i;
 
 	if (ldev->nb.notifier_call) {
 		net = read_pnet(&ldev->net);
 		unregister_netdevice_notifier_net(net, &ldev->nb);
 	}
 
+	mlx5_ldev_for_each(i, 0, ldev) {
+		if (ldev->pf[i].dev &&
+		    ldev->pf[i].port_change_nb.nb.notifier_call) {
+			struct mlx5_nb *nb = &ldev->pf[i].port_change_nb;
+
+			mlx5_eq_notifier_unregister(ldev->pf[i].dev, nb);
+		}
+	}
+
 	mlx5_lag_mp_cleanup(ldev);
 	cancel_delayed_work_sync(&ldev->bond_work);
+	cancel_work_sync(&ldev->speed_update_work);
 	destroy_workqueue(ldev->wq);
 	mutex_destroy(&ldev->lock);
 	kfree(ldev);
@@ -274,6 +285,7 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
 	kref_init(&ldev->ref);
 	mutex_init(&ldev->lock);
 	INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work);
+	INIT_WORK(&ldev->speed_update_work, mlx5_mpesw_speed_update_work);
 
 	ldev->nb.notifier_call = mlx5_lag_netdev_event;
 	write_pnet(&ldev->net, mlx5_core_net(dev));
@@ -1033,6 +1045,13 @@ static int mlx5_lag_sum_devices_max_speed(struct mlx5_lag *ldev, u32 *max_speed)
 					  mlx5_port_max_linkspeed);
 }
 
+static int mlx5_lag_sum_devices_oper_speed(struct mlx5_lag *ldev,
+					   u32 *oper_speed)
+{
+	return mlx5_lag_sum_devices_speed(ldev, oper_speed,
+					  mlx5_port_oper_linkspeed);
+}
+
 static void mlx5_lag_modify_device_vports_speed(struct mlx5_core_dev *mdev,
 						u32 speed)
 {
@@ -1070,10 +1089,14 @@ void mlx5_lag_set_vports_agg_speed(struct mlx5_lag *ldev)
 	u32 speed;
 	int pf_idx;
 
-	speed = ldev->tracker.bond_speed_mbps;
-
-	if (speed == SPEED_UNKNOWN)
-		return;
+	if (ldev->mode == MLX5_LAG_MODE_MPESW) {
+		if (mlx5_lag_sum_devices_oper_speed(ldev, &speed))
+			return;
+	} else {
+		speed = ldev->tracker.bond_speed_mbps;
+		if (speed == SPEED_UNKNOWN)
+			return;
+	}
 
 	/* If speed is not set, use the sum of max speeds of all PFs */
 	if (!speed && mlx5_lag_sum_devices_max_speed(ldev, &speed))
@@ -1520,6 +1543,10 @@ static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev,
 
 	ldev->pf[fn].dev = dev;
 	dev->priv.lag = ldev;
+
+	MLX5_NB_INIT(&ldev->pf[fn].port_change_nb,
+		     mlx5_lag_mpesw_port_change_event, PORT_CHANGE);
+	mlx5_eq_notifier_register(dev, &ldev->pf[fn].port_change_nb);
 }
 
 static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
@@ -1531,6 +1558,9 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev,
 	if (ldev->pf[fn].dev != dev)
 		return;
 
+	if (ldev->pf[fn].port_change_nb.nb.notifier_call)
+		mlx5_eq_notifier_unregister(dev, &ldev->pf[fn].port_change_nb);
+
 	ldev->pf[fn].dev = NULL;
 	dev->priv.lag = NULL;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index 8de5640a0161..be1afece5fdc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -39,6 +39,7 @@ struct lag_func {
 	struct mlx5_core_dev *dev;
 	struct net_device    *netdev;
 	bool has_drop;
+	struct mlx5_nb port_change_nb;
 };
 
 /* Used for collection of netdev event info. */
@@ -67,6 +68,7 @@ struct mlx5_lag {
 	struct lag_tracker        tracker;
 	struct workqueue_struct   *wq;
 	struct delayed_work       bond_work;
+	struct work_struct        speed_update_work;
 	struct notifier_block     nb;
 	possible_net_t net;
 	struct lag_mp             lag_mp;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
index aad52d3a90e6..31464343f642 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -103,6 +103,8 @@ static int enable_mpesw(struct mlx5_lag *ldev)
 			goto err_rescan_drivers;
 	}
 
+	mlx5_lag_set_vports_agg_speed(ldev);
+
 	return 0;
 
 err_rescan_drivers:
@@ -216,3 +218,40 @@ bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev)
 	return ldev && ldev->mode == MLX5_LAG_MODE_MPESW;
 }
 EXPORT_SYMBOL(mlx5_lag_is_mpesw);
+
+void mlx5_mpesw_speed_update_work(struct work_struct *work)
+{
+	struct mlx5_lag *ldev = container_of(work, struct mlx5_lag,
+					     speed_update_work);
+
+	mutex_lock(&ldev->lock);
+	if (ldev->mode == MLX5_LAG_MODE_MPESW) {
+		if (ldev->mode_changes_in_progress)
+			queue_work(ldev->wq, &ldev->speed_update_work);
+		else
+			mlx5_lag_set_vports_agg_speed(ldev);
+	}
+
+	mutex_unlock(&ldev->lock);
+}
+
+int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb,
+				     unsigned long event, void *data)
+{
+	struct mlx5_nb *mlx5_nb = container_of(nb, struct mlx5_nb, nb);
+	struct lag_func *lag_func = container_of(mlx5_nb,
+						 struct lag_func,
+						 port_change_nb);
+	struct mlx5_core_dev *dev = lag_func->dev;
+	struct mlx5_lag *ldev = dev->priv.lag;
+	struct mlx5_eqe *eqe = data;
+
+	if (!ldev)
+		return NOTIFY_DONE;
+
+	if (eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_DOWN ||
+	    eqe->sub_type == MLX5_PORT_CHANGE_SUBTYPE_ACTIVE)
+		queue_work(ldev->wq, &ldev->speed_update_work);
+
+	return NOTIFY_OK;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
index 02520f27a033..f5d9b5c97b0d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
@@ -32,4 +32,18 @@ bool mlx5_lag_is_mpesw(struct mlx5_core_dev *dev);
 void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev);
 int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev);
 
+#ifdef CONFIG_MLX5_ESWITCH
+void mlx5_mpesw_speed_update_work(struct work_struct *work);
+int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb,
+				     unsigned long event, void *data);
+#else
+static inline void mlx5_mpesw_speed_update_work(struct work_struct *work) {}
+static inline int mlx5_lag_mpesw_port_change_event(struct notifier_block *nb,
+						   unsigned long event,
+						   void *data)
+{
+	return NOTIFY_DONE;
+}
+#endif /* CONFIG_MLX5_ESWITCH */
+
 #endif /* __MLX5_LAG_MPESW_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 78b1b291cfa4..cb098d3eb2fa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -122,6 +122,35 @@ int mlx5_modify_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 opmod,
 	return mlx5_cmd_exec_in(mdev, modify_vport_state, in);
 }
 
+int mlx5_query_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 op_mod,
+				  u16 vport, u8 other_vport, u32 *max_tx_speed)
+{
+	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
+	u32 state;
+	int err;
+
+	MLX5_SET(query_vport_state_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_VPORT_STATE);
+	MLX5_SET(query_vport_state_in, in, op_mod, op_mod);
+	MLX5_SET(query_vport_state_in, in, vport_number, vport);
+	MLX5_SET(query_vport_state_in, in, other_vport, other_vport);
+
+	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
+	if (err)
+		return err;
+
+	state = MLX5_GET(query_vport_state_out, out, state);
+	if (state == VPORT_STATE_DOWN) {
+		*max_tx_speed = 0;
+		return 0;
+	}
+
+	*max_tx_speed = MLX5_GET(query_vport_state_out, out, max_tx_speed);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_vport_max_tx_speed);
+
 static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
 					bool other_vport, u32 *out)
 {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 1c54aa6f74fb..9e0ab3cfab73 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1149,6 +1149,7 @@ int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_roce(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
+int mlx5_lag_query_bond_speed(struct net_device *bond_dev, u32 *speed);
 bool mlx5_lag_mode_is_hash(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_master(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev);
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 2acf10e9f60a..dfa2fe32217a 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -60,6 +60,8 @@ enum {
 u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport);
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 				  u16 vport, u8 other_vport, u8 state);
+int mlx5_query_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 op_mod,
+				  u16 vport, u8 other_vport, u32 *max_tx_speed);
 int mlx5_modify_vport_max_tx_speed(struct mlx5_core_dev *mdev, u8 opmod,
 				   u16 vport, u8 other_vport, u16 max_tx_speed);
 int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,

-- 
2.47.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ