[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1763644166-1250608-13-git-send-email-tariqt@nvidia.com>
Date: Thu, 20 Nov 2025 15:09:24 +0200
From: Tariq Toukan <tariqt@...dia.com>
To: Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Andrew Lunn <andrew+netdev@...n.ch>, "David
S. Miller" <davem@...emloft.net>
CC: Donald Hunter <donald.hunter@...il.com>, Jiri Pirko <jiri@...nulli.us>,
Jonathan Corbet <corbet@....net>, Saeed Mahameed <saeedm@...dia.com>, "Leon
Romanovsky" <leon@...nel.org>, Tariq Toukan <tariqt@...dia.com>, Mark Bloch
<mbloch@...dia.com>, <netdev@...r.kernel.org>,
<linux-kernel@...r.kernel.org>, <linux-doc@...r.kernel.org>,
<linux-rdma@...r.kernel.org>, Gal Pressman <gal@...dia.com>, Moshe Shemesh
<moshe@...dia.com>, Carolina Jubran <cjubran@...dia.com>, Cosmin Ratiu
<cratiu@...dia.com>, Jiri Pirko <jiri@...dia.com>
Subject: [PATCH net-next 12/14] net/mlx5: qos: Support cross-device tx scheduling
From: Cosmin Ratiu <cratiu@...dia.com>
Up to now, rate groups could only contain vports from the same E-Switch.
This patch relaxes that restriction if the device supports it
(HCA_CAP.esw_cross_esw_sched == true) and the right conditions are met:
- Link Aggregation (LAG) is enabled.
- The E-Switches are from the same shared devlink device.
This patch does not yet enable cross-esw scheduling, it's just the last
preparatory patch.
Signed-off-by: Cosmin Ratiu <cratiu@...dia.com>
Reviewed-by: Carolina Jubran <cjubran@...dia.com>
Signed-off-by: Tariq Toukan <tariqt@...dia.com>
---
.../net/ethernet/mellanox/mlx5/core/esw/qos.c | 114 +++++++++++++-----
1 file changed, 81 insertions(+), 33 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index f86d7c50db42..3c8716b0644b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -50,7 +50,9 @@ struct mlx5_esw_sched_node {
enum sched_node_type type;
/* The eswitch this node belongs to. */
struct mlx5_eswitch *esw;
- /* The children nodes of this node, empty list for leaf nodes. */
+ /* The children nodes of this node, empty list for leaf nodes.
+ * Can be from multiple E-Switches.
+ */
struct list_head children;
/* Valid only if this node is associated with a vport. */
struct mlx5_vport *vport;
@@ -419,6 +421,7 @@ esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node,
struct mlx5_esw_sched_node *parent = vport_node->parent;
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
struct mlx5_core_dev *dev = vport_node->esw->dev;
+ struct mlx5_vport *vport = vport_node->vport;
void *attr;
if (!mlx5_qos_element_type_supported(
@@ -430,11 +433,18 @@ esw_qos_vport_create_sched_element(struct mlx5_esw_sched_node *vport_node,
MLX5_SET(scheduling_context, sched_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
- MLX5_SET(vport_element, attr, vport_number, vport_node->vport->vport);
+ MLX5_SET(vport_element, attr, vport_number, vport->vport);
MLX5_SET(scheduling_context, sched_ctx, parent_element_id,
parent ? parent->ix : vport_node->esw->qos.root_tsar_ix);
MLX5_SET(scheduling_context, sched_ctx, max_average_bw,
vport_node->max_rate);
+ if (vport->dev != dev) {
+ /* The port is assigned to a node on another eswitch. */
+ MLX5_SET(vport_element, attr, eswitch_owner_vhca_id_valid,
+ true);
+ MLX5_SET(vport_element, attr, eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(vport->dev, vhca_id));
+ }
return esw_qos_node_create_sched_element(vport_node, sched_ctx, extack);
}
@@ -446,6 +456,7 @@ esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node,
{
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
struct mlx5_core_dev *dev = vport_tc_node->esw->dev;
+ struct mlx5_vport *vport = vport_tc_node->vport;
void *attr;
if (!mlx5_qos_element_type_supported(
@@ -457,8 +468,7 @@ esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node,
MLX5_SET(scheduling_context, sched_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC);
attr = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
- MLX5_SET(vport_tc_element, attr, vport_number,
- vport_tc_node->vport->vport);
+ MLX5_SET(vport_tc_element, attr, vport_number, vport->vport);
MLX5_SET(vport_tc_element, attr, traffic_class, vport_tc_node->tc);
MLX5_SET(scheduling_context, sched_ctx, max_bw_obj_id,
rate_limit_elem_ix);
@@ -466,6 +476,13 @@ esw_qos_vport_tc_create_sched_element(struct mlx5_esw_sched_node *vport_tc_node,
vport_tc_node->parent->ix);
MLX5_SET(scheduling_context, sched_ctx, bw_share,
vport_tc_node->bw_share);
+ if (vport->dev != dev) {
+ /* The port is assigned to a node on another eswitch. */
+ MLX5_SET(vport_tc_element, attr, eswitch_owner_vhca_id_valid,
+ true);
+ MLX5_SET(vport_tc_element, attr, eswitch_owner_vhca_id,
+ MLX5_CAP_GEN(vport->dev, vhca_id));
+ }
return esw_qos_node_create_sched_element(vport_tc_node, sched_ctx,
extack);
@@ -1194,6 +1211,29 @@ static int esw_qos_vport_tc_check_type(enum sched_node_type curr_type,
return 0;
}
+static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw,
+ u32 *tc_bw)
+{
+ int i, num_tcs = esw_qos_num_tcs(esw->dev);
+
+ for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++)
+ if (tc_bw[i])
+ return false;
+
+ return true;
+}
+
+static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport,
+ u32 *tc_bw)
+{
+ struct mlx5_esw_sched_node *node = vport->qos.sched_node;
+ struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
+
+ esw = (node && node->parent) ? node->parent->esw : esw;
+
+ return esw_qos_validate_unsupported_tc_bw(esw, tc_bw);
+}
+
static int esw_qos_vport_update(struct mlx5_vport *vport,
enum sched_node_type type,
struct mlx5_esw_sched_node *parent,
@@ -1227,6 +1267,12 @@ static int esw_qos_vport_update(struct mlx5_vport *vport,
if (curr_type == SCHED_NODE_TYPE_TC_ARBITER_TSAR && curr_type == type) {
esw_qos_set_tc_arbiter_bw_shares(vport_node, curr_tc_bw,
extack);
+ if (!esw_qos_validate_unsupported_tc_bw(parent->esw,
+ curr_tc_bw)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Unsupported traffic classes on the new device");
+ return -EOPNOTSUPP;
+ }
}
return err;
@@ -1575,30 +1621,6 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *
return 0;
}
-static bool esw_qos_validate_unsupported_tc_bw(struct mlx5_eswitch *esw,
- u32 *tc_bw)
-{
- int i, num_tcs = esw_qos_num_tcs(esw->dev);
-
- for (i = num_tcs; i < DEVLINK_RATE_TCS_MAX; i++) {
- if (tc_bw[i])
- return false;
- }
-
- return true;
-}
-
-static bool esw_qos_vport_validate_unsupported_tc_bw(struct mlx5_vport *vport,
- u32 *tc_bw)
-{
- struct mlx5_esw_sched_node *node = vport->qos.sched_node;
- struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
-
- esw = (node && node->parent) ? node->parent->esw : esw;
-
- return esw_qos_validate_unsupported_tc_bw(esw, tc_bw);
-}
-
static bool esw_qos_tc_bw_disabled(u32 *tc_bw)
{
int i;
@@ -1803,18 +1825,44 @@ int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
return 0;
}
+static int
+mlx5_esw_validate_cross_esw_scheduling(struct mlx5_eswitch *esw,
+ struct mlx5_esw_sched_node *parent,
+ struct netlink_ext_ack *extack)
+{
+ if (!parent || esw == parent->esw)
+ return 0;
+
+ if (!MLX5_CAP_QOS(esw->dev, esw_cross_esw_sched)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cross E-Switch scheduling is not supported");
+ return -EOPNOTSUPP;
+ }
+ if (esw->dev->shd != parent->esw->dev->shd) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot add vport to a parent belonging to a different device");
+ return -EOPNOTSUPP;
+ }
+ if (!mlx5_lag_is_active(esw->dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cross E-Switch scheduling requires LAG to be activated");
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
static int
mlx5_esw_qos_vport_update_parent(struct mlx5_vport *vport,
struct mlx5_esw_sched_node *parent,
struct netlink_ext_ack *extack)
{
struct mlx5_eswitch *esw = vport->dev->priv.eswitch;
- int err = 0;
+ int err;
- if (parent && parent->esw != esw) {
- NL_SET_ERR_MSG_MOD(extack, "Cross E-Switch scheduling is not supported");
- return -EOPNOTSUPP;
- }
+ err = mlx5_esw_validate_cross_esw_scheduling(esw, parent, extack);
+ if (err)
+ return err;
if (!vport->qos.sched_node && parent) {
enum sched_node_type type;
--
2.31.1
Powered by blists - more mailing lists