[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241008183222.137702-12-tariqt@nvidia.com>
Date: Tue, 8 Oct 2024 21:32:19 +0300
From: Tariq Toukan <tariqt@...dia.com>
To: "David S. Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Eric Dumazet <edumazet@...gle.com>
CC: <netdev@...r.kernel.org>, Saeed Mahameed <saeedm@...dia.com>, Gal Pressman
<gal@...dia.com>, Leon Romanovsky <leonro@...dia.com>, <cjubran@...dia.com>,
<cratiu@...dia.com>, Tariq Toukan <tariqt@...dia.com>
Subject: [PATCH net-next 11/14] net/mlx5: qos: Store rate groups in a qos domain
From: Cosmin Ratiu <cratiu@...dia.com>
Groups are currently maintained as a list in their corresponding
eswitch, protected by the esw state_lock.
The upcoming cross-eswitch scheduling feature cannot work with this
approach, as maintaining group membership would require acquiring
multiple eswitch locks in the correct order.
This commit moves the rate groups into a new 'qos domain' struct and
adds explicit qos init/cleanup steps to the eswitch init/cleanup.
Upcoming patches will expand the qos domain struct and allow it to be
shared between eswitches. For now, each esw owns a private qos domain,
so the only effect of this patch is an extra level of indirection.
Signed-off-by: Cosmin Ratiu <cratiu@...dia.com>
Signed-off-by: Tariq Toukan <tariqt@...dia.com>
---
.../net/ethernet/mellanox/mlx5/core/esw/qos.c | 58 ++++++++++++++++---
.../net/ethernet/mellanox/mlx5/core/esw/qos.h | 3 +
.../net/ethernet/mellanox/mlx5/core/eswitch.c | 12 +++-
.../net/ethernet/mellanox/mlx5/core/eswitch.h | 3 +-
4 files changed, 65 insertions(+), 11 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 5891a68633af..06b3a21a7475 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -11,6 +11,37 @@
/* Minimum supported BW share value by the HW is 1 Mbit/sec */
#define MLX5_MIN_BW_SHARE 1
+/* Holds the rate groups of an E-Switch; currently private to each esw. */
+struct mlx5_qos_domain {
+ /* All mlx5_esw_rate_groups, linked through their parent_entry. */
+ struct list_head groups;
+};
+
+static struct mlx5_qos_domain *esw_qos_domain_alloc(void)
+{
+ struct mlx5_qos_domain *qos_domain;
+
+ qos_domain = kzalloc(sizeof(*qos_domain), GFP_KERNEL);
+ if (!qos_domain)
+ return NULL;
+
+ INIT_LIST_HEAD(&qos_domain->groups);
+
+ return qos_domain;
+}
+
+static int esw_qos_domain_init(struct mlx5_eswitch *esw)
+{
+ esw->qos.domain = esw_qos_domain_alloc();
+
+ return esw->qos.domain ? 0 : -ENOMEM;
+}
+
+static void esw_qos_domain_release(struct mlx5_eswitch *esw)
+{
+ kfree(esw->qos.domain);
+ esw->qos.domain = NULL;
+}
struct mlx5_esw_rate_group {
u32 tsar_ix;
@@ -19,6 +50,7 @@ struct mlx5_esw_rate_group {
u32 min_rate;
/* A computed value indicating relative min_rate between group members. */
u32 bw_share;
+ /* Membership in the qos domain 'groups' list. */
struct list_head parent_entry;
/* The eswitch this group belongs to. */
struct mlx5_eswitch *esw;
@@ -128,10 +160,10 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw)
/* Find max min_rate across all esw groups.
* This will correspond to fw_max_bw_share in the final bw_share calculation.
*/
- list_for_each_entry(group, &esw->qos.groups, parent_entry) {
- if (group->min_rate < max_guarantee || group->tsar_ix == esw->qos.root_tsar_ix)
- continue;
- max_guarantee = group->min_rate;
+ list_for_each_entry(group, &esw->qos.domain->groups, parent_entry) {
+ if (group->esw == esw && group->tsar_ix != esw->qos.root_tsar_ix &&
+ group->min_rate > max_guarantee)
+ max_guarantee = group->min_rate;
}
if (max_guarantee)
@@ -183,8 +215,8 @@ static int esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, struct netlink_e
u32 bw_share;
int err;
- list_for_each_entry(group, &esw->qos.groups, parent_entry) {
- if (group->tsar_ix == esw->qos.root_tsar_ix)
+ list_for_each_entry(group, &esw->qos.domain->groups, parent_entry) {
+ if (group->esw != esw || group->tsar_ix == esw->qos.root_tsar_ix)
continue;
bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);
@@ -452,7 +484,7 @@ __esw_qos_alloc_rate_group(struct mlx5_eswitch *esw, u32 tsar_ix)
group->esw = esw;
group->tsar_ix = tsar_ix;
INIT_LIST_HEAD(&group->members);
- list_add_tail(&group->parent_entry, &esw->qos.groups);
+ list_add_tail(&group->parent_entry, &esw->qos.domain->groups);
return group;
}
@@ -586,7 +618,6 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
return err;
}
- INIT_LIST_HEAD(&esw->qos.groups);
if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
} else {
@@ -868,6 +899,17 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *
return 0;
}
+int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
+{
+ return esw_qos_domain_init(esw);
+}
+
+void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw)
+{
+ if (esw->qos.domain)
+ esw_qos_domain_release(esw);
+}
+
/* Eswitch devlink rate API */
int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
index c4f04c3e6a59..44fb339c5dcc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
@@ -6,6 +6,9 @@
#ifdef CONFIG_MLX5_ESWITCH
+int mlx5_esw_qos_init(struct mlx5_eswitch *esw);
+void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw);
+
int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *evport, u32 max_rate, u32 min_rate);
void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 4a187f39daba..9de819c45d33 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1481,6 +1481,10 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
mlx5_eq_notifier_register(esw->dev, &esw->nb);
+ err = mlx5_esw_qos_init(esw);
+ if (err)
+ goto err_qos_init;
+
if (esw->mode == MLX5_ESWITCH_LEGACY) {
err = esw_legacy_enable(esw);
} else {
@@ -1489,7 +1493,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
}
if (err)
- goto abort;
+ goto err_esw_enable;
esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED;
@@ -1503,7 +1507,10 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
return 0;
-abort:
+err_esw_enable:
+ mlx5_esw_qos_cleanup(esw);
+err_qos_init:
+ mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
mlx5_esw_acls_ns_cleanup(esw);
return err;
}
@@ -1631,6 +1638,7 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw)
if (esw->mode == MLX5_ESWITCH_OFFLOADS)
devl_rate_nodes_destroy(devlink);
+ mlx5_esw_qos_cleanup(esw);
}
void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 567276900a37..e57be2eeec85 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -336,6 +336,7 @@ enum {
};
struct dentry;
+struct mlx5_qos_domain;
struct mlx5_eswitch {
struct mlx5_core_dev *dev;
@@ -368,12 +369,12 @@ struct mlx5_eswitch {
*/
refcount_t refcnt;
u32 root_tsar_ix;
+ struct mlx5_qos_domain *domain;
/* Contains all vports with QoS enabled but no explicit group.
* Cannot be NULL if QoS is enabled, but may be a fake group
* referencing the root TSAR if the esw doesn't support groups.
*/
struct mlx5_esw_rate_group *group0;
- struct list_head groups; /* Protected by esw->state_lock */
} qos;
struct mlx5_esw_bridge_offloads *br_offloads;
--
2.44.0
Powered by blists - more mailing lists