[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230210221821.271571-2-saeed@kernel.org>
Date: Fri, 10 Feb 2023 14:18:07 -0800
From: Saeed Mahameed <saeed@...nel.org>
To: "David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
Eric Dumazet <edumazet@...gle.com>
Cc: Saeed Mahameed <saeedm@...dia.com>, netdev@...r.kernel.org,
Tariq Toukan <tariqt@...dia.com>, Roi Dayan <roid@...dia.com>,
Maor Dickman <maord@...dia.com>
Subject: [net-next 01/15] net/mlx5: Lag, Let user configure multiport eswitch
From: Roi Dayan <roid@...dia.com>
Instead of activating multiport eswitch dynamically through
adding a TC rule and meeting certain conditions, allow the user
to activate it through devlink.
This will remove the forced requirement of using TC.
e.g. Bridge offload.
Example:
$ devlink dev param set pci/0000:00:0b.0 name esw_multiport value 1 \
cmode runtime
Signed-off-by: Roi Dayan <roid@...dia.com>
Reviewed-by: Maor Dickman <maord@...dia.com>
Signed-off-by: Saeed Mahameed <saeedm@...dia.com>
---
Documentation/networking/devlink/mlx5.rst | 4 ++
.../net/ethernet/mellanox/mlx5/core/devlink.c | 56 +++++++++++++++++++
.../net/ethernet/mellanox/mlx5/core/devlink.h | 1 +
.../mellanox/mlx5/core/en/tc/act/mirred.c | 9 ---
.../net/ethernet/mellanox/mlx5/core/en_tc.c | 22 +-------
.../net/ethernet/mellanox/mlx5/core/en_tc.h | 6 --
.../net/ethernet/mellanox/mlx5/core/lag/lag.c | 4 +-
.../net/ethernet/mellanox/mlx5/core/lag/lag.h | 1 +
.../ethernet/mellanox/mlx5/core/lag/mpesw.c | 46 +++++++--------
.../ethernet/mellanox/mlx5/core/lag/mpesw.h | 12 +---
10 files changed, 87 insertions(+), 74 deletions(-)
diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 29ad304e6fba..1d2ad2727da1 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -54,6 +54,10 @@ parameters.
- Control the number of large groups (size > 1) in the FDB table.
* The default value is 15, and the range is between 1 and 1024.
+ * - ``esw_multiport``
+ - Boolean
+ - runtime
+ - Set the E-Switch lag mode to multiport.
The ``mlx5`` driver supports reloading via ``DEVLINK_CMD_RELOAD``
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index b742e04deec1..49392870f695 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -7,6 +7,7 @@
#include "fw_reset.h"
#include "fs_core.h"
#include "eswitch.h"
+#include "lag/lag.h"
#include "esw/qos.h"
#include "sf/dev/dev.h"
#include "sf/sf.h"
@@ -437,6 +438,55 @@ static int mlx5_devlink_large_group_num_validate(struct devlink *devlink, u32 id
return 0;
}
+static int mlx5_devlink_esw_multiport_set(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+ int err = 0;
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return -EOPNOTSUPP;
+
+ if (ctx->val.vbool)
+ err = mlx5_lag_mpesw_enable(dev);
+ else
+ mlx5_lag_mpesw_disable(dev);
+
+ return err;
+}
+
+static int mlx5_devlink_esw_multiport_get(struct devlink *devlink, u32 id,
+ struct devlink_param_gset_ctx *ctx)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (!MLX5_ESWITCH_MANAGER(dev))
+ return -EOPNOTSUPP;
+
+ ctx->val.vbool = mlx5_lag_mpesw_is_activated(dev);
+ return 0;
+}
+
+static int mlx5_devlink_esw_multiport_validate(struct devlink *devlink, u32 id,
+ union devlink_param_value val,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_core_dev *dev = devlink_priv(devlink);
+
+ if (!MLX5_ESWITCH_MANAGER(dev)) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch is unsupported");
+ return -EOPNOTSUPP;
+ }
+
+ if (mlx5_eswitch_mode(dev) != MLX5_ESWITCH_OFFLOADS) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "E-Switch must be in switchdev mode");
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
#endif
static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id,
@@ -455,6 +505,12 @@ static const struct devlink_param mlx5_devlink_params[] = {
BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
NULL, NULL,
mlx5_devlink_large_group_num_validate),
+ DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT,
+ "esw_multiport", DEVLINK_PARAM_TYPE_BOOL,
+ BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+ mlx5_devlink_esw_multiport_get,
+ mlx5_devlink_esw_multiport_set,
+ mlx5_devlink_esw_multiport_validate),
#endif
DEVLINK_PARAM_GENERIC(IO_EQ_SIZE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
NULL, NULL, mlx5_devlink_eq_depth_validate),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
index b561107e0df1..212b12424146 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -11,6 +11,7 @@ enum mlx5_devlink_param_id {
MLX5_DEVLINK_PARAM_ID_FLOW_STEERING_MODE,
MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM,
MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA,
+ MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT,
};
struct mlx5_trap_ctx {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
index c095a12346de..07cc65596f89 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c
@@ -216,7 +216,6 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
struct net_device *uplink_dev;
struct mlx5e_priv *out_priv;
struct mlx5_eswitch *esw;
- bool is_uplink_rep;
int *ifindexes;
int if_count;
int err;
@@ -231,7 +230,6 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
parse_state->ifindexes[if_count] = out_dev->ifindex;
parse_state->if_count++;
- is_uplink_rep = mlx5e_eswitch_uplink_rep(out_dev);
if (mlx5_lag_mpesw_do_mirred(priv->mdev, out_dev, extack))
return -EOPNOTSUPP;
@@ -275,13 +273,6 @@ parse_mirred(struct mlx5e_tc_act_parse_state *parse_state,
esw_attr->dests[esw_attr->out_count].rep = rpriv->rep;
esw_attr->dests[esw_attr->out_count].mdev = out_priv->mdev;
- /* If output device is bond master then rules are not explicit
- * so we don't attempt to count them.
- */
- if (is_uplink_rep && MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
- MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
- attr->lag.count = true;
-
esw_attr->out_count++;
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index e2ec80ebde58..00d5b0aa295b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2124,9 +2124,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
free_branch_attr(flow, attr->branch_true);
free_branch_attr(flow, attr->branch_false);
- if (flow->attr->lag.count)
- mlx5_lag_del_mpesw_rule(esw->dev);
-
kvfree(attr->esw_attr->rx_tun_attr);
kvfree(attr->parse_attr);
kfree(flow->attr);
@@ -4277,12 +4274,7 @@ static bool is_lag_dev(struct mlx5e_priv *priv,
static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
{
- if (same_hw_reps(priv, out_dev) &&
- MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
- MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
- return true;
-
- return false;
+ return same_hw_reps(priv, out_dev) && mlx5_lag_mpesw_is_activated(priv->mdev);
}
bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
@@ -4584,7 +4576,6 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5_core_dev *in_mdev)
{
struct flow_rule *rule = flow_cls_offload_flow_rule(f);
- struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct netlink_ext_ack *extack = f->common.extack;
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5e_tc_flow *flow;
@@ -4617,26 +4608,17 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
if (err)
goto err_free;
- if (flow->attr->lag.count) {
- err = mlx5_lag_add_mpesw_rule(esw->dev);
- if (err)
- goto err_free;
- }
-
err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
complete_all(&flow->init_done);
if (err) {
if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
- goto err_lag;
+ goto err_free;
add_unready_flow(flow);
}
return flow;
-err_lag:
- if (flow->attr->lag.count)
- mlx5_lag_del_mpesw_rule(esw->dev);
err_free:
mlx5e_flow_put(priv, flow);
out:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index ce516dc7f3fd..a0ed9d95a634 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -90,12 +90,6 @@ struct mlx5_flow_attr {
u32 exe_aso_type;
struct list_head list;
struct mlx5e_post_act_handle *post_act_handle;
- struct {
- /* Indicate whether the parsed flow should be counted for lag mode decision
- * making
- */
- bool count;
- } lag;
struct mlx5_flow_attr *branch_true;
struct mlx5_flow_attr *branch_false;
struct mlx5_flow_attr *jumping_attr;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
index dbf218cac535..301994741b08 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
@@ -230,7 +230,6 @@ static void mlx5_ldev_free(struct kref *ref)
mlx5_lag_mp_cleanup(ldev);
cancel_delayed_work_sync(&ldev->bond_work);
destroy_workqueue(ldev->wq);
- mlx5_lag_mpesw_cleanup(ldev);
mutex_destroy(&ldev->lock);
kfree(ldev);
}
@@ -276,7 +275,6 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev)
mlx5_core_err(dev, "Failed to init multipath lag err=%d\n",
err);
- mlx5_lag_mpesw_init(ldev);
ldev->ports = MLX5_CAP_GEN(dev, num_lag_ports);
ldev->buckets = 1;
@@ -688,7 +686,7 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
}
#define MLX5_LAG_OFFLOADS_SUPPORTED_PORTS 2
-static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
+bool mlx5_lag_check_prereq(struct mlx5_lag *ldev)
{
#ifdef CONFIG_MLX5_ESWITCH
struct mlx5_core_dev *dev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
index 66013bef9939..2dbd96a86ef8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h
@@ -102,6 +102,7 @@ mlx5_lag_is_ready(struct mlx5_lag *ldev)
return test_bit(MLX5_LAG_FLAG_NDEVS_READY, &ldev->state_flags);
}
+bool mlx5_lag_check_prereq(struct mlx5_lag *ldev);
void mlx5_modify_lag(struct mlx5_lag *ldev,
struct lag_tracker *tracker);
int mlx5_activate_lag(struct mlx5_lag *ldev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
index 3799f89ed1a6..3f8fc965cec6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.c
@@ -7,18 +7,19 @@
#include "eswitch.h"
#include "lib/mlx5.h"
-static int add_mpesw_rule(struct mlx5_lag *ldev)
+static int enable_mpesw(struct mlx5_lag *ldev)
{
struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
int err;
- if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1)
- return 0;
+ if (ldev->mode != MLX5_LAG_MODE_NONE)
+ return -EINVAL;
- if (ldev->mode != MLX5_LAG_MODE_NONE) {
- err = -EINVAL;
- goto out_err;
- }
+ if (mlx5_eswitch_mode(dev) != MLX5_ESWITCH_OFFLOADS ||
+ !MLX5_CAP_PORT_SELECTION(dev, port_select_flow_table) ||
+ !MLX5_CAP_GEN(dev, create_lag_when_not_master_up) ||
+ !mlx5_lag_check_prereq(ldev))
+ return -EOPNOTSUPP;
err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false);
if (err) {
@@ -29,14 +30,12 @@ static int add_mpesw_rule(struct mlx5_lag *ldev)
return 0;
out_err:
- atomic_dec(&ldev->lag_mpesw.mpesw_rule_count);
return err;
}
-static void del_mpesw_rule(struct mlx5_lag *ldev)
+static void disable_mpesw(struct mlx5_lag *ldev)
{
- if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) &&
- ldev->mode == MLX5_LAG_MODE_MPESW)
+ if (ldev->mode == MLX5_LAG_MODE_MPESW)
mlx5_disable_lag(ldev);
}
@@ -46,12 +45,17 @@ static void mlx5_mpesw_work(struct work_struct *work)
struct mlx5_lag *ldev = mpesww->lag;
mutex_lock(&ldev->lock);
+ if (ldev->mode_changes_in_progress) {
+ mpesww->result = -EAGAIN;
+ goto unlock;
+ }
+
if (mpesww->op == MLX5_MPESW_OP_ENABLE)
- mpesww->result = add_mpesw_rule(ldev);
+ mpesww->result = enable_mpesw(ldev);
else if (mpesww->op == MLX5_MPESW_OP_DISABLE)
- del_mpesw_rule(ldev);
+ disable_mpesw(ldev);
+unlock:
mutex_unlock(&ldev->lock);
-
complete(&mpesww->comp);
}
@@ -86,12 +90,12 @@ static int mlx5_lag_mpesw_queue_work(struct mlx5_core_dev *dev,
return err;
}
-void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev)
+void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev)
{
mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_DISABLE);
}
-int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev)
+int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev)
{
return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE);
}
@@ -118,13 +122,3 @@ bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev)
return ldev && ldev->mode == MLX5_LAG_MODE_MPESW;
}
-
-void mlx5_lag_mpesw_init(struct mlx5_lag *ldev)
-{
- atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0);
-}
-
-void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev)
-{
- WARN_ON(atomic_read(&ldev->lag_mpesw.mpesw_rule_count));
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
index 818f19b5a984..571e4acf262e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mpesw.h
@@ -9,7 +9,6 @@
struct lag_mpesw {
struct work_struct mpesw_work;
- atomic_t mpesw_rule_count;
};
enum mpesw_op {
@@ -29,14 +28,7 @@ int mlx5_lag_mpesw_do_mirred(struct mlx5_core_dev *mdev,
struct net_device *out_dev,
struct netlink_ext_ack *extack);
bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev);
-void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev);
-int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev);
-#if IS_ENABLED(CONFIG_MLX5_ESWITCH)
-void mlx5_lag_mpesw_init(struct mlx5_lag *ldev);
-void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev);
-#else
-static inline void mlx5_lag_mpesw_init(struct mlx5_lag *ldev) {}
-static inline void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev) {}
-#endif
+void mlx5_lag_mpesw_disable(struct mlx5_core_dev *dev);
+int mlx5_lag_mpesw_enable(struct mlx5_core_dev *dev);
#endif /* __MLX5_LAG_MPESW_H__ */
--
2.39.1
Powered by blists - more mailing lists