[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210818112428.209111-6-markzhang@nvidia.com>
Date: Wed, 18 Aug 2021 14:24:23 +0300
From: Mark Zhang <markzhang@...dia.com>
To: <jgg@...dia.com>, <dledford@...hat.com>, <saeedm@...dia.com>
CC: <linux-rdma@...r.kernel.org>, <netdev@...r.kernel.org>,
<aharonl@...dia.com>, <netao@...dia.com>, <leonro@...dia.com>,
Mark Zhang <markzhang@...dia.com>
Subject: [PATCH rdma-next 05/10] RDMA/mlx5: Add steering support in optional flow counters
From: Aharon Landau <aharonl@...dia.com>
Add steering infrastructure for adding and removing optional counters.
This allows the counters to be added and removed dynamically, so as not
to hurt performance.
Signed-off-by: Aharon Landau <aharonl@...dia.com>
Reviewed-by: Maor Gottlieb <maorg@...dia.com>
Signed-off-by: Mark Zhang <markzhang@...dia.com>
---
drivers/infiniband/hw/mlx5/fs.c | 111 +++++++++++++++++++++++++++
drivers/infiniband/hw/mlx5/mlx5_ib.h | 12 +++
include/rdma/ib_hdrs.h | 1 +
3 files changed, 124 insertions(+)
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 5fbc0a8454b9..be6a00969ddb 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -10,12 +10,14 @@
#include <rdma/uverbs_std_types.h>
#include <rdma/mlx5_user_ioctl_cmds.h>
#include <rdma/mlx5_user_ioctl_verbs.h>
+#include <rdma/ib_hdrs.h>
#include <rdma/ib_umem.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/fs_helpers.h>
#include <linux/mlx5/accel.h>
#include <linux/mlx5/eswitch.h>
+#include <net/inet_ecn.h>
#include "mlx5_ib.h"
#include "counters.h"
#include "devx.h"
@@ -847,6 +849,115 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
return prio;
}
+/*
+ * Priority values that mlx5_ib_fs_add_op_fc() passes to _get_prio() for rules
+ * placed in the MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS namespace.
+ */
+enum {
+ RDMA_RX_ECN_OPCOUNTER_PRIO,
+ RDMA_RX_CNP_OPCOUNTER_PRIO,
+};
+
+/*
+ * Priority value that mlx5_ib_fs_add_op_fc() passes to _get_prio() for rules
+ * placed in the MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS namespace.
+ */
+enum {
+ RDMA_TX_CNP_OPCOUNTER_PRIO,
+};
+
+/**
+ * mlx5_ib_fs_add_op_fc - install the steering rule that feeds an optional counter
+ * @dev: mlx5 IB device; its core device (dev->mdev) supplies the capabilities
+ *       and flow namespaces used here
+ * @opfc: optional-counter state. @opfc->fc is read here (it is not allocated
+ *        by this function); @opfc->prio and @opfc->rule are written
+ * @type: which optional counter to enable
+ *
+ * Builds a match spec for @type (ip_ecn == INET_ECN_CE for the RX CE counter,
+ * bth_opcode == IB_BTH_OPCODE_CNP for the RX/TX CNP counters), obtains the
+ * flow table for the corresponding priority in the RDMA RX/TX counters
+ * namespace, and adds one rule whose action is COUNT | ALLOW with @opfc->fc
+ * as the counter destination.
+ *
+ * Return: 0 on success; -ENOMEM if the spec cannot be allocated; -EOPNOTSUPP
+ * if @type is not handled, the device does not report the bth_opcode match
+ * capability, or the namespace is not available; otherwise the error returned
+ * by _get_prio() or mlx5_add_flow_rules(). When mlx5_add_flow_rules() fails,
+ * @opfc->rule is left holding the error pointer.
+ */
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, struct mlx5_ib_op_fc *opfc,
+			 enum mlx5_ib_optional_counter_type type)
+{
+	enum mlx5_flow_namespace_type fn_type;
+	struct mlx5_flow_act flow_act = {};
+	/* Zeroed so no uninitialised stack bytes are handed to the flow core. */
+	struct mlx5_flow_destination dst = {};
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_ib_flow_prio *prio;
+	struct mlx5_flow_spec *spec;
+	int priority, err = 0;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	switch (type) {
+	case MLX5_IB_OPCOUNTER_CC_RX_CE_PKTS:
+		/* Match received packets whose outer IP ECN field is CE. */
+		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+		priority = RDMA_RX_ECN_OPCOUNTER_PRIO;
+		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 outer_headers.ip_ecn);
+		MLX5_SET(fte_match_param, spec->match_value,
+			 outer_headers.ip_ecn, INET_ECN_CE);
+		break;
+
+	case MLX5_IB_OPCOUNTER_CC_RX_CNP_PKTS:
+		/* Matching on the BTH opcode needs explicit device support. */
+		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+					ft_field_support_2_nic_receive_rdma.bth_opcode)) {
+			err = -EOPNOTSUPP;
+			goto free;
+		}
+		fn_type = MLX5_FLOW_NAMESPACE_RDMA_RX_COUNTERS;
+		priority = RDMA_RX_CNP_OPCOUNTER_PRIO;
+		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 misc_parameters.bth_opcode);
+		MLX5_SET(fte_match_param, spec->match_value,
+			 misc_parameters.bth_opcode, IB_BTH_OPCODE_CNP);
+		break;
+
+	case MLX5_IB_OPCOUNTER_CC_TX_CNP_PKTS:
+		/* Same match as the RX CNP case, in the TX counters namespace. */
+		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
+					ft_field_support_2_nic_transmit_rdma.bth_opcode)) {
+			err = -EOPNOTSUPP;
+			goto free;
+		}
+		fn_type = MLX5_FLOW_NAMESPACE_RDMA_TX_COUNTERS;
+		priority = RDMA_TX_CNP_OPCOUNTER_PRIO;
+		spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
+				 misc_parameters.bth_opcode);
+		MLX5_SET(fte_match_param, spec->match_value,
+			 misc_parameters.bth_opcode, IB_BTH_OPCODE_CNP);
+		break;
+
+	default:
+		err = -EOPNOTSUPP;
+		goto free;
+	}
+
+	ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
+	if (!ns) {
+		err = -EOPNOTSUPP;
+		goto free;
+	}
+
+	prio = _get_prio(ns, &opfc->prio, priority, 1, 1, 0);
+	if (IS_ERR(prio)) {
+		err = PTR_ERR(prio);
+		goto free;
+	}
+
+	dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+	dst.counter_id = mlx5_fc_id(opfc->fc);
+
+	flow_act.action =
+		MLX5_FLOW_CONTEXT_ACTION_COUNT | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+	opfc->rule =
+		mlx5_add_flow_rules(prio->flow_table, spec, &flow_act, &dst, 1);
+	if (IS_ERR(opfc->rule)) {
+		/*
+		 * NOTE(review): presumably releases the table obtained by
+		 * _get_prio() without dropping a reference, since none has
+		 * been taken yet - confirm against put_flow_table().
+		 */
+		put_flow_table(dev, prio, false);
+		err = PTR_ERR(opfc->rule);
+		goto free;
+	}
+	prio->refcount++;
+
+free:
+	/* spec came from kvzalloc(), so it must be released with kvfree(). */
+	kvfree(spec);
+	return err;
+}
+
+/**
+ * mlx5_ib_fs_remove_op_fc - tear down the steering rule of an optional counter
+ * @dev: mlx5 IB device, forwarded to put_flow_table()
+ * @opfc: optional-counter state; expected to have been set up by a successful
+ *        mlx5_ib_fs_add_op_fc() call (not checked here)
+ *
+ * Deletes @opfc->rule, then hands @opfc->prio to put_flow_table(). Emits a
+ * warning if @opfc->prio still references a flow table afterwards.
+ */
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+			     struct mlx5_ib_op_fc *opfc)
+{
+	struct mlx5_ib_flow_prio *prio = &opfc->prio;
+
+	mlx5_del_flow_rules(opfc->rule);
+	put_flow_table(dev, prio, true);
+	WARN_ON(prio->flow_table);
+}
+
static void set_underlay_qp(struct mlx5_ib_dev *dev,
struct mlx5_flow_spec *spec,
u32 underlay_qpn)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 2ba352702294..130b2ed79ba2 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -805,6 +805,12 @@ enum mlx5_ib_optional_counter_type {
MLX5_IB_OPCOUNTER_MAX,
};
+/* State of one optional flow counter and the steering rule that feeds it. */
+struct mlx5_ib_op_fc {
+ /* Flow counter; mlx5_ib_fs_add_op_fc() uses its id as the rule destination. */
+ struct mlx5_fc *fc;
+ /* Flow-table priority state passed to _get_prio() and put_flow_table(). */
+ struct mlx5_ib_flow_prio prio;
+ /* Handle stored from mlx5_add_flow_rules(); deleted by mlx5_ib_fs_remove_op_fc(). */
+ struct mlx5_flow_handle *rule;
+};
+
struct mlx5_ib_counters {
const char **names;
size_t *offsets;
@@ -814,6 +820,12 @@ struct mlx5_ib_counters {
u16 set_id;
};
+/* Add the steering rule for optional counter @type; returns 0 or a negative errno. */
+int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, struct mlx5_ib_op_fc *opfc,
+ enum mlx5_ib_optional_counter_type type);
+
+/* Delete the steering rule held in @opfc and release its flow-table priority. */
+void mlx5_ib_fs_remove_op_fc(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_op_fc *opfc);
+
struct mlx5_ib_multiport_info;
struct mlx5_ib_multiport {
diff --git a/include/rdma/ib_hdrs.h b/include/rdma/ib_hdrs.h
index 7e542205861c..8ae07c0ecdf7 100644
--- a/include/rdma/ib_hdrs.h
+++ b/include/rdma/ib_hdrs.h
@@ -232,6 +232,7 @@ static inline u32 ib_get_sqpn(struct ib_other_headers *ohdr)
#define IB_BTH_SE_SHIFT 23
#define IB_BTH_TVER_MASK 0xf
#define IB_BTH_TVER_SHIFT 16
+/* BTH opcode value of a CNP (presumably Congestion Notification Packet - confirm against the IBTA spec). */
+#define IB_BTH_OPCODE_CNP 0x81
static inline u8 ib_bth_get_pad(struct ib_other_headers *ohdr)
{
--
2.26.2
Powered by blists - more mailing lists