lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220713225859.401241-5-saeed@kernel.org>
Date:   Wed, 13 Jul 2022 15:58:48 -0700
From:   Saeed Mahameed <saeed@...nel.org>
To:     "David S. Miller" <davem@...emloft.net>,
        Jakub Kicinski <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>,
        Eric Dumazet <edumazet@...gle.com>
Cc:     Saeed Mahameed <saeedm@...dia.com>, netdev@...r.kernel.org,
        Michael Guralnik <michaelgur@...dia.com>,
        Mark Bloch <mbloch@...dia.com>
Subject: [net-next 04/15] net/mlx5: Expose vnic diagnostic counters for eswitch managed vports

From: Michael Guralnik <michaelgur@...dia.com>

Expose debug counters on vport group managers for their managed vports.

Counters are exposed through debugfs. The directory will be present only
for functions that are eswitch managers, and only counters that are
supported on their specific HW/FW will be exposed.

Example:
$ ls /sys/kernel/debug/mlx5/0000:08:00.0/esw/
pf sf_8  vf_0  vf_1

$ ls -l /sys/kernel/debug/mlx5/0000:08:00.0/esw/vf_0/vnic_diag/
cq_overrun
quota_exceeded_command
total_q_under_processor_handle
invalid_command
send_queue_priority_update_flow

List of all counters added:
total_q_under_processor_handle - number of queues in error state due to an
async error or errored command.
send_queue_priority_update_flow - number of QP/SQ priority/SL update
events.
cq_overrun - number of times CQ entered an error state due to an
overflow.
async_eq_overrun - number of times an EQ mapped to async events was
overrun.
comp_eq_overrun - number of times an EQ mapped to completion events was
overrun.
quota_exceeded_command - number of commands issued and failed due to quota
exceeded.
invalid_command - number of commands issued and failed due to any reason
other than quota exceeded.

Signed-off-by: Michael Guralnik <michaelgur@...dia.com>
Reviewed-by: Mark Bloch <mbloch@...dia.com>
Signed-off-by: Saeed Mahameed <saeedm@...dia.com>
---
 .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 .../ethernet/mellanox/mlx5/core/esw/debugfs.c | 182 ++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/eswitch.c |   6 +
 .../net/ethernet/mellanox/mlx5/core/eswitch.h |   5 +
 .../mellanox/mlx5/core/eswitch_offloads.c     |   3 +
 5 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 5dadc2fce7ee..7ab432cc522f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -68,7 +68,7 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE)   += en/tc/sample.o
 #
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
 				      ecpf.o rdma.o esw/legacy.o \
-				      esw/devlink_port.o esw/vporttbl.o esw/qos.o
+				      esw/debugfs.o esw/devlink_port.o esw/vporttbl.o esw/qos.o
 
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += esw/acl/helper.o \
 				      esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
new file mode 100644
index 000000000000..2db13c71e88c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <linux/debugfs.h>
+#include "eswitch.h"
+
+enum vnic_diag_counter {	/* picks the statistics field mlx5_esw_query_vnic_diag() returns */
+	MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE,	/* reads total_error_queues */
+	MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW,	/* reads send_queue_priority_update_flow */
+	MLX5_VNIC_DIAG_COMP_EQ_OVERRUN,	/* reads comp_eq_overrun */
+	MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN,	/* reads async_eq_overrun */
+	MLX5_VNIC_DIAG_CQ_OVERRUN,	/* reads cq_overrun */
+	MLX5_VNIC_DIAG_INVALID_COMMAND,	/* reads invalid_command */
+	MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND,	/* reads quota_exceeded_command */
+};
+
+/* Query vnic_env of @vport; on success (0) store the field picked by @counter in @val. */
+static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter,
+				    u32 *val)
+{
+	u32 reply[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
+	u32 request[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
+	struct mlx5_core_dev *mdev = vport->dev;
+	void *stats;
+	int ret;
+
+	MLX5_SET(query_vnic_env_in, request, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
+	MLX5_SET(query_vnic_env_in, request, vport_number, vport->vport);
+	/* other_vport is set unless mlx5_esw_is_manager_vport() accepts this vport number */
+	if (!mlx5_esw_is_manager_vport(mdev->priv.eswitch, vport->vport))
+		MLX5_SET(query_vnic_env_in, request, other_vport, 1);
+
+	ret = mlx5_cmd_exec(mdev, request, sizeof(request), reply, sizeof(reply));
+	if (ret)
+		return ret;
+
+	stats = MLX5_ADDR_OF(query_vnic_env_out, reply, vport_env);
+	switch (counter) {
+	case MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE:
+		*val = MLX5_GET(vnic_diagnostic_statistics, stats, total_error_queues);
+		break;
+	case MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW:
+		*val = MLX5_GET(vnic_diagnostic_statistics, stats,
+				send_queue_priority_update_flow);
+		break;
+	case MLX5_VNIC_DIAG_COMP_EQ_OVERRUN:
+		*val = MLX5_GET(vnic_diagnostic_statistics, stats, comp_eq_overrun);
+		break;
+	case MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN:
+		*val = MLX5_GET(vnic_diagnostic_statistics, stats, async_eq_overrun);
+		break;
+	case MLX5_VNIC_DIAG_CQ_OVERRUN:
+		*val = MLX5_GET(vnic_diagnostic_statistics, stats, cq_overrun);
+		break;
+	case MLX5_VNIC_DIAG_INVALID_COMMAND:
+		*val = MLX5_GET(vnic_diagnostic_statistics, stats, invalid_command);
+		break;
+	case MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND:
+		*val = MLX5_GET(vnic_diagnostic_statistics, stats, quota_exceeded_command);
+		break;
+	}
+	return 0;
+}
+
+static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport,
+			    enum vnic_diag_counter type)
+{
+	u32 val = 0;
+	int ret;
+
+	ret = mlx5_esw_query_vnic_diag(vport, type, &val);
+	if (ret)
+		return ret;
+
+	seq_printf(file, "%d\n", val);
+	return 0;
+}
+
+/* debugfs show callbacks: each prints one counter for the vport found in the seq_file's private
+ * pointer. DEFINE_SHOW_ATTRIBUTE() supplies the <name>_fops passed to debugfs_create_file(). */
+static int total_q_under_processor_handle_show(struct seq_file *sf, void *unused)
+{
+	return __show_vnic_diag(sf, sf->private, MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE);
+}
+DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle);
+
+static int send_queue_priority_update_flow_show(struct seq_file *sf, void *unused)
+{
+	return __show_vnic_diag(sf, sf->private, MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW);
+}
+DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow);
+
+static int comp_eq_overrun_show(struct seq_file *sf, void *unused)
+{
+	return __show_vnic_diag(sf, sf->private, MLX5_VNIC_DIAG_COMP_EQ_OVERRUN);
+}
+DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun);
+
+static int async_eq_overrun_show(struct seq_file *sf, void *unused)
+{
+	return __show_vnic_diag(sf, sf->private, MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN);
+}
+DEFINE_SHOW_ATTRIBUTE(async_eq_overrun);
+
+static int cq_overrun_show(struct seq_file *sf, void *unused)
+{
+	return __show_vnic_diag(sf, sf->private, MLX5_VNIC_DIAG_CQ_OVERRUN);
+}
+DEFINE_SHOW_ATTRIBUTE(cq_overrun);
+
+static int invalid_command_show(struct seq_file *sf, void *unused)
+{
+	return __show_vnic_diag(sf, sf->private, MLX5_VNIC_DIAG_INVALID_COMMAND);
+}
+DEFINE_SHOW_ATTRIBUTE(invalid_command);
+
+static int quota_exceeded_command_show(struct seq_file *sf, void *unused)
+{
+	return __show_vnic_diag(sf, sf->private, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND);
+}
+DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command);
+
+void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num)
+{
+	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+	debugfs_remove_recursive(vport->dbgfs);	/* drop the vport's debugfs dir and its contents */
+	vport->dbgfs = NULL;	/* forget the removed dentry */
+}
+
+#define VNIC_DIAG_DIR_NAME_MAX_LEN 9	/* "pf", "ecpf" or "{vf/sf}_<u16>": <= 8 chars + NUL */
+
+/* Create <esw dir>/<pf|ecpf|vf_N|sf_N>/vnic_diag with one file per counter the device supports. */
+void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num)
+{
+	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+	struct dentry *vnic_diag;
+	char dir_name[VNIC_DIAG_DIR_NAME_MAX_LEN];
+	int len;
+
+	if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
+		return;
+
+	if (vport_num == MLX5_VPORT_PF)
+		len = snprintf(dir_name, sizeof(dir_name), "pf");
+	else if (vport_num == MLX5_VPORT_ECPF)
+		len = snprintf(dir_name, sizeof(dir_name), "ecpf");
+	else
+		len = snprintf(dir_name, sizeof(dir_name), "%s_%d", is_sf ? "sf" : "vf",
+			       is_sf ? sf_num : vport_num - MLX5_VPORT_FIRST_VF);
+	/* snprintf() returns the untruncated length: refuse a clipped, possibly ambiguous name */
+	if (WARN_ON(len < 0 || len >= VNIC_DIAG_DIR_NAME_MAX_LEN))
+		return;
+
+	vport->dbgfs = debugfs_create_dir(dir_name, esw->dbgfs);
+	vnic_diag = debugfs_create_dir("vnic_diag", vport->dbgfs);
+	/* Expose only the counters whose capability bit the device reports. */
+	if (MLX5_CAP_GEN(esw->dev, vnic_env_queue_counters)) {
+		debugfs_create_file("total_q_under_processor_handle", 0444, vnic_diag, vport,
+				    &total_q_under_processor_handle_fops);
+		debugfs_create_file("send_queue_priority_update_flow", 0444, vnic_diag, vport,
+				    &send_queue_priority_update_flow_fops);
+	}
+
+	if (MLX5_CAP_GEN(esw->dev, eq_overrun_count)) {
+		debugfs_create_file("comp_eq_overrun", 0444, vnic_diag, vport,
+				    &comp_eq_overrun_fops);
+		debugfs_create_file("async_eq_overrun", 0444, vnic_diag, vport,
+				    &async_eq_overrun_fops);
+	}
+
+	if (MLX5_CAP_GEN(esw->dev, vnic_env_cq_overrun))
+		debugfs_create_file("cq_overrun", 0444, vnic_diag, vport, &cq_overrun_fops);
+
+	if (MLX5_CAP_GEN(esw->dev, invalid_command_count))
+		debugfs_create_file("invalid_command", 0444, vnic_diag, vport,
+				    &invalid_command_fops);
+
+	if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count))
+		debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport,
+				    &quota_exceeded_command_fops);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index b95f75431882..30a6c9fbf1b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -36,6 +36,7 @@
 #include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/mpfs.h>
+#include <linux/debugfs.h>
 #include "esw/acl/lgcy.h"
 #include "esw/legacy.h"
 #include "esw/qos.h"
@@ -1002,6 +1003,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
 	if (err)
 		return err;
 
+	mlx5_esw_vport_debugfs_create(esw, vport_num, false, 0);
 	err = esw_offloads_load_rep(esw, vport_num);
 	if (err)
 		goto err_rep;
@@ -1009,6 +1011,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
 	return err;
 
 err_rep:
+	mlx5_esw_vport_debugfs_destroy(esw, vport_num);
 	mlx5_esw_vport_disable(esw, vport_num);
 	return err;
 }
@@ -1016,6 +1019,7 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
 void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num)
 {
 	esw_offloads_unload_rep(esw, vport_num);
+	mlx5_esw_vport_debugfs_destroy(esw, vport_num);
 	mlx5_esw_vport_disable(esw, vport_num);
 }
 
@@ -1622,6 +1626,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	dev->priv.eswitch = esw;
 	BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
 
+	esw->dbgfs = debugfs_create_dir("esw", mlx5_debugfs_get_dev_root(esw->dev));
 	esw_info(dev,
 		 "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
 		 esw->total_vports,
@@ -1645,6 +1650,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 
 	esw_info(esw->dev, "cleanup\n");
 
+	debugfs_remove_recursive(esw->dbgfs);
 	esw->dev->priv.eswitch = NULL;
 	destroy_workqueue(esw->work_queue);
 	WARN_ON(refcount_read(&esw->qos.refcnt));
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index c19604b06a2c..87ce5a208cb5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -191,6 +191,7 @@ struct mlx5_vport {
 	enum mlx5_eswitch_vport_event enabled_events;
 	int index;
 	struct devlink_port *dl_port;
+	struct dentry *dbgfs;
 };
 
 struct mlx5_esw_indir_table;
@@ -336,6 +337,7 @@ struct mlx5_eswitch {
 		u32             large_group_num;
 	}  params;
 	struct blocking_notifier_head n_head;
+	struct dentry *dbgfs;
 };
 
 void esw_offloads_disable(struct mlx5_eswitch *esw);
@@ -684,6 +686,9 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
 void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
 struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num);
 
+void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num);
+void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num);
+
 int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
 				      u16 vport_num, u32 controller, u32 sfnum);
 void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index d3da52e3fc67..85b3aa4d7955 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -3704,12 +3704,14 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p
 	if (err)
 		goto devlink_err;
 
+	mlx5_esw_vport_debugfs_create(esw, vport_num, true, sfnum);
 	err = mlx5_esw_offloads_rep_load(esw, vport_num);
 	if (err)
 		goto rep_err;
 	return 0;
 
 rep_err:
+	mlx5_esw_vport_debugfs_destroy(esw, vport_num);
 	mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
 devlink_err:
 	mlx5_esw_vport_disable(esw, vport_num);
@@ -3719,6 +3721,7 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p
 void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
 {
 	mlx5_esw_offloads_rep_unload(esw, vport_num);
+	mlx5_esw_vport_debugfs_destroy(esw, vport_num);
 	mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
 	mlx5_esw_vport_disable(esw, vport_num);
 }
-- 
2.36.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ