lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200219032205.15264-5-saeedm@mellanox.com>
Date:   Wed, 19 Feb 2020 03:23:08 +0000
From:   Saeed Mahameed <saeedm@...lanox.com>
To:     "David S. Miller" <davem@...emloft.net>,
        "kuba@...nel.org" <kuba@...nel.org>
CC:     "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
        Aya Levin <ayal@...lanox.com>,
        Moshe Shemesh <moshe@...lanox.com>,
        Jiri Pirko <jiri@...lanox.com>,
        Saeed Mahameed <saeedm@...lanox.com>
Subject: [net-next V4 04/13] net/mlx5e: Support dump callback in TX reporter

From: Aya Levin <ayal@...lanox.com>

Add support for SQ's FW dump on TX reporter's events. Use Resource dump
API to retrieve the relevant data: SX slice, SQ dump and SQ buffer. Wrap
it in formatted messages and store the binary output in devlink core.

Example:
$ devlink health dump show pci/0000:00:0b.0 reporter tx
SX Slice:
   data:
     00 00 00 00 00 00 00 80 00 01 00 00 00 00 ad de
     22 01 00 00 00 00 ad de 00 00 00 00 00 00 00 00
     00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
     ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
     00 02 01 00 00 00 00 80 00 01 00 00 00 00 ad de
     22 01 00 00 00 00 ad de 00 20 40 90 81 88 ff ff
     00 00 00 00 00 00 00 00 15 00 15 00 00 00 00 00
     ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
     00 00 00 00 00 00 00 80 81 ae 41 06 00 ea ff ff
  SQs:
    SQ:
      index: 1511
      data:
        00 00 00 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 02 01 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 20 40 90 81 88 ff ff
        00 00 00 00 00 00 00 00 15 00 15 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 80 81 ae 41 06 00 ea ff ff
    SQ:
      index: 1516
      data:
        00 00 00 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 02 01 00 00 00 00 80 00 01 00 00 00 00 ad de
        22 01 00 00 00 00 ad de 00 20 40 90 81 88 ff ff
        00 00 00 00 00 00 00 00 15 00 15 00 00 00 00 00
        ff ff ff ff 01 00 00 00 00 00 00 00 00 00 00 00
        00 00 00 00 00 00 00 80 81 ae 41 06 00 ea ff ff

$ devlink health dump show pci/0000:00:0b.0 reporter tx -jp
{
    "SX Slice": {
    	"data": [ 0,0,0,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,32,64,144,129,136,255,255,0,0,0,0,0,0,0,0,21,0,21,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,129,174,65,6,0,234,255,255],
    	},
    "SQs": [ {
            "SQ": {
                "index": 1511,
                "data": [ 0,0,0,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,32,64,144,129,136,255,255,0,0,0,0,0,0,0,0,21,0,21,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,129,174,65,6,0,234,255,255]
            }
        },{
            "SQ": {
                "index": 1516,
                "data": [ 0,0,0,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,128,0,1,0,0,0,0,173,222,34,1,0,0,0,0,173,222,0,32,64,144,129,136,255,255,0,0,0,0,0,0,0,0,21,0,21,0,0,0,0,0,255,255,255,255,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,128,129,174,65,6,0,234,255,255]
            }
        } ]
}

Signed-off-by: Aya Levin <ayal@...lanox.com>
Reviewed-by: Moshe Shemesh <moshe@...lanox.com>
Acked-by: Jiri Pirko <jiri@...lanox.com>
Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
---
 .../ethernet/mellanox/mlx5/core/en/health.c   | 105 +++++++++++++++
 .../ethernet/mellanox/mlx5/core/en/health.h   |   8 +-
 .../mellanox/mlx5/core/en/reporter_tx.c       | 123 ++++++++++++++++++
 3 files changed, 234 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
index 3a975641f902..7178f421d2cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -3,6 +3,7 @@
 
 #include "health.h"
 #include "lib/eq.h"
+#include "lib/mlx5.h"
 
 int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
 {
@@ -204,3 +205,107 @@ int mlx5e_health_report(struct mlx5e_priv *priv,
 
 	return devlink_health_report(reporter, err_str, err_ctx);
 }
+
+#define MLX5_HEALTH_DEVLINK_MAX_SIZE 1024
+static int mlx5e_health_rsc_fmsg_binary(struct devlink_fmsg *fmsg,
+					const void *value, u32 value_len)
+
+{
+	u32 data_size;
+	u32 offset;
+	int err;
+
+	for (offset = 0; offset < value_len; offset += data_size) {
+		data_size = value_len - offset;
+		if (data_size > MLX5_HEALTH_DEVLINK_MAX_SIZE)
+			data_size = MLX5_HEALTH_DEVLINK_MAX_SIZE;
+		err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
+		if (err)
+			break;
+	}
+	return err;
+}
+
+int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
+			       struct devlink_fmsg *fmsg)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct mlx5_rsc_dump_cmd *cmd;
+	struct page *page;
+	int cmd_err, err;
+	int end_err;
+	int size;
+
+	if (IS_ERR_OR_NULL(mdev->rsc_dump))
+		return -EOPNOTSUPP;
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	err = devlink_fmsg_binary_pair_nest_start(fmsg, "data");
+	if (err)
+		return err;
+
+	cmd = mlx5_rsc_dump_cmd_create(mdev, key);
+	if (IS_ERR(cmd)) {
+		err = PTR_ERR(cmd);
+		goto free_page;
+	}
+
+	do {
+		cmd_err = mlx5_rsc_dump_next(mdev, cmd, page, &size);
+		if (cmd_err < 0) {
+			err = cmd_err;
+			goto destroy_cmd;
+		}
+
+		err = mlx5e_health_rsc_fmsg_binary(fmsg, page_address(page), size);
+		if (err)
+			goto destroy_cmd;
+
+	} while (cmd_err > 0);
+
+destroy_cmd:
+	mlx5_rsc_dump_cmd_destroy(cmd);
+	end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
+	if (end_err)
+		err = end_err;
+free_page:
+	__free_page(page);
+	return err;
+}
+
+int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+			    int queue_idx, char *lbl)
+{
+	struct mlx5_rsc_key key = {};
+	int err;
+
+	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
+	key.index1 = queue_idx;
+	key.size = PAGE_SIZE;
+	key.num_of_obj1 = 1;
+
+	err = devlink_fmsg_obj_nest_start(fmsg);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, lbl);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "index", queue_idx);
+	if (err)
+		return err;
+
+	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return devlink_fmsg_obj_nest_end(fmsg);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index d3693fa547ac..e90e3aec422f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -5,6 +5,7 @@
 #define __MLX5E_EN_HEALTH_H
 
 #include "en.h"
+#include "diag/rsc_dump.h"
 
 #define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
 
@@ -36,6 +37,7 @@ void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
 
 struct mlx5e_err_ctx {
 	int (*recover)(void *ctx);
+	int (*dump)(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, void *ctx);
 	void *ctx;
 };
 
@@ -48,6 +50,8 @@ int mlx5e_health_report(struct mlx5e_priv *priv,
 int mlx5e_health_create_reporters(struct mlx5e_priv *priv);
 void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv);
 void mlx5e_health_channels_update(struct mlx5e_priv *priv);
-
-
+int mlx5e_health_rsc_fmsg_dump(struct mlx5e_priv *priv, struct mlx5_rsc_key *key,
+			       struct devlink_fmsg *fmsg);
+int mlx5e_health_queue_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+			    int queue_idx, char *lbl);
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
index 623c949db54c..1772c9ce3938 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
@@ -246,6 +246,126 @@ static int mlx5e_tx_reporter_diagnose(struct devlink_health_reporter *reporter,
 	return err;
 }
 
+static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg,
+				     void *ctx)
+{
+	struct mlx5_rsc_key key = {};
+	struct mlx5e_txqsq *sq = ctx;
+	int err;
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		return 0;
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice");
+	if (err)
+		return err;
+
+	key.size = PAGE_SIZE;
+	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
+	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "SQ");
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "QPC");
+	if (err)
+		return err;
+
+	key.rsc = MLX5_SGMT_TYPE_FULL_QPC;
+	key.index1 = sq->sqn;
+	key.num_of_obj1 = 1;
+
+	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "send_buff");
+	if (err)
+		return err;
+
+	key.rsc = MLX5_SGMT_TYPE_SND_BUFF;
+	key.num_of_obj2 = MLX5_RSC_DUMP_ALL;
+	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return mlx5e_reporter_named_obj_nest_end(fmsg);
+}
+
+static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv,
+					  struct devlink_fmsg *fmsg)
+{
+	struct mlx5_rsc_key key = {};
+	int i, tc, err;
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
+		return 0;
+
+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "SX Slice");
+	if (err)
+		return err;
+
+	key.size = PAGE_SIZE;
+	key.rsc = MLX5_SGMT_TYPE_SX_SLICE_ALL;
+	err = mlx5e_health_rsc_fmsg_dump(priv, &key, fmsg);
+	if (err)
+		return err;
+
+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_arr_pair_nest_start(fmsg, "SQs");
+	if (err)
+		return err;
+
+	for (i = 0; i < priv->channels.num; i++) {
+		struct mlx5e_channel *c = priv->channels.c[i];
+
+		for (tc = 0; tc < priv->channels.params.num_tc; tc++) {
+			struct mlx5e_txqsq *sq = &c->sq[tc];
+
+			err = mlx5e_health_queue_dump(priv, fmsg, sq->sqn, "SQ");
+			if (err)
+				return err;
+		}
+	}
+	return devlink_fmsg_arr_pair_nest_end(fmsg);
+}
+
+static int mlx5e_tx_reporter_dump_from_ctx(struct mlx5e_priv *priv,
+					   struct mlx5e_err_ctx *err_ctx,
+					   struct devlink_fmsg *fmsg)
+{
+	return err_ctx->dump(priv, fmsg, err_ctx->ctx);
+}
+
+static int mlx5e_tx_reporter_dump(struct devlink_health_reporter *reporter,
+				  struct devlink_fmsg *fmsg, void *context,
+				  struct netlink_ext_ack *extack)
+{
+	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
+	struct mlx5e_err_ctx *err_ctx = context;
+
+	return err_ctx ? mlx5e_tx_reporter_dump_from_ctx(priv, err_ctx, fmsg) :
+			 mlx5e_tx_reporter_dump_all_sqs(priv, fmsg);
+}
+
 void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
 {
 	struct mlx5e_priv *priv = sq->channel->priv;
@@ -254,6 +374,7 @@ void mlx5e_reporter_tx_err_cqe(struct mlx5e_txqsq *sq)
 
 	err_ctx.ctx = sq;
 	err_ctx.recover = mlx5e_tx_reporter_err_cqe_recover;
+	err_ctx.dump = mlx5e_tx_reporter_dump_sq;
 	sprintf(err_str, "ERR CQE on SQ: 0x%x", sq->sqn);
 
 	mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx);
@@ -267,6 +388,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq)
 
 	err_ctx.ctx = sq;
 	err_ctx.recover = mlx5e_tx_reporter_timeout_recover;
+	err_ctx.dump = mlx5e_tx_reporter_dump_sq;
 	sprintf(err_str,
 		"TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u\n",
 		sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc,
@@ -279,6 +401,7 @@ static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = {
 		.name = "tx",
 		.recover = mlx5e_tx_reporter_recover,
 		.diagnose = mlx5e_tx_reporter_diagnose,
+		.dump = mlx5e_tx_reporter_dump,
 };
 
 #define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500
-- 
2.24.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ