lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190708142203.GJ2201@nanopsycho>
Date:   Mon, 8 Jul 2019 16:22:03 +0200
From:   Jiri Pirko <jiri@...nulli.us>
To:     Tariq Toukan <tariqt@...lanox.com>
Cc:     "David S. Miller" <davem@...emloft.net>, netdev@...r.kernel.org,
        Eran Ben Elisha <eranbe@...lanox.com>, ayal@...lanox.com,
        jiri@...lanox.com, Saeed Mahameed <saeedm@...lanox.com>,
        moshe@...lanox.com
Subject: Re: [PATCH net-next 11/16] net/mlx5e: Add support to rx reporter
 diagnose

Sun, Jul 07, 2019 at 01:53:03PM CEST, tariqt@...lanox.com wrote:
>From: Aya Levin <ayal@...lanox.com>
>
>Add rx reporter, which supports diagnose call-back. Diagnostics output
>include: information common to all RQs: RQ type, RQ size, RQ stride
>size, CQ size and CQ stride size. In addition advertise information per
>RQ and its related icosq and attached CQ.
>
>$ devlink health diagnose pci/0000:00:0b.0 reporter rx
>Common config:
>   RQ: type: 2 stride size: 2048 size: 8
>   CQ: stride size: 64 size: 1024
> RQs:
>   channel ix: 0 rqn: 4284 HW state: 1 SW state: 3 posted WQEs: 7 cc: 7 ICOSQ HW state: 1 CQ: cqn: 1032 HW status: 0
>   channel ix: 1 rqn: 4289 HW state: 1 SW state: 3 posted WQEs: 7 cc: 7 ICOSQ HW state: 1 CQ: cqn: 1036 HW status: 0
>   channel ix: 2 rqn: 4294 HW state: 1 SW state: 3 posted WQEs: 7 cc: 7 ICOSQ HW state: 1 CQ: cqn: 1040 HW status: 0
>   channel ix: 3 rqn: 4299 HW state: 1 SW state: 3 posted WQEs: 7 cc: 7 ICOSQ HW state: 1 CQ: cqn: 1044 HW status: 0
>
>$ devlink health diagnose pci/0000:00:0b.0 reporter rx -jp
>{
>    "Common config": [
>        "RQ": {
>            "type": 2,
>            "stride size": 2048,
>            "size": 8
>        },
>        "CQ": {
>            "stride size": 64,
>            "size": 1024
>        } ],
>    "RQs": [ {
>            "channel ix": 0,
>            "rqn": 4284,
>            "HW state": 1,
>            "SW state": 3,
>            "posted WQEs": 7,
>            "cc": 7,
>            "ICOSQ HW state": 1,
>            "CQ": {
>                "cqn": 1032,
>                "HW status": 0
>            }
>        },{
>            "channel ix": 1,
>            "rqn": 4289,
>            "HW state": 1,
>            "SW state": 3,
>            "posted WQEs": 7,
>            "cc": 7,
>            "ICOSQ HW state": 1,
>            "CQ": {
>                "cqn": 1036,
>                "HW status": 0
>            }
>        },{
>            "channel ix": 2,
>            "rqn": 4294,
>            "HW state": 1,
>            "SW state": 3,
>            "posted WQEs": 7,
>            "cc": 7,
>            "ICOSQ HW state": 1,
>            "CQ": {
>                "cqn": 1040,
>                "HW status": 0
>            }
>        },{
>            "channel ix": 3,
>            "rqn": 4299,
>            "HW state": 1,
>            "SW state": 3,
>            "posted WQEs": 7,
>            "cc": 7,
>            "ICOSQ HW state": 1,
>            "CQ": {
>                "cqn": 1044,
>                "HW status": 0
>            }
>        } ]
>}
>
>Signed-off-by: Aya Levin <ayal@...lanox.com>
>Signed-off-by: Tariq Toukan <tariqt@...lanox.com>
>---
> drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   4 +-
> drivers/net/ethernet/mellanox/mlx5/core/en.h       |  21 +++
> .../net/ethernet/mellanox/mlx5/core/en/health.c    |  31 ++++
> .../net/ethernet/mellanox/mlx5/core/en/health.h    |   7 +
> .../ethernet/mellanox/mlx5/core/en/reporter_rx.c   | 194 +++++++++++++++++++++
> drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  29 +--
> 6 files changed, 258 insertions(+), 28 deletions(-)
> create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
>
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>index 23d566a45a30..a3b9659649a8 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
>@@ -24,8 +24,8 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
> mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
> 		en_tx.o en_rx.o en_dim.o en_txrx.o en/xdp.o en_stats.o \
> 		en_selftest.o en/port.o en/monitor_stats.o en/health.o \
>-		en/reporter_tx.o en/params.o en/xsk/umem.o en/xsk/setup.o \
>-		en/xsk/rx.o en/xsk/tx.o
>+		en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/umem.o \
>+		en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o
> 
> #
> # Netdev extra
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
>index 263558875f20..f7c5cf7a7064 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
>@@ -848,6 +848,7 @@ struct mlx5e_priv {
> 	struct mlx5e_tls          *tls;
> #endif
> 	struct devlink_health_reporter *tx_reporter;
>+	struct devlink_health_reporter *rx_reporter;
> 	struct mlx5e_xsk           xsk;
> };
> 
>@@ -888,6 +889,26 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
> int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
> void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);
> 
>+static inline u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq)
>+{
>+	switch (rq->wq_type) {
>+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
>+		return mlx5_wq_ll_get_size(&rq->mpwqe.wq);
>+	default:
>+		return mlx5_wq_cyc_get_size(&rq->wqe.wq);
>+	}
>+}
>+
>+static inline u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq)
>+{
>+	switch (rq->wq_type) {
>+	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
>+		return rq->mpwqe.wq.cur_sz;
>+	default:
>+		return rq->wqe.wq.cur_sz;
>+	}
>+}
>+
> bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
> bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
> 				struct mlx5e_params *params);
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
>index a266717d41e5..a0579de8e2e0 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
>@@ -96,6 +96,37 @@ int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *
> 	return 0;
> }
> 
>+int mlx5e_health_create_reporters(struct mlx5e_priv *priv)
>+{
>+	int err;
>+
>+	err = mlx5e_reporter_tx_create(priv);
>+	if (err)
>+		return err;
>+
>+	err = mlx5e_reporter_rx_create(priv);
>+	if (err)
>+		return err;
>+
>+	return 0;
>+}
>+
>+void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
>+{
>+	mlx5e_reporter_rx_destroy(priv);
>+	mlx5e_reporter_tx_destroy(priv);
>+}
>+
>+void mlx5e_health_channels_update(struct mlx5e_priv *priv)
>+{
>+	if (priv->tx_reporter)
>+		devlink_health_reporter_state_update(priv->tx_reporter,
>+						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
>+	if (priv->rx_reporter)
>+		devlink_health_reporter_state_update(priv->rx_reporter,
>+						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
>+}

Could you introduce mlx5e_health_create_reporters(),
mlx5e_health_destroy_reporters(), and mlx5e_health_channels_update()
in a separate patch that precedes this one?


>+
> int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn)
> {
> 	struct mlx5_core_dev *mdev = channel->mdev;
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
>index b0c8bda3d25f..7829ea229914 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
>@@ -16,6 +16,9 @@
> int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name);
> int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg);
> 
>+int mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
>+void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
>+
> #define MLX5E_REPORTER_PER_Q_MAX_LEN 256
> 
> struct mlx5e_err_ctx {
>@@ -29,5 +32,9 @@ struct mlx5e_err_ctx {
> int mlx5e_health_report(struct mlx5e_priv *priv,
> 			struct devlink_health_reporter *reporter, char *err_str,
> 			struct mlx5e_err_ctx *err_ctx);
>+int mlx5e_health_create_reporters(struct mlx5e_priv *priv);
>+void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv);
>+void mlx5e_health_channels_update(struct mlx5e_priv *priv);
>+
> 
> #endif
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
>new file mode 100644
>index 000000000000..b24bdff473b0
>--- /dev/null
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
>@@ -0,0 +1,194 @@
>+// SPDX-License-Identifier: GPL-2.0
>+// Copyright (c) 2019 Mellanox Technologies.
>+
>+#include "health.h"
>+#include "params.h"
>+
>+static int mlx5e_query_rq_state(struct mlx5_core_dev *dev, u32 rqn, u8 *state)
>+{
>+	int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
>+	void *out;
>+	void *rqc;
>+	int err;
>+
>+	out = kvzalloc(outlen, GFP_KERNEL);
>+	if (!out)
>+		return -ENOMEM;
>+
>+	err = mlx5_core_query_rq(dev, rqn, out);
>+	if (err)
>+		goto out;
>+
>+	rqc = MLX5_ADDR_OF(query_rq_out, out, rq_context);
>+	*state = MLX5_GET(rqc, rqc, state);
>+
>+out:
>+	kvfree(out);
>+	return err;
>+}
>+
>+static int mlx5e_rx_reporter_build_diagnose_output(struct mlx5e_rq *rq,
>+						   struct devlink_fmsg *fmsg)
>+{
>+	struct mlx5e_priv *priv = rq->channel->priv;
>+	struct mlx5e_params *params = &priv->channels.params;
>+	struct mlx5e_icosq *icosq = &rq->channel->icosq;
>+	u8 icosq_hw_state;
>+	int wqes_sz;
>+	u8 hw_state;
>+	u16 wq_head;
>+	int err;
>+
>+	err = mlx5e_query_rq_state(priv->mdev, rq->rqn, &hw_state);
>+	if (err)
>+		return err;
>+
>+	err = mlx5_core_query_sq_state(priv->mdev, icosq->sqn, &icosq_hw_state);
>+	if (err)
>+		return err;
>+
>+	wqes_sz = mlx5e_rqwq_get_cur_sz(rq);
>+	wq_head = params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
>+		  rq->mpwqe.wq.head : mlx5_wq_cyc_get_head(&rq->wqe.wq);
>+
>+	err = devlink_fmsg_obj_nest_start(fmsg);
>+	if (err)
>+		return err;
>+
>+	err = devlink_fmsg_u32_pair_put(fmsg, "channel ix", rq->channel->ix);
>+	if (err)
>+		return err;
>+
>+	err = devlink_fmsg_u32_pair_put(fmsg, "rqn", rq->rqn);
>+	if (err)
>+		return err;
>+
>+	err = devlink_fmsg_u8_pair_put(fmsg, "HW state", hw_state);
>+	if (err)
>+		return err;
>+
>+	err = devlink_fmsg_u8_pair_put(fmsg, "SW state", rq->state);
>+	if (err)
>+		return err;
>+
>+	err = devlink_fmsg_u32_pair_put(fmsg, "posted WQEs", wqes_sz);
>+	if (err)
>+		return err;
>+
>+	err = devlink_fmsg_u32_pair_put(fmsg, "cc", wq_head);
>+	if (err)
>+		return err;
>+
>+	err = devlink_fmsg_u8_pair_put(fmsg, "ICOSQ HW state", icosq_hw_state);
>+	if (err)
>+		return err;
>+
>+	err = mlx5e_reporter_cq_diagnose(&rq->cq, fmsg);
>+	if (err)
>+		return err;
>+
>+	err = devlink_fmsg_obj_nest_end(fmsg);
>+	if (err)
>+		return err;
>+
>+	return 0;
>+}
>+
>+static int mlx5e_rx_reporter_diagnose(struct devlink_health_reporter *reporter,
>+				      struct devlink_fmsg *fmsg)
>+{
>+	struct mlx5e_priv *priv = devlink_health_reporter_priv(reporter);
>+	struct mlx5e_params *params = &priv->channels.params;
>+	struct mlx5e_rq *generic_rq;
>+	u32 rq_stride, rq_sz;
>+	int i, err = 0;
>+
>+	mutex_lock(&priv->state_lock);
>+
>+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
>+		goto unlock;
>+
>+	generic_rq = &priv->channels.c[0]->rq;
>+	rq_sz = mlx5e_rqwq_get_size(generic_rq);
>+	rq_stride = BIT(mlx5e_mpwqe_get_log_stride_size(priv->mdev, params, NULL));
>+
>+	err = devlink_fmsg_arr_pair_nest_start(fmsg, "Common config");
>+	if (err)
>+		goto unlock;
>+
>+	err = mlx5e_reporter_named_obj_nest_start(fmsg, "RQ");
>+	if (err)
>+		goto unlock;
>+
>+	err = devlink_fmsg_u8_pair_put(fmsg, "type", params->rq_wq_type);
>+	if (err)
>+		goto unlock;
>+
>+	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", rq_stride);
>+	if (err)
>+		goto unlock;
>+
>+	err = devlink_fmsg_u32_pair_put(fmsg, "size", rq_sz);
>+	if (err)
>+		goto unlock;
>+
>+	err = devlink_fmsg_arr_pair_nest_end(fmsg);

This is odd. I think you should call
mlx5e_reporter_named_obj_nest_end() here and
devlink_fmsg_arr_pair_nest_end() after
mlx5e_reporter_cq_common_diagnose(), so that the nests are closed in
the reverse order in which they were opened.

Misuse of these nesting helpers seems to be a pattern. Apparently we
need some checker for it.


>+	if (err)
>+		goto unlock;
>+
>+	err = mlx5e_reporter_cq_common_diagnose(&generic_rq->cq, fmsg);
>+	if (err)
>+		goto unlock;
>+
>+	err = mlx5e_reporter_named_obj_nest_end(fmsg);
>+	if (err)
>+		goto unlock;
>+
>+	err = devlink_fmsg_arr_pair_nest_start(fmsg, "RQs");
>+	if (err)
>+		goto unlock;
>+
>+	for (i = 0; i < priv->channels.num; i++) {
>+		struct mlx5e_rq *rq = &priv->channels.c[i]->rq;
>+
>+		err = mlx5e_rx_reporter_build_diagnose_output(rq, fmsg);
>+		if (err)
>+			goto unlock;
>+	}
>+	err = devlink_fmsg_arr_pair_nest_end(fmsg);
>+	if (err)
>+		goto unlock;
>+unlock:
>+	mutex_unlock(&priv->state_lock);
>+	return err;
>+}
>+
>+static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = {
>+		.name = "rx",
>+		.diagnose = mlx5e_rx_reporter_diagnose,
>+};
>+
>+int mlx5e_reporter_rx_create(struct mlx5e_priv *priv)
>+{
>+	struct devlink_health_reporter *reporter;
>+	struct mlx5_core_dev *mdev = priv->mdev;
>+	struct devlink *devlink = priv_to_devlink(mdev);
>+
>+	reporter =
>+		devlink_health_reporter_create(devlink, &mlx5_rx_reporter_ops,
>+					       0, false, priv);

Rather align like this:

	reporter = devlink_health_reporter_create(devlink,
						  &mlx5_rx_reporter_ops,
						  0, false, priv);


>+	if (IS_ERR(reporter))
>+		netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n",
>+			    PTR_ERR(reporter));
>+	else
>+		priv->tx_reporter = reporter;
>+	return PTR_ERR_OR_ZERO(reporter);

Change the flow to:

	if (IS_ERR(reporter)) {
		netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n",
			    PTR_ERR(reporter));
		return PTR_ERR(reporter);
	}
	priv->rx_reporter = reporter;
	return 0;


>+}
>+
>+void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv)
>+{
>+	if (!priv->rx_reporter)
>+		return;
>+
>+	devlink_health_reporter_destroy(priv->rx_reporter);
>+}
>diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>index 98c925e72706..3922905e909f 100644
>--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
>@@ -247,26 +247,6 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
> 	ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
> }
> 
>-static u32 mlx5e_rqwq_get_size(struct mlx5e_rq *rq)
>-{
>-	switch (rq->wq_type) {
>-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
>-		return mlx5_wq_ll_get_size(&rq->mpwqe.wq);
>-	default:
>-		return mlx5_wq_cyc_get_size(&rq->wqe.wq);
>-	}
>-}
>-
>-static u32 mlx5e_rqwq_get_cur_sz(struct mlx5e_rq *rq)
>-{
>-	switch (rq->wq_type) {
>-	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
>-		return rq->mpwqe.wq.cur_sz;
>-	default:
>-		return rq->wqe.wq.cur_sz;
>-	}
>-}
>-
> static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
> 				     struct mlx5e_channel *c)
> {
>@@ -2320,10 +2300,7 @@ int mlx5e_open_channels(struct mlx5e_priv *priv,
> 			goto err_close_channels;
> 	}
> 
>-	if (priv->tx_reporter)
>-		devlink_health_reporter_state_update(priv->tx_reporter,
>-						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
>-
>+	mlx5e_health_channels_update(priv);
> 	kvfree(cparam);
> 	return 0;
> 
>@@ -3201,7 +3178,6 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
> {
> 	int tc;
> 
>-	mlx5e_reporter_tx_destroy(priv);
> 	for (tc = 0; tc < priv->profile->max_tc; tc++)
> 		mlx5e_destroy_tis(priv->mdev, priv->tisn[tc]);
> }
>@@ -4985,12 +4961,14 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
> 		mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
> 	mlx5e_build_nic_netdev(netdev);
> 	mlx5e_build_tc2txq_maps(priv);
>+	mlx5e_health_create_reporters(priv);
> 
> 	return 0;
> }
> 
> static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
> {
>+	mlx5e_health_destroy_reporters(priv);
> 	mlx5e_tls_cleanup(priv);
> 	mlx5e_ipsec_cleanup(priv);
> 	mlx5e_netdev_cleanup(priv->netdev, priv);
>@@ -5093,7 +5071,6 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
> #ifdef CONFIG_MLX5_CORE_EN_DCB
> 	mlx5e_dcbnl_initialize(priv);
> #endif
>-	mlx5e_reporter_tx_create(priv);
> 	return 0;
> }
> 
>-- 
>1.8.3.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ