lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20190820202352.2995-12-saeedm@mellanox.com>
Date:   Tue, 20 Aug 2019 20:24:32 +0000
From:   Saeed Mahameed <saeedm@...lanox.com>
To:     "David S. Miller" <davem@...emloft.net>
CC:     "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
        Aya Levin <ayal@...lanox.com>,
        Tariq Toukan <tariqt@...lanox.com>,
        Jiri Pirko <jiri@...lanox.com>,
        Saeed Mahameed <saeedm@...lanox.com>
Subject: [net-next v2 11/16] net/mlx5e: Report and recover from rx timeout

From: Aya Levin <ayal@...lanox.com>

Add support for report and recovery from rx timeout. On driver open we
post NOP work request on the rx channels to trigger napi in order to
fillup the rx rings. In case napi wasn't scheduled due to a lost
interrupt, perform EQ recovery.

Signed-off-by: Aya Levin <ayal@...lanox.com>
Reviewed-by: Tariq Toukan <tariqt@...lanox.com>
Acked-by: Jiri Pirko <jiri@...lanox.com>
Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
---
 .../ethernet/mellanox/mlx5/core/en/health.h   |  1 +
 .../mellanox/mlx5/core/en/reporter_rx.c       | 32 +++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  1 +
 3 files changed, 34 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
index 8acd9dc520cf..b4a2d9be17d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.h
@@ -19,6 +19,7 @@ int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg);
 int mlx5e_reporter_rx_create(struct mlx5e_priv *priv);
 void mlx5e_reporter_rx_destroy(struct mlx5e_priv *priv);
 void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq);
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq);
 
 #define MLX5E_REPORTER_PER_Q_MAX_LEN 256
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index ff49853b65d2..05450df87554 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -115,6 +115,38 @@ void mlx5e_reporter_icosq_cqe_err(struct mlx5e_icosq *icosq)
 	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
 }
 
+static int mlx5e_rx_reporter_timeout_recover(void *ctx)
+{
+	struct mlx5e_icosq *icosq;
+	struct mlx5_eq_comp *eq;
+	struct mlx5e_rq *rq;
+	int err;
+
+	rq = ctx;
+	icosq = &rq->channel->icosq;
+	eq = rq->cq.mcq.eq;
+	err = mlx5e_health_channel_eq_recover(eq, rq->channel);
+	if (err)
+		clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
+
+	return err;
+}
+
+void mlx5e_reporter_rx_timeout(struct mlx5e_rq *rq)
+{
+	struct mlx5e_icosq *icosq = &rq->channel->icosq;
+	struct mlx5e_priv *priv = rq->channel->priv;
+	char err_str[MLX5E_REPORTER_PER_Q_MAX_LEN];
+	struct mlx5e_err_ctx err_ctx = {};
+
+	err_ctx.ctx = rq;
+	err_ctx.recover = mlx5e_rx_reporter_timeout_recover;
+	sprintf(err_str, "RX timeout on channel: %d, ICOSQ: 0x%x RQ: 0x%x, CQ: 0x%x\n",
+		icosq->channel->ix, icosq->sqn, rq->rqn, rq->cq.mcq.cqn);
+
+	mlx5e_health_report(priv, priv->rx_reporter, err_str, &err_ctx);
+}
+
 static int mlx5e_rx_reporter_recover_from_ctx(struct mlx5e_err_ctx *err_ctx)
 {
 	return err_ctx->recover(err_ctx->ctx);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0e43ea1c27b0..54f320391f63 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -808,6 +808,7 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
 	netdev_warn(c->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
 		    c->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
 
+	mlx5e_reporter_rx_timeout(rq);
 	return -ETIMEDOUT;
 }
 
-- 
2.21.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ