lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu,  6 Feb 2020 12:57:09 -0800
From:   Saeed Mahameed <saeedm@...lanox.com>
To:     "David S. Miller" <davem@...emloft.net>
Cc:     netdev@...r.kernel.org, Tariq Toukan <tariqt@...lanox.com>,
        Aya Levin <ayal@...lanox.com>,
        Saeed Mahameed <saeedm@...lanox.com>
Subject: [net 4/5] net/mlx5e: TX, Error completion is for last WQE in batch

From: Tariq Toukan <tariqt@...lanox.com>

For a cyclic work queue, when not requesting a completion per WQE,
a single CQE might indicate the completion of several WQEs.
However, in case some WQE in the batch causes an error, then an error
completion is issued, breaking the batch, and pointing to the offending
WQE in the wqe_counter field.

Hence, WQE-specific error CQE handling (like printing, breaking, etc...)
should be performed only for the last WQE in batch.

Fixes: 130c7b46c93d ("net/mlx5e: TX, Dump WQs wqe descriptors on CQE with error events")
Fixes: fd9b4be8002c ("net/mlx5e: RX, Support multiple outstanding UMR posts")
Signed-off-by: Tariq Toukan <tariqt@...lanox.com>
Reviewed-by: Aya Levin <ayal@...lanox.com>
Signed-off-by: Saeed Mahameed <saeedm@...lanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   | 16 +++++----
 .../net/ethernet/mellanox/mlx5/core/en_tx.c   | 33 ++++++++-----------
 2 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 9e9960146e5b..1c3ab69cbd96 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -613,13 +613,6 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 
 		wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
-		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
-			netdev_WARN_ONCE(cq->channel->netdev,
-					 "Bad OP in ICOSQ CQE: 0x%x\n", get_cqe_opcode(cqe));
-			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
-				queue_work(cq->channel->priv->wq, &sq->recover_work);
-			break;
-		}
 		do {
 			struct mlx5e_sq_wqe_info *wi;
 			u16 ci;
@@ -629,6 +622,15 @@ void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
 			wi = &sq->db.ico_wqe[ci];
 
+			if (last_wqe && unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+				netdev_WARN_ONCE(cq->channel->netdev,
+						 "Bad OP in ICOSQ CQE: 0x%x\n",
+						 get_cqe_opcode(cqe));
+				if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
+					queue_work(cq->channel->priv->wq, &sq->recover_work);
+				break;
+			}
+
 			if (likely(wi->opcode == MLX5_OPCODE_UMR)) {
 				sqcc += MLX5E_UMR_WQEBBS;
 				wi->umr.rq->mpwqe.umr_completed++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 2565ba8692d9..ee60383adc5b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -451,34 +451,17 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 
 	i = 0;
 	do {
+		struct mlx5e_tx_wqe_info *wi;
 		u16 wqe_counter;
 		bool last_wqe;
+		u16 ci;
 
 		mlx5_cqwq_pop(&cq->wq);
 
 		wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
-		if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
-			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
-					      &sq->state)) {
-				struct mlx5e_tx_wqe_info *wi;
-				u16 ci;
-
-				ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
-				wi = &sq->db.wqe_info[ci];
-				mlx5e_dump_error_cqe(sq,
-						     (struct mlx5_err_cqe *)cqe);
-				mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
-				queue_work(cq->channel->priv->wq,
-					   &sq->recover_work);
-			}
-			stats->cqe_err++;
-		}
-
 		do {
-			struct mlx5e_tx_wqe_info *wi;
 			struct sk_buff *skb;
-			u16 ci;
 			int j;
 
 			last_wqe = (sqcc == wqe_counter);
@@ -516,6 +499,18 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 			napi_consume_skb(skb, napi_budget);
 		} while (!last_wqe);
 
+		if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
+			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
+					      &sq->state)) {
+				mlx5e_dump_error_cqe(sq,
+						     (struct mlx5_err_cqe *)cqe);
+				mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
+				queue_work(cq->channel->priv->wq,
+					   &sq->recover_work);
+			}
+			stats->cqe_err++;
+		}
+
 	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
 
 	stats->cqes += i;
-- 
2.24.1

Powered by blists - more mailing lists