lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <1757486861-542133-4-git-send-email-tariqt@nvidia.com>
Date: Wed, 10 Sep 2025 09:47:41 +0300
From: Tariq Toukan <tariqt@...dia.com>
To: Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
	Paolo Abeni <pabeni@...hat.com>, Andrew Lunn <andrew+netdev@...n.ch>, "David
 S. Miller" <davem@...emloft.net>
CC: Saeed Mahameed <saeedm@...dia.com>, Leon Romanovsky <leon@...nel.org>,
	Tariq Toukan <tariqt@...dia.com>, Mark Bloch <mbloch@...dia.com>, "John
 Fastabend" <john.fastabend@...il.com>, Sabrina Dubroca <sd@...asysnail.net>,
	<netdev@...r.kernel.org>, <linux-rdma@...r.kernel.org>,
	<linux-kernel@...r.kernel.org>, Gal Pressman <gal@...dia.com>, Boris Pismenny
	<borisp@...dia.com>, Shahar Shitrit <shshitrit@...dia.com>
Subject: [PATCH net 3/3] net/mlx5e: kTLS, cancel RX async resync request in error flows

From: Shahar Shitrit <shshitrit@...dia.com>

When packets are lost and the device loses track of TLS records,
the device attempts to resync by tracking TLS records and requests
an async resync from software for this TLS connection.

The TLS module handles such device RX resync requests by logging record
headers and comparing them with the record tcp_sn when provided by the
device. It also increments rcd_delta to track how far the current
record tcp_sn is from the tcp_sn of the original resync request.
If the device later responds with a matching tcp_sn, the TLS module
approves the tcp_sn for resync.

However, the device response may be delayed or never arrive,
particularly due to traffic-related issues such as packet drops or
reordering. In such cases, the TLS module remains unaware that resync
will not complete, and continues performing unnecessary work by logging
headers and incrementing rcd_delta, which can eventually exceed the
threshold and trigger a WARN(). For example, this was observed when the
device got out of tracking, causing
mlx5e_ktls_handle_get_psv_completion() to fail and ultimately leading
to the rcd_delta warning.

To address this, call tls_offload_rx_resync_async_request_cancel()
to cancel the resync request and stop resync tracking in such error
cases. Also, increment the tls_resync_req_skip counter to track these
cancellations.

Signed-off-by: Shahar Shitrit <shshitrit@...dia.com>
Signed-off-by: Tariq Toukan <tariqt@...dia.com>
---
 .../mellanox/mlx5/core/en_accel/ktls_rx.c     | 29 +++++++++++++++++--
 .../mellanox/mlx5/core/en_accel/ktls_txrx.h   |  4 +++
 .../net/ethernet/mellanox/mlx5/core/en_rx.c   |  4 +++
 3 files changed, 34 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index 65ccb33edafb..ec9400de95c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -339,14 +339,19 @@ static void resync_handle_work(struct work_struct *work)
 
 	if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) {
 		mlx5e_ktls_priv_rx_put(priv_rx);
+		priv_rx->rq_stats->tls_resync_req_skip++;
+		tls_offload_rx_resync_async_request_cancel(priv_rx->sk);
 		return;
 	}
 
 	c = resync->priv->channels.c[priv_rx->rxq];
 	sq = &c->async_icosq;
 
-	if (resync_post_get_progress_params(sq, priv_rx))
+	if (resync_post_get_progress_params(sq, priv_rx)) {
+		priv_rx->rq_stats->tls_resync_req_skip++;
+		tls_offload_rx_resync_async_request_cancel(priv_rx->sk);
 		mlx5e_ktls_priv_rx_put(priv_rx);
+	}
 }
 
 static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync,
@@ -431,8 +436,11 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
 
 	priv_rx = buf->priv_rx;
 	dev = mlx5_core_dma_dev(sq->channel->mdev);
-	if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags)))
+	if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) {
+		priv_rx->rq_stats->tls_resync_req_skip++;
+		tls_offload_rx_resync_async_request_cancel(priv_rx->sk);
 		goto out;
+	}
 
 	dma_sync_single_for_cpu(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE,
 				DMA_FROM_DEVICE);
@@ -443,6 +451,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
 	if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING ||
 	    auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) {
 		priv_rx->rq_stats->tls_resync_req_skip++;
+		tls_offload_rx_resync_async_request_cancel(priv_rx->sk);
 		goto out;
 	}
 
@@ -472,8 +481,10 @@ static bool resync_queue_get_psv(struct sock *sk)
 
 	resync = &priv_rx->resync;
 	mlx5e_ktls_priv_rx_get(priv_rx);
-	if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work)))
+	if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) {
 		mlx5e_ktls_priv_rx_put(priv_rx);
+		return false;
+	}
 
 	return true;
 }
@@ -557,6 +568,18 @@ void mlx5e_ktls_rx_resync(struct net_device *netdev, struct sock *sk,
 	resync_handle_seq_match(priv_rx, c);
 }
 
+void
+mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi)
+{
+	struct mlx5e_ktls_offload_context_rx *priv_rx;
+	struct mlx5e_ktls_rx_resync_buf *buf;
+
+	buf = wi->tls_get_params.buf;
+	priv_rx = buf->priv_rx;
+	priv_rx->rq_stats->tls_resync_req_skip++;
+	tls_offload_rx_resync_async_request_cancel(priv_rx->sk);
+}
+
 /* End of resync section */
 
 void mlx5e_ktls_handle_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
index f87b65c560ea..cb08799769ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_txrx.h
@@ -29,6 +29,10 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi,
 void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
 					   struct mlx5e_tx_wqe_info *wi,
 					   u32 *dma_fifo_cc);
+
+void
+mlx5e_ktls_rx_resync_async_request_cancel(struct mlx5e_icosq_wqe_info *wi);
+
 static inline bool
 mlx5e_ktls_tx_try_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
 					  struct mlx5e_tx_wqe_info *wi,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index b8c609d91d11..c713e683ef1d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1035,6 +1035,10 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 				netdev_WARN_ONCE(cq->netdev,
 						 "Bad OP in ICOSQ CQE: 0x%x\n",
 						 get_cqe_opcode(cqe));
+#ifdef CONFIG_MLX5_EN_TLS
+				if (wi->wqe_type == MLX5E_ICOSQ_WQE_GET_PSV_TLS)
+					mlx5e_ktls_rx_resync_async_request_cancel(wi);
+#endif
 				mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
 						     (struct mlx5_err_cqe *)cqe);
 				mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
-- 
2.31.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ