lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1768376800-1607672-5-git-send-email-tariqt@nvidia.com>
Date: Wed, 14 Jan 2026 09:46:40 +0200
From: Tariq Toukan <tariqt@...dia.com>
To: Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
	Paolo Abeni <pabeni@...hat.com>, Andrew Lunn <andrew+netdev@...n.ch>, "David
 S. Miller" <davem@...emloft.net>
CC: Saeed Mahameed <saeedm@...dia.com>, Leon Romanovsky <leon@...nel.org>,
	Tariq Toukan <tariqt@...dia.com>, Mark Bloch <mbloch@...dia.com>,
	<netdev@...r.kernel.org>, <linux-rdma@...r.kernel.org>,
	<linux-kernel@...r.kernel.org>, Gal Pressman <gal@...dia.com>, William Tu
	<witu@...dia.com>, <toke@...hat.com>
Subject: [PATCH net-next V2 4/4] net/mlx5e: Conditionally create async ICOSQ

From: William Tu <witu@...dia.com>

The async ICOSQ is only required by TLS RX (for re-sync flow) and XSK
TX. Create it only when these features are enabled instead of always
allocating it. This reduces per-channel memory usage, saves hardware
resources, improves latency, and decreases the default number of SQs
(from 3 to 2) and CQs (from 4 to 3). It also speeds up channel
open/close operations for a netdev when async ICOSQ is not needed.

Currently, enabling TLS RX does not trigger a channel reset. With the
async ICOSQ now created conditionally, its allocation would therefore
not be triggered on that path, causing a NULL pointer crash. One
solution is to reset the channels every time TLS RX is toggled.
However, that is not straightforward, as the offload state matters only
on connection creation, and can go on beyond the channels reset.

Instead, introduce a new field 'ktls_rx_was_enabled'. When TLS RX is
enabled for the first time, set the field and reset the channels, which
creates the async ICOSQ. From that point on, there is no need to reset
the channels on any TLS RX enable/disable: the async ICOSQ will always
be created.

For XSK TX, the async ICOSQ is used for wakeup control, and a channel
opened with an XSK pool is guaranteed to have its async ICOSQ allocated.

This improves the latency of interface up/down operations when it
applies.

Perf numbers:
NIC: ConnectX-7.
Test: Latency of interface up + down operations.

Measured 20% speedup.
Saving ~0.36 sec for 248 channels (~1.45 msec per channel).

Signed-off-by: William Tu <witu@...dia.com>
Signed-off-by: Tariq Toukan <tariqt@...dia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  1 +
 .../mellanox/mlx5/core/en_accel/ktls.c        | 10 ++++-
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 30 ++++++++++-----
 .../net/ethernet/mellanox/mlx5/core/en_txrx.c | 38 ++++++++++---------
 4 files changed, 50 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index a7076b26fd5c..d16bdef95703 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -939,6 +939,7 @@ struct mlx5e_priv {
 	u8                         max_opened_tc;
 	bool                       tx_ptp_opened;
 	bool                       rx_ptp_opened;
+	bool                       ktls_rx_was_enabled;
 	struct kernel_hwtstamp_config hwtstamp_config;
 	u16                        q_counter[MLX5_SD_MAX_GROUP_SZ];
 	u16                        drop_rq_q_counter;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
index e3e57c849436..1c2cc2aad2b0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -135,10 +135,15 @@ int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable)
 	int err = 0;
 
 	mutex_lock(&priv->state_lock);
-	if (enable)
+	if (enable) {
 		err = mlx5e_accel_fs_tcp_create(priv->fs);
-	else
+		if (!err && !priv->ktls_rx_was_enabled) {
+			priv->ktls_rx_was_enabled = true;
+			mlx5e_safe_reopen_channels(priv);
+		}
+	} else {
 		mlx5e_accel_fs_tcp_destroy(priv->fs);
+	}
 	mutex_unlock(&priv->state_lock);
 
 	return err;
@@ -161,6 +166,7 @@ int mlx5e_ktls_init_rx(struct mlx5e_priv *priv)
 			destroy_workqueue(priv->tls->rx_wq);
 			return err;
 		}
+		priv->ktls_rx_was_enabled = true;
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index aa4ff3963b86..d04ba93fe617 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2632,7 +2632,8 @@ static void mlx5e_close_async_icosq(struct mlx5e_icosq *async_icosq)
 
 static int mlx5e_open_queues(struct mlx5e_channel *c,
 			     struct mlx5e_params *params,
-			     struct mlx5e_channel_param *cparam)
+			     struct mlx5e_channel_param *cparam,
+			     bool async_icosq_needed)
 {
 	const struct net_device_ops *netdev_ops = c->netdev->netdev_ops;
 	struct dim_cq_moder icocq_moder = {0, 0};
@@ -2668,10 +2669,13 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
 	if (err)
 		goto err_close_rx_cq;
 
-	c->async_icosq = mlx5e_open_async_icosq(c, params, cparam, &ccp);
-	if (IS_ERR(c->async_icosq)) {
-		err = PTR_ERR(c->async_icosq);
-		goto err_close_rq_xdpsq_cq;
+	if (async_icosq_needed) {
+		c->async_icosq = mlx5e_open_async_icosq(c, params, cparam,
+							&ccp);
+		if (IS_ERR(c->async_icosq)) {
+			err = PTR_ERR(c->async_icosq);
+			goto err_close_rq_xdpsq_cq;
+		}
 	}
 
 	mutex_init(&c->icosq_recovery_lock);
@@ -2708,7 +2712,8 @@ static int mlx5e_open_queues(struct mlx5e_channel *c,
 	mlx5e_close_icosq(&c->icosq);
 
 err_close_async_icosq:
-	mlx5e_close_async_icosq(c->async_icosq);
+	if (c->async_icosq)
+		mlx5e_close_async_icosq(c->async_icosq);
 
 err_close_rq_xdpsq_cq:
 	if (c->xdp)
@@ -2740,7 +2745,8 @@ static void mlx5e_close_queues(struct mlx5e_channel *c)
 	mlx5e_close_sqs(c);
 	mlx5e_close_icosq(&c->icosq);
 	mutex_destroy(&c->icosq_recovery_lock);
-	mlx5e_close_async_icosq(c->async_icosq);
+	if (c->async_icosq)
+		mlx5e_close_async_icosq(c->async_icosq);
 	if (c->xdp)
 		mlx5e_close_cq(&c->rq_xdpsq.cq);
 	mlx5e_close_cq(&c->rq.cq);
@@ -2825,6 +2831,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	struct mlx5e_channel_param *cparam;
 	struct mlx5_core_dev *mdev;
 	struct mlx5e_xsk_param xsk;
+	bool async_icosq_needed;
 	struct mlx5e_channel *c;
 	unsigned int irq;
 	int vec_ix;
@@ -2874,7 +2881,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
 	netif_napi_add_config_locked(netdev, &c->napi, mlx5e_napi_poll, ix);
 	netif_napi_set_irq_locked(&c->napi, irq);
 
-	err = mlx5e_open_queues(c, params, cparam);
+	async_icosq_needed = !!xsk_pool || priv->ktls_rx_was_enabled;
+	err = mlx5e_open_queues(c, params, cparam, async_icosq_needed);
 	if (unlikely(err))
 		goto err_napi_del;
 
@@ -2912,7 +2920,8 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
 	for (tc = 0; tc < c->num_tc; tc++)
 		mlx5e_activate_txqsq(&c->sq[tc]);
 	mlx5e_activate_icosq(&c->icosq);
-	mlx5e_activate_icosq(c->async_icosq);
+	if (c->async_icosq)
+		mlx5e_activate_icosq(c->async_icosq);
 
 	if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
 		mlx5e_activate_xsk(c);
@@ -2933,7 +2942,8 @@ static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
 	else
 		mlx5e_deactivate_rq(&c->rq);
 
-	mlx5e_deactivate_icosq(c->async_icosq);
+	if (c->async_icosq)
+		mlx5e_deactivate_icosq(c->async_icosq);
 	mlx5e_deactivate_icosq(&c->icosq);
 	for (tc = 0; tc < c->num_tc; tc++)
 		mlx5e_deactivate_txqsq(&c->sq[tc]);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 57c54265dbda..b31f689fe271 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -125,6 +125,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 {
 	struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel,
 					       napi);
+	struct mlx5e_icosq *aicosq = c->async_icosq;
 	struct mlx5e_ch_stats *ch_stats = c->stats;
 	struct mlx5e_xdpsq *xsksq = &c->xsksq;
 	struct mlx5e_txqsq __rcu **qos_sqs;
@@ -180,16 +181,18 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 	busy |= work_done == budget;
 
 	mlx5e_poll_ico_cq(&c->icosq.cq);
-	if (mlx5e_poll_ico_cq(&c->async_icosq->cq))
-		/* Don't clear the flag if nothing was polled to prevent
-		 * queueing more WQEs and overflowing the async ICOSQ.
-		 */
-		clear_bit(MLX5E_SQ_STATE_PENDING_XSK_TX,
-			  &c->async_icosq->state);
-
-	/* Keep after async ICOSQ CQ poll */
-	if (unlikely(mlx5e_ktls_rx_pending_resync_list(c, budget)))
-		busy |= mlx5e_ktls_rx_handle_resync_list(c, budget);
+	if (aicosq) {
+		if (mlx5e_poll_ico_cq(&aicosq->cq))
+			/* Don't clear the flag if nothing was polled to prevent
+			 * queueing more WQEs and overflowing the async ICOSQ.
+			 */
+			clear_bit(MLX5E_SQ_STATE_PENDING_XSK_TX,
+				  &aicosq->state);
+
+		/* Keep after async ICOSQ CQ poll */
+		if (unlikely(mlx5e_ktls_rx_pending_resync_list(c, budget)))
+			busy |= mlx5e_ktls_rx_handle_resync_list(c, budget);
+	}
 
 	busy |= INDIRECT_CALL_2(rq->post_wqes,
 				mlx5e_post_rx_mpwqes,
@@ -237,16 +240,17 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
 
 	mlx5e_cq_arm(&rq->cq);
 	mlx5e_cq_arm(&c->icosq.cq);
-	mlx5e_cq_arm(&c->async_icosq->cq);
+	if (aicosq) {
+		mlx5e_cq_arm(&aicosq->cq);
+		if (xsk_open) {
+			mlx5e_handle_rx_dim(xskrq);
+			mlx5e_cq_arm(&xsksq->cq);
+			mlx5e_cq_arm(&xskrq->cq);
+		}
+	}
 	if (c->xdpsq)
 		mlx5e_cq_arm(&c->xdpsq->cq);
 
-	if (xsk_open) {
-		mlx5e_handle_rx_dim(xskrq);
-		mlx5e_cq_arm(&xsksq->cq);
-		mlx5e_cq_arm(&xskrq->cq);
-	}
-
 	if (unlikely(aff_change && busy_xsk)) {
 		mlx5e_trigger_irq(&c->icosq);
 		ch_stats->force_irq++;
-- 
2.31.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ