lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250618065553.96822-1-kerneljasonxing@gmail.com>
Date: Wed, 18 Jun 2025 14:55:53 +0800
From: Jason Xing <kerneljasonxing@...il.com>
To: davem@...emloft.net,
	edumazet@...gle.com,
	kuba@...nel.org,
	pabeni@...hat.com,
	bjorn@...nel.org,
	magnus.karlsson@...el.com,
	maciej.fijalkowski@...el.com,
	jonathan.lemon@...il.com,
	sdf@...ichev.me,
	ast@...nel.org,
	daniel@...earbox.net,
	hawk@...nel.org,
	john.fastabend@...il.com,
	joe@...a.to
Cc: bpf@...r.kernel.org,
	netdev@...r.kernel.org,
	Jason Xing <kernelxing@...cent.com>
Subject: [PATCH net-next v2] net: xsk: add sysctl_xsk_max_tx_budget in the xmit path

From: Jason Xing <kernelxing@...cent.com>

For some applications, it's quite useful to let users tune the max tx
budget that is applied when xsk is sending packets, for example to
accelerate transmission. Exposing such a knob also helps auto/AI tuning
in the long run.

The patch unifies the two existing definitions (TX_BATCH_SIZE and
MAX_PER_SOCKET_BUDGET) into a single sysctl knob that defaults to 32,
and makes that knob per network namespace.

Signed-off-by: Jason Xing <kernelxing@...cent.com>
---
v2
Link: https://lore.kernel.org/all/20250617002236.30557-1-kerneljasonxing@gmail.com/
1. use a per-netns sysctl knob
2. use sysctl_xsk_max_tx_budget to unify both definitions.
---
 include/net/netns/core.h   |  1 +
 include/net/xdp_sock.h     |  2 +-
 net/core/net_namespace.c   |  1 +
 net/core/sysctl_net_core.c |  8 ++++++++
 net/xdp/xsk.c              | 12 ++++++------
 5 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/include/net/netns/core.h b/include/net/netns/core.h
index 9b36f0ff0c20..f1ff15fd0032 100644
--- a/include/net/netns/core.h
+++ b/include/net/netns/core.h
@@ -14,6 +14,7 @@ struct netns_core {
 
 	int	sysctl_somaxconn;
 	int	sysctl_optmem_max;
+	int	sysctl_xsk_max_tx_budget;
 	u8	sysctl_txrehash;
 	u8	sysctl_tstamp_allow_data;
 
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index e8bd6ddb7b12..57b26ad12aa1 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -65,7 +65,7 @@ struct xdp_sock {
 	struct xsk_queue *tx ____cacheline_aligned_in_smp;
 	struct list_head tx_list;
 	/* record the number of tx descriptors sent by this xsk and
-	 * when it exceeds MAX_PER_SOCKET_BUDGET, an opportunity needs
+	 * when it exceeds sysctl_xsk_max_tx_budget, an opportunity needs
 	 * to be given to other xsks for sending tx descriptors, thereby
 	 * preventing other XSKs from being starved.
 	 */
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ae54f26709ca..890f8dc28690 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -396,6 +396,7 @@ static __net_init void preinit_net_sysctl(struct net *net)
 	net->core.sysctl_optmem_max = 128 * 1024;
 	net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
 	net->core.sysctl_tstamp_allow_data = 1;
+	net->core.sysctl_xsk_max_tx_budget = 32;
 }
 
 /* init code that must occur even if setup_net() is not called. */
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 5dbb2c6f371d..a51d9c7246ee 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -667,6 +667,14 @@ static struct ctl_table netns_core_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.proc_handler	= proc_dointvec_minmax
 	},
+	{
+		.procname	= "xsk_max_tx_budget",
+		.data		= &init_net.core.sysctl_xsk_max_tx_budget,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.extra1		= SYSCTL_ONE,
+		.proc_handler	= proc_dointvec_minmax
+	},
 	{
 		.procname	= "txrehash",
 		.data		= &init_net.core.sysctl_txrehash,
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 72c000c0ae5f..15df133b50d7 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -33,9 +33,6 @@
 #include "xdp_umem.h"
 #include "xsk.h"
 
-#define TX_BATCH_SIZE 32
-#define MAX_PER_SOCKET_BUDGET (TX_BATCH_SIZE)
-
 void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
 {
 	if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -424,7 +421,10 @@ bool xsk_tx_peek_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
 	rcu_read_lock();
 again:
 	list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) {
-		if (xs->tx_budget_spent >= MAX_PER_SOCKET_BUDGET) {
+		struct sock *sk = (struct sock *)xs;
+		int max_budget = READ_ONCE(sock_net(sk)->core.sysctl_xsk_max_tx_budget);
+
+		if (xs->tx_budget_spent >= max_budget) {
 			budget_exhausted = true;
 			continue;
 		}
@@ -778,8 +778,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 
 static int __xsk_generic_xmit(struct sock *sk)
 {
+	u32 max_budget = READ_ONCE(sock_net(sk)->core.sysctl_xsk_max_tx_budget);
 	struct xdp_sock *xs = xdp_sk(sk);
-	u32 max_batch = TX_BATCH_SIZE;
 	bool sent_frame = false;
 	struct xdp_desc desc;
 	struct sk_buff *skb;
@@ -797,7 +797,7 @@ static int __xsk_generic_xmit(struct sock *sk)
 		goto out;
 
 	while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
-		if (max_batch-- == 0) {
+		if (max_budget-- == 0) {
 			err = -EAGAIN;
 			goto out;
 		}
-- 
2.43.5


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ