lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250825135342.53110-8-kerneljasonxing@gmail.com>
Date: Mon, 25 Aug 2025 21:53:40 +0800
From: Jason Xing <kerneljasonxing@...il.com>
To: davem@...emloft.net,
	edumazet@...gle.com,
	kuba@...nel.org,
	pabeni@...hat.com,
	bjorn@...nel.org,
	magnus.karlsson@...el.com,
	maciej.fijalkowski@...el.com,
	jonathan.lemon@...il.com,
	sdf@...ichev.me,
	ast@...nel.org,
	daniel@...earbox.net,
	hawk@...nel.org,
	john.fastabend@...il.com,
	horms@...nel.org,
	andrew+netdev@...n.ch
Cc: bpf@...r.kernel.org,
	netdev@...r.kernel.org,
	Jason Xing <kernelxing@...cent.com>
Subject: [PATCH net-next v2 7/9] xsk: support batch xmit main logic

From: Jason Xing <kernelxing@...cent.com>

The function __xsk_generic_xmit_batch() is the core of batch xmit; it
implements a batch version of __xsk_generic_xmit().

The whole logic is divided into sections:
1. check if we have enough available slots in tx ring and completion
   ring.
2. read descriptors from tx ring into xs->desc_batch in batches
3. reserve enough slots in completion ring to avoid backpressure
4. allocate and build skbs in batches
5. send all the possible packets in batches at one time

Signed-off-by: Jason Xing <kernelxing@...cent.com>
---
 net/xdp/xsk.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 213d6100e405..90089a6e78b2 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -789,6 +789,123 @@ struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 	return ERR_PTR(err);
 }
 
+/* Batched counterpart of __xsk_generic_xmit(): drain the Tx ring in
+ * chunks of up to xs->generic_xmit_batch descriptors, bounded overall
+ * by xs->max_tx_budget.  Per chunk: check ring occupancy, read the
+ * descriptors into xs->desc_batch, reserve completion-ring slots up
+ * front (backpressure), build skbs into xs->skb_cache, then hand the
+ * whole run of skbs to the driver at once.
+ *
+ * Returns 0, or a negative errno: -ENXIO (socket no longer bound),
+ * -EAGAIN (no ring space, empty descriptors, or budget exhausted),
+ * -EBUSY (driver dropped a frame during batch xmit).
+ */
+static int __xsk_generic_xmit_batch(struct xdp_sock *xs)
+{
+	struct xdp_desc *descs = xs->desc_batch;
+	struct xsk_buff_pool *pool = xs->pool;
+	struct sk_buff **skbs = xs->skb_cache;
+	u32 nb_pkts, nb_descs, cons_descs;
+	struct net_device *dev = xs->dev;
+	int start = 0, end = 0, cur = -1;
+	u32 i = 0, max_budget;
+	struct netdev_queue *txq;
+	bool sent_frame = false;
+	u32 max_batch, expected;
+	int err = 0;
+
+	mutex_lock(&xs->mutex);
+
+	/* Since we dropped the RCU read lock, the socket state might have changed. */
+	if (unlikely(!xsk_is_bound(xs))) {
+		err = -ENXIO;
+		goto out;
+	}
+
+	/* Silently succeed (err == 0) when the queue or link is unusable. */
+	if (xs->queue_id >= dev->real_num_tx_queues)
+		goto out;
+
+	if (unlikely(!netif_running(dev) ||
+		     !netif_carrier_ok(dev)))
+		goto out;
+
+	max_budget = READ_ONCE(xs->max_tx_budget);
+	max_batch = xs->generic_xmit_batch;
+	txq = netdev_get_tx_queue(dev, xs->queue_id);
+
+	for (i = 0; i < max_budget; i += cons_descs) {
+		/* Clamp this round to both the remaining budget and the
+		 * configured batch size.
+		 */
+		expected = max_budget - i;
+		expected = max_batch > expected ? expected : max_batch;
+		nb_descs = xskq_cons_nb_entries(xs->tx, expected);
+		if (!nb_descs)
+			goto out;
+
+		/* This is the backpressure mechanism for the Tx path. Try to
+		 * reserve space in the completion queue for all packets, but
+		 * if there are fewer slots available, just process that many
+		 * packets. This avoids having to implement any buffering in
+		 * the Tx path.
+		 */
+		nb_descs = xskq_prod_nb_free(pool->cq, nb_descs);
+		if (!nb_descs) {
+			err = -EAGAIN;
+			goto out;
+		}
+
+		nb_descs = xskq_cons_read_desc_batch(xs->tx, pool, descs, nb_descs);
+		if (!nb_descs) {
+			err = -EAGAIN;
+			xs->tx->queue_empty_descs++;
+			goto out;
+		}
+
+		nb_pkts = xskq_prod_write_addr_batch_locked(pool, descs, nb_descs);
+
+		err = xsk_alloc_batch_skb(xs, nb_pkts, nb_descs, &cons_descs, &start, &end);
+		/* Return 'nb_descs - cons_descs' number of descs to the
+		 * pool if the batch allocation partially fails
+		 */
+		if (cons_descs < nb_descs) {
+			xskq_cons_cancel_n(xs->tx, nb_descs - cons_descs);
+			xsk_cq_cancel_locked(xs->pool, nb_descs - cons_descs);
+		}
+
+		/* NOTE(review): 'start >= end' looks inverted — with start/end
+		 * reset to 0 each round, a successfully built run would
+		 * normally satisfy start < end if skbs occupy [start, end).
+		 * Confirm against xsk_alloc_batch_skb()'s contract for
+		 * start/end before merging.
+		 */
+		if (start >= end) {
+			int err_xmit;
+
+			err_xmit = xsk_direct_xmit_batch(skbs, dev, txq,
+							 &cur, start, end);
+			if (err_xmit == NETDEV_TX_BUSY) {
+				err = -EAGAIN;
+			} else if (err_xmit == NET_XMIT_DROP) {
+				/* Skip past the dropped skb; the cleanup loop
+				 * at 'out' reclaims the remainder.
+				 */
+				cur++;
+				err = -EBUSY;
+			}
+
+			sent_frame = true;
+			xs->skb = NULL;
+		}
+
+		/* err may come from xsk_alloc_batch_skb() above or from the
+		 * xmit path; either way stop and clean up.
+		 */
+		if (err)
+			goto out;
+
+		start = 0;
+		end = 0;
+		cur = -1;
+	}
+
+	/* Maximum budget of descriptors have been consumed */
+	err = -EAGAIN;
+
+	if (xskq_has_descs(xs->tx)) {
+		if (xs->skb)
+			xsk_drop_skb(xs->skb);
+	}
+
+out:
+	/* If cur is still >= end, cancel and free the unsent skbs left in
+	 * skb_cache, returning their descriptors to the Tx ring.
+	 * NOTE(review): the header comment said "larger than end" but the
+	 * loop also runs when cur == end — confirm the boundary is intended.
+	 */
+	for (; cur >= end; cur--) {
+		xskq_cons_cancel_n(xs->tx, xsk_get_num_desc(skbs[cur]));
+		xsk_consume_skb(skbs[cur]);
+	}
+	if (sent_frame)
+		__xsk_tx_release(xs);
+
+	mutex_unlock(&xs->mutex);
+	return err;
+}
+
 static int __xsk_generic_xmit(struct sock *sk)
 {
 	struct xdp_sock *xs = xdp_sk(sk);
-- 
2.41.3


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ