Message-Id: <20251021131209.41491-4-kerneljasonxing@gmail.com>
Date: Tue, 21 Oct 2025 21:12:03 +0800
From: Jason Xing <kerneljasonxing@...il.com>
To: davem@...emloft.net,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
bjorn@...nel.org,
magnus.karlsson@...el.com,
maciej.fijalkowski@...el.com,
jonathan.lemon@...il.com,
sdf@...ichev.me,
ast@...nel.org,
daniel@...earbox.net,
hawk@...nel.org,
john.fastabend@...il.com,
joe@...a.to,
willemdebruijn.kernel@...il.com
Cc: bpf@...r.kernel.org,
netdev@...r.kernel.org,
Jason Xing <kernelxing@...cent.com>
Subject: [PATCH net-next v3 3/9] xsk: add xsk_alloc_batch_skb() to build skbs in batch
From: Jason Xing <kernelxing@...cent.com>
Support allocating and building skbs in batch.
This patch uses kmem_cache_alloc_bulk() to perform the batch allocation
from the global common cache 'net_hotdata.skbuff_cache'. The allocated
skbs are stored in a standalone xsk skb cache (namely,
xs->batch.skb_cache) instead of resorting to napi_alloc_cache, which was
designed for softirq context.
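
For illustration, topping up a per-socket cache from the bulk allocator
boils down to something like the sketch below; the helper name and its
parameters are made up for this example, only kmem_cache_alloc_bulk()
and net_hotdata.skbuff_cache are the real interfaces used here:

/* Hypothetical helper: refill a per-socket skb cache from the global
 * skbuff_cache. 'skbs'/'count' stand in for xs->batch.skb_cache and
 * xs->batch.skb_count; 'want' is the number of packets to send.
 */
static u32 skb_cache_refill(struct sk_buff **skbs, u32 count, u32 want,
			    gfp_t gfp_mask)
{
	if (count < want)
		count += kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
					       gfp_mask, want - count,
					       (void **)&skbs[count]);
	return count;	/* may stay below 'want' under memory pressure */
}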
After the bulk allocation, a 'for' loop borrows part of __alloc_skb()
to initialize each skb and then calls xsk_build_skb() to complete the
rest of the initialization, such as copying the descriptor data.
Add batch.send_queue and link the skbs into a single chain through
skb->list so that they can easily be sent in one go, as shown in the
subsequent patches. A rough sketch of the queue/dequeue pattern follows.
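
In the sketch, xmit_one() is only a placeholder for the actual transmit
step added later in the series:

	/* producer side: xsk_alloc_batch_skb() links each finished skb */
	__skb_queue_tail(&xs->batch.send_queue, skb);

	/* consumer side: drain the whole chain in one go */
	while ((skb = __skb_dequeue(&xs->batch.send_queue)))
		xmit_one(skb);	/* placeholder, not part of this patch */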
As for freeing the skbs, napi_consume_skb() in the tx completion path
puts each skb back into the global cache 'net_hotdata.skbuff_cache',
whose deferred freeing avoids releasing skbs one by one and thus
improves performance.
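
Put differently, a generic tx completion loop along these lines feeds
the skbs straight back into the cache that xsk_alloc_batch_skb()
bulk-allocates from (the ring/buffer names are generic placeholders,
not from this series):

	/* tx completion: defer freeing into net_hotdata.skbuff_cache */
	for (i = 0; i < done; i++) {
		struct sk_buff *skb = tx_ring->bufs[i].skb;

		napi_consume_skb(skb, budget);	/* bulk-freed when batched */
	}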
Signed-off-by: Jason Xing <kernelxing@...cent.com>
---
 include/net/xdp_sock.h |   3 ++
 net/core/skbuff.c      | 101 +++++++++++++++++++++++++++++++++++++++++
 net/xdp/xsk.c          |   1 +
3 files changed, 105 insertions(+)
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 8944f4782eb6..cb5aa8a314fe 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -47,8 +47,10 @@ struct xsk_map {
struct xsk_batch {
u32 generic_xmit_batch;
+ unsigned int skb_count;
struct sk_buff **skb_cache;
struct xdp_desc *desc_cache;
+ struct sk_buff_head send_queue;
};
struct xdp_sock {
@@ -130,6 +132,7 @@ struct xsk_tx_metadata_ops {
struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
struct sk_buff *allocated_skb,
struct xdp_desc *desc);
+int xsk_alloc_batch_skb(struct xdp_sock *xs, u32 nb_pkts, u32 nb_descs, int *err);
#ifdef CONFIG_XDP_SOCKETS
int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bc12790017b0..5b6d3b4fa895 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -81,6 +81,8 @@
#include <net/page_pool/helpers.h>
#include <net/psp/types.h>
#include <net/dropreason.h>
+#include <net/xdp_sock.h>
+#include <net/xsk_buff_pool.h>
#include <linux/uaccess.h>
#include <trace/events/skb.h>
@@ -615,6 +617,105 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
return obj;
}
+int xsk_alloc_batch_skb(struct xdp_sock *xs, u32 nb_pkts, u32 nb_descs, int *err)
+{
+ struct xsk_batch *batch = &xs->batch;
+ struct xdp_desc *descs = batch->desc_cache;
+ struct sk_buff **skbs = batch->skb_cache;
+ gfp_t gfp_mask = xs->sk.sk_allocation;
+ struct net_device *dev = xs->dev;
+ int node = NUMA_NO_NODE;
+ struct sk_buff *skb;
+ u32 i = 0, j = 0;
+ bool pfmemalloc;
+ u32 base_len;
+ u8 *data;
+
+ base_len = max(NET_SKB_PAD, L1_CACHE_ALIGN(dev->needed_headroom));
+ if (!(dev->priv_flags & IFF_TX_SKB_NO_LINEAR))
+ base_len += dev->needed_tailroom;
+
+ if (batch->skb_count >= nb_pkts)
+ goto build;
+
+ if (xs->skb) {
+ i = 1;
+ batch->skb_count++;
+ }
+
+ batch->skb_count += kmem_cache_alloc_bulk(net_hotdata.skbuff_cache,
+ gfp_mask, nb_pkts - batch->skb_count,
+ (void **)&skbs[batch->skb_count]);
+ if (batch->skb_count < nb_pkts)
+ nb_pkts = batch->skb_count;
+
+build:
+ for (i = 0, j = 0; j < nb_descs; j++) {
+ if (!xs->skb) {
+ u32 size = base_len + descs[j].len;
+
+ /* In case we don't have enough allocated skbs */
+ if (i >= nb_pkts) {
+ *err = -EAGAIN;
+ break;
+ }
+
+ if (sk_wmem_alloc_get(&xs->sk) > READ_ONCE(xs->sk.sk_sndbuf)) {
+ *err = -EAGAIN;
+ break;
+ }
+
+ skb = skbs[batch->skb_count - 1 - i];
+
+ prefetchw(skb);
+ /* We do our best to align skb_shared_info on a separate cache
+ * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives
+ * aligned memory blocks, unless SLUB/SLAB debug is enabled.
+ * Both skb->head and skb_shared_info are cache line aligned.
+ */
+ data = kmalloc_reserve(&size, gfp_mask, node, &pfmemalloc);
+ if (unlikely(!data)) {
+ *err = -ENOBUFS;
+ break;
+ }
+ /* kmalloc_size_roundup() might give us more room than requested.
+ * Put skb_shared_info exactly at the end of allocated zone,
+ * to allow max possible filling before reallocation.
+ */
+ prefetchw(data + SKB_WITH_OVERHEAD(size));
+
+ memset(skb, 0, offsetof(struct sk_buff, tail));
+ __build_skb_around(skb, data, size);
+ skb->pfmemalloc = pfmemalloc;
+ skb_set_owner_w(skb, &xs->sk);
+ } else if (unlikely(i == 0)) {
+ /* We have a skb in cache that is left last time */
+ kmem_cache_free(net_hotdata.skbuff_cache,
+ skbs[batch->skb_count - 1]);
+ skbs[batch->skb_count - 1] = xs->skb;
+ }
+
+ skb = xsk_build_skb(xs, skb, &descs[j]);
+ if (IS_ERR(skb)) {
+ *err = PTR_ERR(skb);
+ break;
+ }
+
+ if (xp_mb_desc(&descs[j])) {
+ xs->skb = skb;
+ continue;
+ }
+
+ xs->skb = NULL;
+ i++;
+ __skb_queue_tail(&batch->send_queue, skb);
+ }
+
+ batch->skb_count -= i;
+
+ return j;
+}
+
/* Allocate a new skbuff. We do this ourselves so we can fill in a few
* 'private' fields and also do memory statistics to find all the
* [BEEP] leaks.
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index f9458347ff7b..cf45c7545124 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -1906,6 +1906,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
INIT_LIST_HEAD(&xs->map_list);
spin_lock_init(&xs->map_list_lock);
+ __skb_queue_head_init(&xs->batch.send_queue);
mutex_lock(&net->xdp.lock);
sk_add_node_rcu(sk, &net->xdp.list);
--
2.41.3