Message-ID: <20251125200041.1565663-6-jon@nutanix.com>
Date: Tue, 25 Nov 2025 13:00:32 -0700
From: Jon Kohler <jon@...anix.com>
To: netdev@...r.kernel.org, Willem de Bruijn <willemdebruijn.kernel@...il.com>,
Jason Wang <jasowang@...hat.com>, Andrew Lunn <andrew+netdev@...n.ch>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Jesper Dangaard Brouer <hawk@...nel.org>,
John Fastabend <john.fastabend@...il.com>,
Stanislav Fomichev <sdf@...ichev.me>,
linux-kernel@...r.kernel.org (open list),
bpf@...r.kernel.org (open list:XDP (eXpress Data Path):Keyword:(?:\b|_)xdp(?:\b|_))
Cc: Jon Kohler <jon@...anix.com>
Subject: [PATCH net-next v2 5/9] tun: use bulk NAPI cache allocation in tun_xdp_one
Optimize TUN_MSG_PTR batch processing by allocating sk_buff structures
in bulk from the per-CPU NAPI cache using napi_skb_cache_get_bulk().
This reduces per-packet allocation overhead, especially when IFF_NAPI
is enabled and GRO is returning skbs to that cache.

If the bulk allocation cannot fully satisfy the batch, drop only the
uncovered tail and let the rest of the batch proceed, which mirrors the
existing behavior when build_skb() fails for an entry and returns
-ENOMEM.
Signed-off-by: Jon Kohler <jon@...anix.com>
---
drivers/net/tun.c | 30 ++++++++++++++++++++++++------
1 file changed, 24 insertions(+), 6 deletions(-)
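Note for reviewers: the sketch below is illustrative only and is not part
of the patch. It shows the general shape of the bulk-get pattern the diff
adopts, using made-up names (xdp_bulk_rx(), BATCH_MAX, bufs) and generic
stack calls in place of tun's own XDP run and socket queueing; the real
logic is in tun_sendmsg() / tun_xdp_one() below.

  /* Illustrative sketch only -- not tun code. */
  #include <linux/mm.h>
  #include <linux/netdevice.h>
  #include <linux/skbuff.h>

  #define BATCH_MAX 64	/* stand-in for VHOST_NET_BATCH */

  static int xdp_bulk_rx(struct net_device *dev, void **bufs,
                         unsigned int buflen, u32 n)
  {
          void *skbs[BATCH_MAX];
          u32 num_skbs, i;
          int queued = 0;

          /* Caller guarantees n <= BATCH_MAX.  A single call pulls up
           * to n skb heads from the per-CPU NAPI cache and may return
           * fewer than requested.
           */
          num_skbs = napi_skb_cache_get_bulk(skbs, n);

          for (i = 0; i < num_skbs; i++) {
                  struct sk_buff *skb;

                  /* Wrap the preallocated skb around the existing
                   * packet buffer instead of allocating one per packet.
                   * build_skb_around() only returns NULL for a NULL skb.
                   */
                  skb = build_skb_around(skbs[i], bufs[i], buflen);

                  /* ... reserve headroom, skb_put() the payload, set the
                   * protocol; stand-in for tun's XDP run and queueing ...
                   */
                  netif_receive_skb(skb);
                  queued++;
          }

          /* Entries the bulk allocation did not cover: count a drop and
           * release the buffer, just as a per-packet build_skb() -ENOMEM
           * would have done.
           */
          for (i = num_skbs; i < n; i++) {
                  dev_core_stats_rx_dropped_inc(dev);
                  put_page(virt_to_head_page(bufs[i]));
          }

          return queued;
  }

The BATCH_MAX assumption corresponds to the "void *skbs[64]" array sized
to VHOST_NET_BATCH in the tun_sendmsg() hunk below.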
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 97f130bc5fed..64f944cce517 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2420,13 +2420,13 @@ static void tun_put_page(struct tun_page *tpage)
static int tun_xdp_one(struct tun_struct *tun,
struct tun_file *tfile,
struct xdp_buff *xdp, int *flush,
- struct tun_page *tpage)
+ struct tun_page *tpage,
+ struct sk_buff *skb)
{
unsigned int datasize = xdp->data_end - xdp->data;
struct virtio_net_hdr *gso = xdp->data_hard_start;
struct virtio_net_hdr_v1_hash_tunnel *tnl_hdr;
struct bpf_prog *xdp_prog;
- struct sk_buff *skb = NULL;
struct sk_buff_head *queue;
netdev_features_t features;
u32 rxhash = 0, act;
@@ -2437,6 +2437,7 @@ static int tun_xdp_one(struct tun_struct *tun,
struct page *page;
if (unlikely(datasize < ETH_HLEN)) {
+ kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_SMALL);
dev_core_stats_rx_dropped_inc(tun->dev);
return -EINVAL;
}
@@ -2454,6 +2455,7 @@ static int tun_xdp_one(struct tun_struct *tun,
ret = tun_xdp_act(tun, xdp_prog, xdp, act);
if (ret < 0) {
/* tun_xdp_act already handles drop statistics */
+ kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
put_page(virt_to_head_page(xdp->data));
return ret;
}
@@ -2463,6 +2465,7 @@ static int tun_xdp_one(struct tun_struct *tun,
*flush = true;
fallthrough;
case XDP_TX:
+ napi_consume_skb(skb, 1);
return 0;
case XDP_PASS:
break;
@@ -2475,13 +2478,15 @@ static int tun_xdp_one(struct tun_struct *tun,
tpage->page = page;
tpage->count = 1;
}
+ napi_consume_skb(skb, 1);
return 0;
}
}
build:
- skb = build_skb(xdp->data_hard_start, buflen);
+ skb = build_skb_around(skb, xdp->data_hard_start, buflen);
if (!skb) {
+ kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM);
dev_core_stats_rx_dropped_inc(tun->dev);
return -ENOMEM;
}
@@ -2566,9 +2571,11 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
ctl && ctl->type == TUN_MSG_PTR) {
struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+ int flush = 0, queued = 0, num_skbs = 0;
struct tun_page tpage;
int n = ctl->num;
- int flush = 0, queued = 0;
+ /* Max size of VHOST_NET_BATCH */
+ void *skbs[64];
memset(&tpage, 0, sizeof(tpage));
@@ -2576,13 +2583,24 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
rcu_read_lock();
bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
- for (i = 0; i < n; i++) {
+ num_skbs = napi_skb_cache_get_bulk(skbs, n);
+
+ for (i = 0; i < num_skbs; i++) {
+ struct sk_buff *skb = skbs[i];
xdp = &((struct xdp_buff *)ctl->ptr)[i];
- ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
+ ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage,
+ skb);
if (ret > 0)
queued += ret;
}
+ /* Handle remaining xdp_buff entries if num_skbs < ctl->num */
+ for (i = num_skbs; i < ctl->num; i++) {
+ xdp = &((struct xdp_buff *)ctl->ptr)[i];
+ dev_core_stats_rx_dropped_inc(tun->dev);
+ put_page(virt_to_head_page(xdp->data));
+ }
+
if (flush)
xdp_do_flush();
--
2.43.0