Message-ID: <20251125200041.1565663-6-jon@nutanix.com>
Date: Tue, 25 Nov 2025 13:00:32 -0700
From: Jon Kohler <jon@...anix.com>
To: netdev@...r.kernel.org, Willem de Bruijn <willemdebruijn.kernel@...il.com>,
        Jason Wang <jasowang@...hat.com>, Andrew Lunn <andrew+netdev@...n.ch>,
        "David S. Miller" <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>, Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Jesper Dangaard Brouer <hawk@...nel.org>,
        John Fastabend <john.fastabend@...il.com>,
        Stanislav Fomichev <sdf@...ichev.me>,
        linux-kernel@...r.kernel.org (open list),
        bpf@...r.kernel.org (open list:XDP (eXpress Data Path):Keyword:(?:\b|_)xdp(?:\b|_))
Cc: Jon Kohler <jon@...anix.com>
Subject: [PATCH net-next v2 5/9] tun: use bulk NAPI cache allocation in tun_xdp_one

Optimize TUN_MSG_PTR batch processing by allocating sk_buff structures
in bulk from the per-CPU NAPI cache using napi_skb_cache_get_bulk().
This reduces per-packet allocation overhead, especially when IFF_NAPI
is enabled and GRO is feeding freed skbs back into that cache.
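
For context, napi_skb_cache_get_bulk() (declared in <linux/skbuff.h>)
fills an array with up to n pre-zeroed sk_buff heads from the per-CPU
NAPI cache and returns how many it actually obtained; each head is then
wrapped around its packet buffer with build_skb_around(). A minimal
sketch of that pairing follows; the frames[]/buflen parameters and the
final consume_skb() are placeholders for illustration only, not the tun
code (where the per-packet work is done by tun_xdp_one()):

#include <linux/skbuff.h>

/* Illustrative only: frames[] points at per-packet data buffers of size
 * buflen; the real patch hands each skb to tun_xdp_one() instead of
 * freeing it.
 */
static void bulk_rx_sketch(void **frames, int n, unsigned int buflen)
{
	struct sk_buff *skb;
	void *skbs[64];		/* large enough for the biggest batch */
	int num_skbs, i;

	/* One call pulls up to n zeroed sk_buff heads from the per-CPU
	 * NAPI cache; fewer may be returned under memory pressure.
	 */
	num_skbs = napi_skb_cache_get_bulk(skbs, n);

	for (i = 0; i < num_skbs; i++) {
		/* Attach the preallocated head to its data buffer. */
		skb = build_skb_around(skbs[i], frames[i], buflen);

		/* Real code would set len/protocol and pass the skb up
		 * the stack; the sketch simply releases it (and its
		 * backing buffer) again.
		 */
		consume_skb(skb);
	}
}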

If bulk allocation cannot fully satisfy the batch, drop only the
uncovered tail and let the covered portion proceed. This mirrors the
existing behavior, where a packet whose build_skb() call failed was
dropped with -ENOMEM while the rest of the batch continued.
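
Concretely, the uncovered tail has no skb attached yet, so dropping it
only needs the rx_dropped counter bumped and the backing page released.
A rough sketch of that step; the xdps array and dev parameter stand in
for the tun-internal state and are not the actual driver code:

#include <linux/mm.h>
#include <linux/netdevice.h>
#include <net/xdp.h>

/* Illustrative only: drop the frames that did not get an skb from the
 * bulk allocation, mirroring how a failed build_skb() was accounted.
 */
static void drop_uncovered_tail(struct net_device *dev,
				struct xdp_buff *xdps,
				int num_skbs, int n)
{
	int i;

	for (i = num_skbs; i < n; i++) {
		/* Count the drop against the device ... */
		dev_core_stats_rx_dropped_inc(dev);
		/* ... and release the page backing the XDP buffer. */
		put_page(virt_to_head_page(xdps[i].data));
	}
}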

Signed-off-by: Jon Kohler <jon@...anix.com>
---
 drivers/net/tun.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 97f130bc5fed..64f944cce517 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2420,13 +2420,13 @@ static void tun_put_page(struct tun_page *tpage)
 static int tun_xdp_one(struct tun_struct *tun,
 		       struct tun_file *tfile,
 		       struct xdp_buff *xdp, int *flush,
-		       struct tun_page *tpage)
+		       struct tun_page *tpage,
+		       struct sk_buff *skb)
 {
 	unsigned int datasize = xdp->data_end - xdp->data;
 	struct virtio_net_hdr *gso = xdp->data_hard_start;
 	struct virtio_net_hdr_v1_hash_tunnel *tnl_hdr;
 	struct bpf_prog *xdp_prog;
-	struct sk_buff *skb = NULL;
 	struct sk_buff_head *queue;
 	netdev_features_t features;
 	u32 rxhash = 0, act;
@@ -2437,6 +2437,7 @@ static int tun_xdp_one(struct tun_struct *tun,
 	struct page *page;
 
 	if (unlikely(datasize < ETH_HLEN)) {
+		kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_SMALL);
 		dev_core_stats_rx_dropped_inc(tun->dev);
 		return -EINVAL;
 	}
@@ -2454,6 +2455,7 @@ static int tun_xdp_one(struct tun_struct *tun,
 		ret = tun_xdp_act(tun, xdp_prog, xdp, act);
 		if (ret < 0) {
 			/* tun_xdp_act already handles drop statistics */
+			kfree_skb_reason(skb, SKB_DROP_REASON_XDP);
 			put_page(virt_to_head_page(xdp->data));
 			return ret;
 		}
@@ -2463,6 +2465,7 @@ static int tun_xdp_one(struct tun_struct *tun,
 			*flush = true;
 			fallthrough;
 		case XDP_TX:
+			napi_consume_skb(skb, 1);
 			return 0;
 		case XDP_PASS:
 			break;
@@ -2475,13 +2478,15 @@ static int tun_xdp_one(struct tun_struct *tun,
 				tpage->page = page;
 				tpage->count = 1;
 			}
+			napi_consume_skb(skb, 1);
 			return 0;
 		}
 	}
 
 build:
-	skb = build_skb(xdp->data_hard_start, buflen);
+	skb = build_skb_around(skb, xdp->data_hard_start, buflen);
 	if (!skb) {
+		kfree_skb_reason(skb, SKB_DROP_REASON_NOMEM);
 		dev_core_stats_rx_dropped_inc(tun->dev);
 		return -ENOMEM;
 	}
@@ -2566,9 +2571,11 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 	if (m->msg_controllen == sizeof(struct tun_msg_ctl) &&
 	    ctl && ctl->type == TUN_MSG_PTR) {
 		struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx;
+		int flush = 0, queued = 0, num_skbs = 0;
 		struct tun_page tpage;
 		int n = ctl->num;
-		int flush = 0, queued = 0;
+		/* Max size of VHOST_NET_BATCH */
+		void *skbs[64];
 
 		memset(&tpage, 0, sizeof(tpage));
 
@@ -2576,13 +2583,24 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 		rcu_read_lock();
 		bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
 
-		for (i = 0; i < n; i++) {
+		num_skbs = napi_skb_cache_get_bulk(skbs, n);
+
+		for (i = 0; i < num_skbs; i++) {
+			struct sk_buff *skb = skbs[i];
 			xdp = &((struct xdp_buff *)ctl->ptr)[i];
-			ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
+			ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage,
+					  skb);
 			if (ret > 0)
 				queued += ret;
 		}
 
+		/* Handle remaining xdp_buff entries if num_skbs < ctl->num */
+		for (i = num_skbs; i < ctl->num; i++) {
+			xdp = &((struct xdp_buff *)ctl->ptr)[i];
+			dev_core_stats_rx_dropped_inc(tun->dev);
+			put_page(virt_to_head_page(xdp->data));
+		}
+
 		if (flush)
 			xdp_do_flush();
 
-- 
2.43.0

