Message-ID: <20250506145530.2877229-3-jon@nutanix.com>
Date: Tue,  6 May 2025 07:55:27 -0700
From: Jon Kohler <jon@...anix.com>
To: ast@...nel.org, daniel@...earbox.net, davem@...emloft.net, kuba@...nel.org,
        hawk@...nel.org, john.fastabend@...il.com, netdev@...r.kernel.org,
        bpf@...r.kernel.org, jon@...anix.com, aleksander.lobakin@...el.com,
        Willem de Bruijn <willemdebruijn.kernel@...il.com>,
        Jason Wang <jasowang@...hat.com>, Andrew Lunn <andrew+netdev@...n.ch>,
        Eric Dumazet <edumazet@...gle.com>, Paolo Abeni <pabeni@...hat.com>,
        linux-kernel@...r.kernel.org (open list)
Subject: [PATCH net-next 2/4] tun: optimize skb allocation in tun_xdp_one

Enhance TUN_MSG_PTR batch processing by leveraging bulk allocation from
the per-CPU NAPI cache via napi_skb_cache_get_bulk(). This reduces
per-skb allocation overhead and is especially useful with IFF_NAPI,
where GRO can feed freed entries back into the cache.

Handle scenarios where full preallocation of SKBs is not possible by
gracefully dropping only the uncovered portion of the batch payload.
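
For reference, a condensed sketch of the resulting batch flow in
tun_sendmsg() (XDP program handling, locking and the per-skb error paths
are elided; see the diff below for the full change):

	void *skbs[64];		/* bounded by VHOST_NET_BATCH */
	int i, num_skbs;

	/* Grab up to ctl->num pre-built skbs from the per-CPU NAPI cache;
	 * the call may return fewer than requested.
	 */
	num_skbs = napi_skb_cache_get_bulk(skbs, ctl->num);

	for (i = 0; i < num_skbs; i++) {
		xdp = &((struct xdp_buff *)ctl->ptr)[i];
		/* tun_xdp_one() now finishes each cached skb with
		 * build_skb_around() instead of allocating via build_skb().
		 */
		ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage,
				  xdp_prog, skbs[i]);
	}

	/* Anything the bulk allocation could not cover is dropped and
	 * accounted, rather than falling back to one-off allocations.
	 */
	for (i = num_skbs; i < ctl->num; i++) {
		xdp = &((struct xdp_buff *)ctl->ptr)[i];
		dev_core_stats_rx_dropped_inc(tun->dev);
		put_page(virt_to_head_page(xdp->data));
	}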

Cc: Alexander Lobakin <aleksander.lobakin@...el.com>
Signed-off-by: Jon Kohler <jon@...anix.com>
---
 drivers/net/tun.c | 39 +++++++++++++++++++++++++++------------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 87fc51916fce..f7f7490e78dc 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -2354,12 +2354,12 @@ static int tun_xdp_one(struct tun_struct *tun,
 		       struct tun_file *tfile,
 		       struct xdp_buff *xdp, int *flush,
 		       struct tun_page *tpage,
-		       struct bpf_prog *xdp_prog)
+		       struct bpf_prog *xdp_prog,
+		       struct sk_buff *skb)
 {
 	unsigned int datasize = xdp->data_end - xdp->data;
 	struct tun_xdp_hdr *hdr = xdp->data_hard_start;
 	struct virtio_net_hdr *gso = &hdr->gso;
-	struct sk_buff *skb = NULL;
 	struct sk_buff_head *queue;
 	u32 rxhash = 0, act;
 	int buflen = hdr->buflen;
@@ -2381,16 +2381,15 @@ static int tun_xdp_one(struct tun_struct *tun,
 
 		act = bpf_prog_run_xdp(xdp_prog, xdp);
 		ret = tun_xdp_act(tun, xdp_prog, xdp, act);
-		if (ret < 0) {
-			put_page(virt_to_head_page(xdp->data));
+		if (ret < 0)
 			return ret;
-		}
 
 		switch (ret) {
 		case XDP_REDIRECT:
 			*flush = true;
 			fallthrough;
 		case XDP_TX:
+			napi_consume_skb(skb, 1);
 			return 0;
 		case XDP_PASS:
 			break;
@@ -2403,13 +2402,14 @@ static int tun_xdp_one(struct tun_struct *tun,
 				tpage->page = page;
 				tpage->count = 1;
 			}
+			napi_consume_skb(skb, 1);
 			return 0;
 		}
 	}
 
 build:
-	skb = build_skb(xdp->data_hard_start, buflen);
-	if (!skb) {
+	skb = build_skb_around(skb, xdp->data_hard_start, buflen);
+	if (unlikely(!skb)) {
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -2427,7 +2427,6 @@ static int tun_xdp_one(struct tun_struct *tun,
 
 	if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) {
 		atomic_long_inc(&tun->rx_frame_errors);
-		kfree_skb(skb);
 		ret = -EINVAL;
 		goto out;
 	}
@@ -2455,7 +2454,6 @@ static int tun_xdp_one(struct tun_struct *tun,
 
 		if (unlikely(tfile->detached)) {
 			spin_unlock(&queue->lock);
-			kfree_skb(skb);
 			return -EBUSY;
 		}
 
@@ -2496,7 +2494,9 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 		struct bpf_prog *xdp_prog;
 		struct tun_page tpage;
 		int n = ctl->num;
-		int flush = 0, queued = 0;
+		int flush = 0, queued = 0, num_skbs = 0;
+		/* Max size of VHOST_NET_BATCH */
+		void *skbs[64];
 
 		memset(&tpage, 0, sizeof(tpage));
 
@@ -2505,12 +2505,27 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
 		bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
 		xdp_prog = rcu_dereference(tun->xdp_prog);
 
-		for (i = 0; i < n; i++) {
+		num_skbs = napi_skb_cache_get_bulk(skbs, n);
+
+		for (i = 0; i < num_skbs; i++) {
+			struct sk_buff *skb = skbs[i];
 			xdp = &((struct xdp_buff *)ctl->ptr)[i];
 			ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage,
-					  xdp_prog);
+					  xdp_prog, skb);
 			if (ret > 0)
 				queued += ret;
+			else if (ret < 0) {
+				dev_core_stats_rx_dropped_inc(tun->dev);
+				napi_consume_skb(skb, 1);
+				put_page(virt_to_head_page(xdp->data));
+			}
+		}
+
+		/* Handle remaining xdp_buff entries if num_skbs < ctl->num */
+		for (i = num_skbs; i < ctl->num; i++) {
+			xdp = &((struct xdp_buff *)ctl->ptr)[i];
+			dev_core_stats_rx_dropped_inc(tun->dev);
+			put_page(virt_to_head_page(xdp->data));
 		}
 
 		if (flush)
-- 
2.43.0

