Message-ID: <681bc78dc5005_20dc6429460@willemb.c.googlers.com.notmuch>
Date: Wed, 07 May 2025 16:50:21 -0400
From: Willem de Bruijn <willemdebruijn.kernel@...il.com>
To: Jon Kohler <jon@...anix.com>, 
 ast@...nel.org, 
 daniel@...earbox.net, 
 davem@...emloft.net, 
 kuba@...nel.org, 
 hawk@...nel.org, 
 john.fastabend@...il.com, 
 netdev@...r.kernel.org, 
 bpf@...r.kernel.org, 
 jon@...anix.com, 
 aleksander.lobakin@...el.com, 
 Willem de Bruijn <willemdebruijn.kernel@...il.com>, 
 Jason Wang <jasowang@...hat.com>, 
 Andrew Lunn <andrew+netdev@...n.ch>, 
 Eric Dumazet <edumazet@...gle.com>, 
 Paolo Abeni <pabeni@...hat.com>, 
 linux-kernel@...r.kernel.org (open list)
Subject: Re: [PATCH net-next 2/4] tun: optimize skb allocation in tun_xdp_one

Jon Kohler wrote:
> Enhance TUN_MSG_PTR batch processing by leveraging bulk allocation from
> the per-CPU NAPI cache via napi_skb_cache_get_bulk(). This reduces
> allocation overhead and is especially useful with IFF_NAPI, where GRO
> can feed entries back into the cache.
> 
> Handle the case where the full batch of SKBs cannot be preallocated by
> gracefully dropping only the uncovered tail of the batch.
> 
> Cc: Alexander Lobakin <aleksander.lobakin@...el.com>
> Signed-off-by: Jon Kohler <jon@...anix.com>
> ---
>  drivers/net/tun.c | 39 +++++++++++++++++++++++++++------------
>  1 file changed, 27 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 87fc51916fce..f7f7490e78dc 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -2354,12 +2354,12 @@ static int tun_xdp_one(struct tun_struct *tun,
>  		       struct tun_file *tfile,
>  		       struct xdp_buff *xdp, int *flush,
>  		       struct tun_page *tpage,
> -		       struct bpf_prog *xdp_prog)
> +		       struct bpf_prog *xdp_prog,
> +		       struct sk_buff *skb)
>  {
>  	unsigned int datasize = xdp->data_end - xdp->data;
>  	struct tun_xdp_hdr *hdr = xdp->data_hard_start;
>  	struct virtio_net_hdr *gso = &hdr->gso;
> -	struct sk_buff *skb = NULL;
>  	struct sk_buff_head *queue;
>  	u32 rxhash = 0, act;
>  	int buflen = hdr->buflen;
> @@ -2381,16 +2381,15 @@ static int tun_xdp_one(struct tun_struct *tun,
>  
>  		act = bpf_prog_run_xdp(xdp_prog, xdp);
>  		ret = tun_xdp_act(tun, xdp_prog, xdp, act);
> -		if (ret < 0) {
> -			put_page(virt_to_head_page(xdp->data));
> +		if (ret < 0)
>  			return ret;
> -		}
>  
>  		switch (ret) {
>  		case XDP_REDIRECT:
>  			*flush = true;
>  			fallthrough;
>  		case XDP_TX:
> +			napi_consume_skb(skb, 1);
>  			return 0;
>  		case XDP_PASS:
>  			break;
> @@ -2403,13 +2402,14 @@ static int tun_xdp_one(struct tun_struct *tun,
>  				tpage->page = page;
>  				tpage->count = 1;
>  			}
> +			napi_consume_skb(skb, 1);
>  			return 0;
>  		}
>  	}
>  
>  build:
> -	skb = build_skb(xdp->data_hard_start, buflen);
> -	if (!skb) {
> +	skb = build_skb_around(skb, xdp->data_hard_start, buflen);
> +	if (unlikely(!skb)) {
>  		ret = -ENOMEM;
>  		goto out;
>  	}
> @@ -2427,7 +2427,6 @@ static int tun_xdp_one(struct tun_struct *tun,
>  
>  	if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) {
>  		atomic_long_inc(&tun->rx_frame_errors);
> -		kfree_skb(skb);
>  		ret = -EINVAL;
>  		goto out;
>  	}
> @@ -2455,7 +2454,6 @@ static int tun_xdp_one(struct tun_struct *tun,
>  
>  		if (unlikely(tfile->detached)) {
>  			spin_unlock(&queue->lock);
> -			kfree_skb(skb);
>  			return -EBUSY;
>  		}
>  
> @@ -2496,7 +2494,9 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
>  		struct bpf_prog *xdp_prog;
>  		struct tun_page tpage;
>  		int n = ctl->num;
> -		int flush = 0, queued = 0;
> +		int flush = 0, queued = 0, num_skbs = 0;
> +		/* Max size of VHOST_NET_BATCH */
> +		void *skbs[64];
>  
>  		memset(&tpage, 0, sizeof(tpage));
>  
> @@ -2505,12 +2505,27 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
>  		bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
>  		xdp_prog = rcu_dereference(tun->xdp_prog);
>  
> -		for (i = 0; i < n; i++) {
> +		num_skbs = napi_skb_cache_get_bulk(skbs, n);
> +
> +		for (i = 0; i < num_skbs; i++) {
> +			struct sk_buff *skb = skbs[i];
>  			xdp = &((struct xdp_buff *)ctl->ptr)[i];
>  			ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage,
> -					  xdp_prog);
> +					  xdp_prog, skb);
>  			if (ret > 0)
>  				queued += ret;
> +			else if (ret < 0) {
> +				dev_core_stats_rx_dropped_inc(tun->dev);
> +				napi_consume_skb(skb, 1);
> +				put_page(virt_to_head_page(xdp->data));
> +			}
> +		}
> +
> +		/* Handle remaining xdp_buff entries if num_skbs < ctl->num */
> +		for (i = num_skbs; i < ctl->num; i++) {
> +			xdp = &((struct xdp_buff *)ctl->ptr)[i];
> +			dev_core_stats_rx_dropped_inc(tun->dev);
> +			put_page(virt_to_head_page(xdp->data));

When napi_skb_cache_get_bulk() returns fewer skbs than requested, the
code should attempt to send out the remaining packets rather than drop
them.
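
One way to do that would be to keep the pre-patch build_skb() path as a
fallback for the tail of the batch instead of a second drop loop,
roughly as follows (untested sketch, assuming tun_xdp_one() is taught
to accept a NULL skb):

        /* In tun_xdp_one(): fall back to a fresh allocation when the
         * caller could not hand us a cached skb.
         */
build:
        if (skb)
                skb = build_skb_around(skb, xdp->data_hard_start, buflen);
        else
                skb = build_skb(xdp->data_hard_start, buflen);
        if (unlikely(!skb)) {
                ret = -ENOMEM;
                goto out;
        }

        /* In tun_sendmsg(): run the whole batch, passing NULL for the
         * entries that napi_skb_cache_get_bulk() did not cover.
         */
        for (i = 0; i < n; i++) {
                struct sk_buff *skb = i < num_skbs ? skbs[i] : NULL;

                xdp = &((struct xdp_buff *)ctl->ptr)[i];
                ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage,
                                  xdp_prog, skb);
                if (ret > 0)
                        queued += ret;
                else if (ret < 0) {
                        dev_core_stats_rx_dropped_inc(tun->dev);
                        if (skb)
                                napi_consume_skb(skb, 1);
                        put_page(virt_to_head_page(xdp->data));
                }
        }

That way the slow path only pays a per-skb allocation cost instead of
losing packets whenever the NAPI cache runs short.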


