Message-ID: <681bc78dc5005_20dc6429460@willemb.c.googlers.com.notmuch>
Date: Wed, 07 May 2025 16:50:21 -0400
From: Willem de Bruijn <willemdebruijn.kernel@...il.com>
To: Jon Kohler <jon@...anix.com>,
ast@...nel.org,
daniel@...earbox.net,
davem@...emloft.net,
kuba@...nel.org,
hawk@...nel.org,
john.fastabend@...il.com,
netdev@...r.kernel.org,
bpf@...r.kernel.org,
jon@...anix.com,
aleksander.lobakin@...el.com,
Willem de Bruijn <willemdebruijn.kernel@...il.com>,
Jason Wang <jasowang@...hat.com>,
Andrew Lunn <andrew+netdev@...n.ch>,
Eric Dumazet <edumazet@...gle.com>,
Paolo Abeni <pabeni@...hat.com>,
linux-kernel@...r.kernel.org (open list)
Subject: Re: [PATCH net-next 2/4] tun: optimize skb allocation in tun_xdp_one
Jon Kohler wrote:
> Enhance TUN_MSG_PTR batch processing by leveraging bulk allocation from
> the per-CPU NAPI cache via napi_skb_cache_get_bulk. This improves
> efficiency by reducing allocation overhead and is especially useful
> when using IFF_NAPI and GRO is able to feed the cache entries back.
>
> Handle scenarios where full preallocation of SKBs is not possible by
> gracefully dropping only the uncovered portion of the batch payload.
>
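As I read napi_skb_cache_get_bulk() (so take this with a grain of salt),
it hands out bare skb heads from the per-CPU NAPI cache, topping the
cache up from the slab bulk allocator as needed, and it can return fewer
heads than requested if that refill fails, roughly:

	void *skbs[64];
	u32 got;

	got = napi_skb_cache_get_bulk(skbs, n);	/* got <= n */
	/* skbs[0..got-1] are skb heads to be paired with data buffers
	 * via build_skb_around(); entries beyond 'got' have no skb and
	 * need some fallback, which is what the last hunk deals with.
	 */
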
> Cc: Alexander Lobakin <aleksander.lobakin@...el.com>
> Signed-off-by: Jon Kohler <jon@...anix.com>
> ---
> drivers/net/tun.c | 39 +++++++++++++++++++++++++++------------
> 1 file changed, 27 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 87fc51916fce..f7f7490e78dc 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -2354,12 +2354,12 @@ static int tun_xdp_one(struct tun_struct *tun,
> struct tun_file *tfile,
> struct xdp_buff *xdp, int *flush,
> struct tun_page *tpage,
> - struct bpf_prog *xdp_prog)
> + struct bpf_prog *xdp_prog,
> + struct sk_buff *skb)
> {
> unsigned int datasize = xdp->data_end - xdp->data;
> struct tun_xdp_hdr *hdr = xdp->data_hard_start;
> struct virtio_net_hdr *gso = &hdr->gso;
> - struct sk_buff *skb = NULL;
> struct sk_buff_head *queue;
> u32 rxhash = 0, act;
> int buflen = hdr->buflen;
> @@ -2381,16 +2381,15 @@ static int tun_xdp_one(struct tun_struct *tun,
>
> act = bpf_prog_run_xdp(xdp_prog, xdp);
> ret = tun_xdp_act(tun, xdp_prog, xdp, act);
> - if (ret < 0) {
> - put_page(virt_to_head_page(xdp->data));
> + if (ret < 0)
> return ret;
> - }
>
> switch (ret) {
> case XDP_REDIRECT:
> *flush = true;
> fallthrough;
> case XDP_TX:
> + napi_consume_skb(skb, 1);
> return 0;
> case XDP_PASS:
> break;
> @@ -2403,13 +2402,14 @@ static int tun_xdp_one(struct tun_struct *tun,
> tpage->page = page;
> tpage->count = 1;
> }
> + napi_consume_skb(skb, 1);
> return 0;
> }
> }
>
> build:
> - skb = build_skb(xdp->data_hard_start, buflen);
> - if (!skb) {
> + skb = build_skb_around(skb, xdp->data_hard_start, buflen);
> + if (unlikely(!skb)) {
> ret = -ENOMEM;
> goto out;
> }
> @@ -2427,7 +2427,6 @@ static int tun_xdp_one(struct tun_struct *tun,
>
> if (tun_vnet_hdr_to_skb(tun->flags, skb, gso)) {
> atomic_long_inc(&tun->rx_frame_errors);
> - kfree_skb(skb);
> ret = -EINVAL;
> goto out;
> }
> @@ -2455,7 +2454,6 @@ static int tun_xdp_one(struct tun_struct *tun,
>
> if (unlikely(tfile->detached)) {
> spin_unlock(&queue->lock);
> - kfree_skb(skb);
> return -EBUSY;
> }
>
> @@ -2496,7 +2494,9 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
> struct bpf_prog *xdp_prog;
> struct tun_page tpage;
> int n = ctl->num;
> - int flush = 0, queued = 0;
> + int flush = 0, queued = 0, num_skbs = 0;
> + /* Max size of VHOST_NET_BATCH */
> + void *skbs[64];
>
> memset(&tpage, 0, sizeof(tpage));
>
> @@ -2505,12 +2505,27 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
> bpf_net_ctx = bpf_net_ctx_set(&__bpf_net_ctx);
> xdp_prog = rcu_dereference(tun->xdp_prog);
>
> - for (i = 0; i < n; i++) {
> + num_skbs = napi_skb_cache_get_bulk(skbs, n);
> +
> + for (i = 0; i < num_skbs; i++) {
> + struct sk_buff *skb = skbs[i];
> xdp = &((struct xdp_buff *)ctl->ptr)[i];
> ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage,
> - xdp_prog);
> + xdp_prog, skb);
> if (ret > 0)
> queued += ret;
> + else if (ret < 0) {
> + dev_core_stats_rx_dropped_inc(tun->dev);
> + napi_consume_skb(skb, 1);
> + put_page(virt_to_head_page(xdp->data));
> + }
> + }
> +
> + /* Handle remaining xdp_buff entries if num_skbs < ctl->num */
> + for (i = num_skbs; i < ctl->num; i++) {
> + xdp = &((struct xdp_buff *)ctl->ptr)[i];
> + dev_core_stats_rx_dropped_inc(tun->dev);
> + put_page(virt_to_head_page(xdp->data));

The code should attempt to send out the remaining packets rather than
drop them.
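
For instance (an untested sketch only, based on my reading of this
patch): tun_xdp_one() could keep the existing build_skb() path as a
fallback whenever no cached skb was handed in, so the tail of the batch
is still delivered:

build:
	/* Sketch: fall back to plain build_skb() when the bulk get came
	 * up short, instead of dropping the packet.
	 */
	if (skb)
		skb = build_skb_around(skb, xdp->data_hard_start, buflen);
	else
		skb = build_skb(xdp->data_hard_start, buflen);
	if (unlikely(!skb)) {
		ret = -ENOMEM;
		goto out;
	}

The second loop in tun_sendmsg() would then call tun_xdp_one() with a
NULL skb for the entries that did not get a cached head, instead of
dropping them. AFAICS napi_consume_skb() already tolerates a NULL skb
(skb_unref() checks for it), so the XDP_TX/XDP_DROP branches should not
need extra handling for that case.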