lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACGkMEtFFe3mVkXYjYJZtGdU=tAB+T5TYCqySzSxR2N5e4UV1A@mail.gmail.com>
Date:   Mon, 28 Feb 2022 15:46:56 +0800
From:   Jason Wang <jasowang@...hat.com>
To:     Harold Huang <baymaxhuang@...il.com>
Cc:     netdev <netdev@...r.kernel.org>, Paolo Abeni <pabeni@...hat.com>,
        "David S. Miller" <davem@...emloft.net>,
        Jakub Kicinski <kuba@...nel.org>,
        Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Jesper Dangaard Brouer <hawk@...nel.org>,
        John Fastabend <john.fastabend@...il.com>,
        open list <linux-kernel@...r.kernel.org>,
        "open list:XDP (eXpress Data Path)" <bpf@...r.kernel.org>
Subject: Re: [PATCH net-next v3] tun: support NAPI for packets received from
 batched XDP buffs

On Mon, Feb 28, 2022 at 11:38 AM Harold Huang <baymaxhuang@...il.com> wrote:
>
> In tun, NAPI is supported and we can also use NAPI in the path of
> batched XDP buffs to accelerate packet processing. What is more, after
> we use NAPI, GRO is also supported. The iperf test shows that the throughput of
> a single stream could be improved from 4.5Gbps to 9.2Gbps. Additionally, 9.2
> Gbps nearly reaches the line speed of the phy nic and there is still about
> 15% idle cpu core remaining on the vhost thread.
>
> Test topology:
> [iperf server]<--->tap<--->dpdk testpmd<--->phy nic<--->[iperf client]
>
> Iperf stream:
> iperf3 -c 10.0.0.2  -i 1 -t 10
>
> Before:
> ...
> [  5]   5.00-6.00   sec   558 MBytes  4.68 Gbits/sec    0   1.50 MBytes
> [  5]   6.00-7.00   sec   556 MBytes  4.67 Gbits/sec    1   1.35 MBytes
> [  5]   7.00-8.00   sec   556 MBytes  4.67 Gbits/sec    2   1.18 MBytes
> [  5]   8.00-9.00   sec   559 MBytes  4.69 Gbits/sec    0   1.48 MBytes
> [  5]   9.00-10.00  sec   556 MBytes  4.67 Gbits/sec    1   1.33 MBytes
> - - - - - - - - - - - - - - - - - - - - - - - - -
> [ ID] Interval           Transfer     Bitrate         Retr
> [  5]   0.00-10.00  sec  5.39 GBytes  4.63 Gbits/sec   72          sender
> [  5]   0.00-10.04  sec  5.39 GBytes  4.61 Gbits/sec               receiver
>
> After:
> ...
> [  5]   5.00-6.00   sec  1.07 GBytes  9.19 Gbits/sec    0   1.55 MBytes
> [  5]   6.00-7.00   sec  1.08 GBytes  9.30 Gbits/sec    0   1.63 MBytes
> [  5]   7.00-8.00   sec  1.08 GBytes  9.25 Gbits/sec    0   1.72 MBytes
> [  5]   8.00-9.00   sec  1.08 GBytes  9.25 Gbits/sec   77   1.31 MBytes
> [  5]   9.00-10.00  sec  1.08 GBytes  9.24 Gbits/sec    0   1.48 MBytes
> - - - - - - - - - - - - - - - - - - - - - - - - -
> [ ID] Interval           Transfer     Bitrate         Retr
> [  5]   0.00-10.00  sec  10.8 GBytes  9.28 Gbits/sec  166          sender
> [  5]   0.00-10.04  sec  10.8 GBytes  9.24 Gbits/sec               receiver
>
> Reported-at: https://lore.kernel.org/all/CACGkMEvTLG0Ayg+TtbN4q4pPW-ycgCCs3sC3-TF8cuRTf7Pp1A@mail.gmail.com
> Signed-off-by: Harold Huang <baymaxhuang@...il.com>

Acked-by: Jason Wang <jasowang@...hat.com>

> ---
> v2 -> v3
>  - return the number of packets queued for NAPI from tun_xdp_one
>
>  drivers/net/tun.c | 43 ++++++++++++++++++++++++++++++-------------
>  1 file changed, 30 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index fed85447701a..969ea69fd29d 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -2388,9 +2388,10 @@ static int tun_xdp_one(struct tun_struct *tun,
>         struct virtio_net_hdr *gso = &hdr->gso;
>         struct bpf_prog *xdp_prog;
>         struct sk_buff *skb = NULL;
> +       struct sk_buff_head *queue;
>         u32 rxhash = 0, act;
>         int buflen = hdr->buflen;
> -       int err = 0;
> +       int ret = 0;
>         bool skb_xdp = false;
>         struct page *page;
>
> @@ -2405,13 +2406,13 @@ static int tun_xdp_one(struct tun_struct *tun,
>                 xdp_set_data_meta_invalid(xdp);
>
>                 act = bpf_prog_run_xdp(xdp_prog, xdp);
> -               err = tun_xdp_act(tun, xdp_prog, xdp, act);
> -               if (err < 0) {
> +               ret = tun_xdp_act(tun, xdp_prog, xdp, act);
> +               if (ret < 0) {
>                         put_page(virt_to_head_page(xdp->data));
> -                       return err;
> +                       return ret;
>                 }
>
> -               switch (err) {
> +               switch (ret) {
>                 case XDP_REDIRECT:
>                         *flush = true;
>                         fallthrough;
> @@ -2435,7 +2436,7 @@ static int tun_xdp_one(struct tun_struct *tun,
>  build:
>         skb = build_skb(xdp->data_hard_start, buflen);
>         if (!skb) {
> -               err = -ENOMEM;
> +               ret = -ENOMEM;
>                 goto out;
>         }
>
> @@ -2445,7 +2446,7 @@ static int tun_xdp_one(struct tun_struct *tun,
>         if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
>                 atomic_long_inc(&tun->rx_frame_errors);
>                 kfree_skb(skb);
> -               err = -EINVAL;
> +               ret = -EINVAL;
>                 goto out;
>         }
>
> @@ -2455,16 +2456,27 @@ static int tun_xdp_one(struct tun_struct *tun,
>         skb_record_rx_queue(skb, tfile->queue_index);
>
>         if (skb_xdp) {
> -               err = do_xdp_generic(xdp_prog, skb);
> -               if (err != XDP_PASS)
> +               ret = do_xdp_generic(xdp_prog, skb);
> +               if (ret != XDP_PASS) {
> +                       ret = 0;
>                         goto out;
> +               }
>         }
>
>         if (!rcu_dereference(tun->steering_prog) && tun->numqueues > 1 &&
>             !tfile->detached)
>                 rxhash = __skb_get_hash_symmetric(skb);
>
> -       netif_receive_skb(skb);
> +       if (tfile->napi_enabled) {
> +               queue = &tfile->sk.sk_write_queue;
> +               spin_lock(&queue->lock);
> +               __skb_queue_tail(queue, skb);
> +               spin_unlock(&queue->lock);
> +               ret = 1;
> +       } else {
> +               netif_receive_skb(skb);
> +               ret = 0;
> +       }
>
>         /* No need to disable preemption here since this function is
>          * always called with bh disabled
> @@ -2475,7 +2487,7 @@ static int tun_xdp_one(struct tun_struct *tun,
>                 tun_flow_update(tun, rxhash, tfile);
>
>  out:
> -       return err;
> +       return ret;
>  }
>
>  static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
> @@ -2492,7 +2504,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
>         if (ctl && (ctl->type == TUN_MSG_PTR)) {
>                 struct tun_page tpage;
>                 int n = ctl->num;
> -               int flush = 0;
> +               int flush = 0, queued = 0;
>
>                 memset(&tpage, 0, sizeof(tpage));
>
> @@ -2501,12 +2513,17 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
>
>                 for (i = 0; i < n; i++) {
>                         xdp = &((struct xdp_buff *)ctl->ptr)[i];
> -                       tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
> +                       ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
> +                       if (ret > 0)
> +                               queued += ret;
>                 }
>
>                 if (flush)
>                         xdp_do_flush();
>
> +               if (tfile->napi_enabled && queued > 0)
> +                       napi_schedule(&tfile->napi);
> +
>                 rcu_read_unlock();
>                 local_bh_enable();
>
> --
> 2.27.0
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ