[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACGkMEtFFe3mVkXYjYJZtGdU=tAB+T5TYCqySzSxR2N5e4UV1A@mail.gmail.com>
Date: Mon, 28 Feb 2022 15:46:56 +0800
From: Jason Wang <jasowang@...hat.com>
To: Harold Huang <baymaxhuang@...il.com>
Cc: netdev <netdev@...r.kernel.org>, Paolo Abeni <pabeni@...hat.com>,
"David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Jesper Dangaard Brouer <hawk@...nel.org>,
John Fastabend <john.fastabend@...il.com>,
open list <linux-kernel@...r.kernel.org>,
"open list:XDP (eXpress Data Path)" <bpf@...r.kernel.org>
Subject: Re: [PATCH net-next v3] tun: support NAPI for packets received from
batched XDP buffs
On Mon, Feb 28, 2022 at 11:38 AM Harold Huang <baymaxhuang@...il.com> wrote:
>
> In tun, NAPI is supported and we can also use NAPI in the path of
> batched XDP buffs to accelerate packet processing. What is more, after
> we use NAPI, GRO is also supported. The iperf shows that the throughput of
> single stream could be improved from 4.5Gbps to 9.2Gbps. Additionally, 9.2
> Gbps nearly reaches the line speed of the phy nic and there is still about
> 15% idle cpu core remaining on the vhost thread.
>
> Test topology:
> [iperf server]<--->tap<--->dpdk testpmd<--->phy nic<--->[iperf client]
>
> Iperf stream:
> iperf3 -c 10.0.0.2 -i 1 -t 10
>
> Before:
> ...
> [ 5] 5.00-6.00 sec 558 MBytes 4.68 Gbits/sec 0 1.50 MBytes
> [ 5] 6.00-7.00 sec 556 MBytes 4.67 Gbits/sec 1 1.35 MBytes
> [ 5] 7.00-8.00 sec 556 MBytes 4.67 Gbits/sec 2 1.18 MBytes
> [ 5] 8.00-9.00 sec 559 MBytes 4.69 Gbits/sec 0 1.48 MBytes
> [ 5] 9.00-10.00 sec 556 MBytes 4.67 Gbits/sec 1 1.33 MBytes
> - - - - - - - - - - - - - - - - - - - - - - - - -
> [ ID] Interval Transfer Bitrate Retr
> [ 5] 0.00-10.00 sec 5.39 GBytes 4.63 Gbits/sec 72 sender
> [ 5] 0.00-10.04 sec 5.39 GBytes 4.61 Gbits/sec receiver
>
> After:
> ...
> [ 5] 5.00-6.00 sec 1.07 GBytes 9.19 Gbits/sec 0 1.55 MBytes
> [ 5] 6.00-7.00 sec 1.08 GBytes 9.30 Gbits/sec 0 1.63 MBytes
> [ 5] 7.00-8.00 sec 1.08 GBytes 9.25 Gbits/sec 0 1.72 MBytes
> [ 5] 8.00-9.00 sec 1.08 GBytes 9.25 Gbits/sec 77 1.31 MBytes
> [ 5] 9.00-10.00 sec 1.08 GBytes 9.24 Gbits/sec 0 1.48 MBytes
> - - - - - - - - - - - - - - - - - - - - - - - - -
> [ ID] Interval Transfer Bitrate Retr
> [ 5] 0.00-10.00 sec 10.8 GBytes 9.28 Gbits/sec 166 sender
> [ 5] 0.00-10.04 sec 10.8 GBytes 9.24 Gbits/sec receiver
>
> Reported-at: https://lore.kernel.org/all/CACGkMEvTLG0Ayg+TtbN4q4pPW-ycgCCs3sC3-TF8cuRTf7Pp1A@mail.gmail.com
> Signed-off-by: Harold Huang <baymaxhuang@...il.com>
Acked-by: Jason Wang <jasowang@...hat.com>
> ---
> v2 -> v3
> - return the queued NAPI packet from tun_xdp_one
>
> drivers/net/tun.c | 43 ++++++++++++++++++++++++++++++-------------
> 1 file changed, 30 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index fed85447701a..969ea69fd29d 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -2388,9 +2388,10 @@ static int tun_xdp_one(struct tun_struct *tun,
> struct virtio_net_hdr *gso = &hdr->gso;
> struct bpf_prog *xdp_prog;
> struct sk_buff *skb = NULL;
> + struct sk_buff_head *queue;
> u32 rxhash = 0, act;
> int buflen = hdr->buflen;
> - int err = 0;
> + int ret = 0;
> bool skb_xdp = false;
> struct page *page;
>
> @@ -2405,13 +2406,13 @@ static int tun_xdp_one(struct tun_struct *tun,
> xdp_set_data_meta_invalid(xdp);
>
> act = bpf_prog_run_xdp(xdp_prog, xdp);
> - err = tun_xdp_act(tun, xdp_prog, xdp, act);
> - if (err < 0) {
> + ret = tun_xdp_act(tun, xdp_prog, xdp, act);
> + if (ret < 0) {
> put_page(virt_to_head_page(xdp->data));
> - return err;
> + return ret;
> }
>
> - switch (err) {
> + switch (ret) {
> case XDP_REDIRECT:
> *flush = true;
> fallthrough;
> @@ -2435,7 +2436,7 @@ static int tun_xdp_one(struct tun_struct *tun,
> build:
> skb = build_skb(xdp->data_hard_start, buflen);
> if (!skb) {
> - err = -ENOMEM;
> + ret = -ENOMEM;
> goto out;
> }
>
> @@ -2445,7 +2446,7 @@ static int tun_xdp_one(struct tun_struct *tun,
> if (virtio_net_hdr_to_skb(skb, gso, tun_is_little_endian(tun))) {
> atomic_long_inc(&tun->rx_frame_errors);
> kfree_skb(skb);
> - err = -EINVAL;
> + ret = -EINVAL;
> goto out;
> }
>
> @@ -2455,16 +2456,27 @@ static int tun_xdp_one(struct tun_struct *tun,
> skb_record_rx_queue(skb, tfile->queue_index);
>
> if (skb_xdp) {
> - err = do_xdp_generic(xdp_prog, skb);
> - if (err != XDP_PASS)
> + ret = do_xdp_generic(xdp_prog, skb);
> + if (ret != XDP_PASS) {
> + ret = 0;
> goto out;
> + }
> }
>
> if (!rcu_dereference(tun->steering_prog) && tun->numqueues > 1 &&
> !tfile->detached)
> rxhash = __skb_get_hash_symmetric(skb);
>
> - netif_receive_skb(skb);
> + if (tfile->napi_enabled) {
> + queue = &tfile->sk.sk_write_queue;
> + spin_lock(&queue->lock);
> + __skb_queue_tail(queue, skb);
> + spin_unlock(&queue->lock);
> + ret = 1;
> + } else {
> + netif_receive_skb(skb);
> + ret = 0;
> + }
>
> /* No need to disable preemption here since this function is
> * always called with bh disabled
> @@ -2475,7 +2487,7 @@ static int tun_xdp_one(struct tun_struct *tun,
> tun_flow_update(tun, rxhash, tfile);
>
> out:
> - return err;
> + return ret;
> }
>
> static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
> @@ -2492,7 +2504,7 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
> if (ctl && (ctl->type == TUN_MSG_PTR)) {
> struct tun_page tpage;
> int n = ctl->num;
> - int flush = 0;
> + int flush = 0, queued = 0;
>
> memset(&tpage, 0, sizeof(tpage));
>
> @@ -2501,12 +2513,17 @@ static int tun_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
>
> for (i = 0; i < n; i++) {
> xdp = &((struct xdp_buff *)ctl->ptr)[i];
> - tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
> + ret = tun_xdp_one(tun, tfile, xdp, &flush, &tpage);
> + if (ret > 0)
> + queued += ret;
> }
>
> if (flush)
> xdp_do_flush();
>
> + if (tfile->napi_enabled && queued > 0)
> + napi_schedule(&tfile->napi);
> +
> rcu_read_unlock();
> local_bh_enable();
>
> --
> 2.27.0
>
Powered by blists - more mailing lists