netdev - Re: [PATCH bpf-next] xsk: build skb by page

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJ8uoz2Enx-WwY6RmCp0RXBG2U3BUpagw-X8hQChPResHCM-XA@mail.gmail.com>
Date:   Wed, 23 Dec 2020 11:04:19 +0100
From:   Magnus Karlsson <magnus.karlsson@...il.com>
To:     Xuan Zhuo <xuanzhuo@...ux.alibaba.com>
Cc:     "Karlsson, Magnus" <magnus.karlsson@...el.com>,
        Björn Töpel <bjorn.topel@...el.com>,
        Jonathan Lemon <jonathan.lemon@...il.com>,
        "David S. Miller" <davem@...emloft.net>,
        Jakub Kicinski <kuba@...nel.org>,
        Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Jesper Dangaard Brouer <hawk@...nel.org>,
        John Fastabend <john.fastabend@...il.com>,
        Andrii Nakryiko <andrii@...nel.org>,
        Martin KaFai Lau <kafai@...com>,
        Song Liu <songliubraving@...com>, Yonghong Song <yhs@...com>,
        KP Singh <kpsingh@...nel.org>,
        "open list:XDP SOCKETS (AF_XDP)" <netdev@...r.kernel.org>,
        "open list:XDP SOCKETS (AF_XDP)" <bpf@...r.kernel.org>,
        open list <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH bpf-next] xsk: build skb by page

On Wed, Dec 23, 2020 at 9:57 AM Xuan Zhuo <xuanzhuo@...ux.alibaba.com> wrote:
>
> This patch is used to construct skb based on page to save memory copy
> overhead.
>
> Taking into account the problem of addr unaligned, and the
> possibility of frame size greater than page in the future.

Thanks Xuan for the patch set. Could you please share performance
numbers so we know how much this buys us? Would be good if you could
produce them for 64 bytes, 1500 bytes and something in the middle so
we can judge the benefits of this.

Please note that responses will be delayed this week and next due to
the Christmas and New Years holidays over here.

> Signed-off-by: Xuan Zhuo <xuanzhuo@...ux.alibaba.com>
> ---
>  net/xdp/xsk.c | 68 ++++++++++++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 51 insertions(+), 17 deletions(-)
>
> diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
> index ac4a317..7cab40f 100644
> --- a/net/xdp/xsk.c
> +++ b/net/xdp/xsk.c
> @@ -430,6 +430,55 @@ static void xsk_destruct_skb(struct sk_buff *skb)
>         sock_wfree(skb);
>  }
>
> +static struct sk_buff *xsk_build_skb_bypage(struct xdp_sock *xs, struct xdp_desc *desc)
> +{
> +       char *buffer;
> +       u64 addr;
> +       u32 len, offset, copy, copied;
> +       int err, i;
> +       struct page *page;
> +       struct sk_buff *skb;
> +
> +       skb = sock_alloc_send_skb(&xs->sk, 0, 1, &err);
> +       if (unlikely(!skb))
> +               return NULL;
> +
> +       addr = desc->addr;
> +       len = desc->len;
> +
> +       buffer = xsk_buff_raw_get_data(xs->pool, addr);
> +       offset = offset_in_page(buffer);
> +       addr = buffer - (char *)xs->pool->addrs;
> +
> +       for (copied = 0, i = 0; copied < len; ++i) {
> +               page = xs->pool->umem->pgs[addr >> PAGE_SHIFT];
> +
> +               get_page(page);
> +
> +               copy = min((u32)(PAGE_SIZE - offset), len - copied);
> +
> +               skb_fill_page_desc(skb, i, page, offset, copy);
> +
> +               copied += copy;
> +               addr += copy;
> +               offset = 0;
> +       }
> +
> +       skb->len += len;
> +       skb->data_len += len;
> +       skb->truesize += len;
> +
> +       refcount_add(len, &xs->sk.sk_wmem_alloc);
> +
> +       skb->dev = xs->dev;
> +       skb->priority = xs->sk.sk_priority;
> +       skb->mark = xs->sk.sk_mark;
> +       skb_shinfo(skb)->destructor_arg = (void *)(long)addr;
> +       skb->destructor = xsk_destruct_skb;
> +
> +       return skb;
> +}
> +
>  static int xsk_generic_xmit(struct sock *sk)
>  {
>         struct xdp_sock *xs = xdp_sk(sk);
> @@ -445,40 +494,25 @@ static int xsk_generic_xmit(struct sock *sk)
>                 goto out;
>
>         while (xskq_cons_peek_desc(xs->tx, &desc, xs->pool)) {
> -               char *buffer;
> -               u64 addr;
> -               u32 len;
> -
>                 if (max_batch-- == 0) {
>                         err = -EAGAIN;
>                         goto out;
>                 }
>
> -               len = desc.len;
> -               skb = sock_alloc_send_skb(sk, len, 1, &err);
> +               skb = xsk_build_skb_bypage(xs, &desc);
>                 if (unlikely(!skb))
>                         goto out;
>
> -               skb_put(skb, len);
> -               addr = desc.addr;
> -               buffer = xsk_buff_raw_get_data(xs->pool, addr);
> -               err = skb_store_bits(skb, 0, buffer, len);
>                 /* This is the backpressure mechanism for the Tx path.
>                  * Reserve space in the completion queue and only proceed
>                  * if there is space in it. This avoids having to implement
>                  * any buffering in the Tx path.
>                  */
> -               if (unlikely(err) || xskq_prod_reserve(xs->pool->cq)) {
> +               if (xskq_prod_reserve(xs->pool->cq)) {
>                         kfree_skb(skb);
>                         goto out;
>                 }
>
> -               skb->dev = xs->dev;
> -               skb->priority = sk->sk_priority;
> -               skb->mark = sk->sk_mark;
> -               skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr;
> -               skb->destructor = xsk_destruct_skb;
> -
>                 err = __dev_direct_xmit(skb, xs->queue_id);
>                 if  (err == NETDEV_TX_BUSY) {
>                         /* Tell user-space to retry the send */
> --
> 1.8.3.1
>