[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <8097883a-5775-a270-8ea6-7f2708908515@redhat.com>
Date: Mon, 31 May 2021 14:24:00 +0800
From: Jason Wang <jasowang@...hat.com>
To: Xuan Zhuo <xuanzhuo@...ux.alibaba.com>, netdev@...r.kernel.org
Cc: "Michael S. Tsirkin" <mst@...hat.com>,
"David S. Miller" <davem@...emloft.net>,
Jakub Kicinski <kuba@...nel.org>,
Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Jesper Dangaard Brouer <hawk@...nel.org>,
John Fastabend <john.fastabend@...il.com>,
Andrii Nakryiko <andrii@...nel.org>,
Martin KaFai Lau <kafai@...com>,
Song Liu <songliubraving@...com>, Yonghong Song <yhs@...com>,
KP Singh <kpsingh@...nel.org>,
virtualization@...ts.linux-foundation.org, bpf@...r.kernel.org
Subject: Re: [PATCH net-next] virtio-net: Refactor the code related to
page_to_skb()
在 2021/5/19 下午5:00, Xuan Zhuo 写道:
> Due to long-term development, the current code structure of
> page_to_skb() and the semantic expression of variables are rather
> chaotic, so it is necessary to reconstruct this piece of logic.
>
> The code has been tested with the following test code.
> Call test_merge and test_big to test "merge" and "big" modes
> respectively.
>
> ================================= test.sh ========================
> function test()
> {
> flag=$1
> max=$2
> inc=$3
> if [ "x$max" == "x" ]
> then
> max=4096
> fi
> echo $max
>
> for s in $(seq 64 $inc $max)
> do
> echo >> log
> echo $flag :UDP_STREAM $s >> log
> netperf -H 192.168.122.202 -l 5 -t UDP_STREAM -- -m $s >> log
> echo $flag :UDP_STREAM $s $?
> done
>
> for s in $(seq 64 $inc $max)
> do
> echo >> log
> echo $flag :TCP_STREAM $s >> log
> netperf -H 192.168.122.202 -l 5 -t TCP_STREAM -- -m $s >> log
> echo $flag :TCP_STREAM $s $?
> done
> }
>
> function test_merge()
> {
> XDP='no-xdp'
>
> ssh root@....168.122.202 ip link set dev eth0 xdp off
> test $XDP
>
> XDP='xdp-pass'
>
> ssh root@....168.122.202 ip link set dev eth0 xdp off
> ssh root@....168.122.202 ip link set dev eth0 xdp object xdp.o sec xdp
> test $XDP
>
> XDP='xdp-meta'
>
> ssh root@....168.122.202 ip link set dev eth0 xdp off
> ssh root@....168.122.202 ip link set dev eth0 xdp object xdp_meta.o sec xdp_mark
> test $XDP
> }
>
> function test_big()
> {
> # set host net dev mtu to 60000
> test "big" 5000
> test "big" 60000 10
> }
>
> ssh root@....168.122.202 ./netserver
> echo '' > log
>
> test_merge # or test_big
> ================================== xdp.c =================================
>
> SEC("xdp")
> int _xdp(struct xdp_md *xdp)
> {
> return XDP_PASS;
> }
>
> char _license[] SEC("license") = "GPL";
>
> ================================== xdp_meta.c =================================
>
> static long (*bpf_xdp_adjust_meta)(struct xdp_md *xdp_md, int delta) = (void *) 54;
>
> struct meta_info {
> __u32 mark;
> } __attribute__((aligned(4)));
>
> SEC("xdp_mark")
> int _xdp_mark(struct xdp_md *ctx)
> {
> struct meta_info *meta;
> void *data, *data_end;
> int ret;
>
> ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta));
> if (ret < 0)
> return XDP_ABORTED;
>
> data = (void *)(unsigned long)ctx->data;
>
> /* Check data_meta have room for meta_info struct */
> meta = (void *)(unsigned long)ctx->data_meta;
> if ((void *)(meta + 1) > data)
> return XDP_ABORTED;
>
> meta->mark = 42;
>
> return XDP_PASS;
> }
>
> char _license[] SEC("license") = "GPL";
> ===================================================================
I'm not sure virtio-net needs such a huge refactoring that just do the
simplification of parameters passing for page_to_skb(). This will
complicate stable backports in the future.
I suspect it may have performance regression.
Do you have any perf numbers? E.g pps.
Thanks
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@...ux.alibaba.com>
> ---
> drivers/net/virtio_net.c | 309 +++++++++++++++++++++++----------------
> 1 file changed, 185 insertions(+), 124 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 7fda2ae4c40f..a117b3496653 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -249,6 +249,35 @@ struct padded_vnet_hdr {
> char padding[4];
> };
>
> +struct virtnet_page_info {
> + struct virtnet_info *vi;
> + struct receive_queue *rq;
> +
> + /* this may be the head_page, buf not starts with this page */
> + struct page *page;
> +
> + /* the allcated buf. this may point to the headroom */
> + char *buf;
> +
> + /* the size of the buf */
> + unsigned int buf_size;
> +
> + /* OUT. the offset of the remaining data in the page */
> + unsigned int offset;
> +
> + char *virtnet_hdr;
> +
> + /* packet data. generally point to eth header */
> + char *packet;
> +
> + /* IN. packet len without virtnet hdr
> + * OUT. the size of the remaining data
> + */
> + unsigned int len;
> +
> + unsigned int metasize;
> +};
> +
> static bool is_xdp_frame(void *ptr)
> {
> return (unsigned long)ptr & VIRTIO_XDP_FLAG;
> @@ -357,6 +386,89 @@ static void skb_xmit_done(struct virtqueue *vq)
> netif_wake_subqueue(vi->dev, vq2txq(vq));
> }
>
> +static struct sk_buff *virtnet_page_to_skb(struct virtnet_page_info *pinfo)
> +{
> + int shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> + struct virtio_net_hdr_mrg_rxbuf *hdr;
> + struct sk_buff *skb;
> + int tailroom, copy;
> +
> + /* In the case of "big", tailroom may be negative, because len can be
> + * greater than PAGE_SIZE.
> + */
> + tailroom = pinfo->buf + pinfo->buf_size - (pinfo->packet + pinfo->len);
> +
> + if (!NET_IP_ALIGN && tailroom >= shinfo_size) {
> + skb = build_skb(pinfo->buf, pinfo->buf_size);
> + if (unlikely(!skb))
> + return NULL;
> +
> + skb_reserve(skb, pinfo->packet - pinfo->buf);
> + skb_put(skb, pinfo->len);
> +
> + /* mark. page has been used. */
> + pinfo->page = NULL;
> + } else {
> + /* copy small data so we can reuse these pages for small data
> + *
> + * GOOD_COPY_LEN is used to save network headers, such as eth
> + * header, ip header, tcp header. If you want to save metadata
> + * information, we should apply for a larger space. Prevent the
> + * network header cannot fit in the linear space.
> + */
> + skb = napi_alloc_skb(&pinfo->rq->napi,
> + pinfo->metasize + GOOD_COPY_LEN);
> + if (unlikely(!skb))
> + return NULL;
> +
> + /* Copy all frame if it fits skb->head, otherwise
> + * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
> + */
> + if (pinfo->len <= GOOD_COPY_LEN)
> + copy = pinfo->len;
> + else
> + copy = ETH_HLEN;
> +
> + skb_put_data(skb, pinfo->packet - pinfo->metasize,
> + copy + pinfo->metasize);
> + __skb_pull(skb, pinfo->metasize);
> + pinfo->len -= copy;
> + pinfo->offset = pinfo->packet + copy -
> + (char *)page_address(pinfo->page);
> + }
> +
> + if (pinfo->metasize)
> + skb_metadata_set(skb, pinfo->metasize);
> +
> + if (pinfo->virtnet_hdr) {
> + hdr = skb_vnet_hdr(skb);
> + memcpy(hdr, pinfo->virtnet_hdr, pinfo->vi->hdr_len);
> + }
> +
> + return skb;
> +}
> +
> +static struct sk_buff *virtnet_merge_page_to_skb(struct virtnet_page_info *pinfo)
> +{
> + struct sk_buff *skb;
> +
> + skb = virtnet_page_to_skb(pinfo);
> + if (unlikely(!skb))
> + return NULL;
> +
> + /* page has been used by build_skb() */
> + if (!pinfo->page)
> + return skb;
> +
> + if (pinfo->len)
> + skb_add_rx_frag(skb, 0, pinfo->page, pinfo->offset, pinfo->len,
> + pinfo->buf_size);
> + else
> + put_page(pinfo->page);
> +
> + return skb;
> +}
> +
> #define MRG_CTX_HEADER_SHIFT 22
> static void *mergeable_len_to_ctx(unsigned int truesize,
> unsigned int headroom)
> @@ -375,86 +487,30 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
> }
>
> /* Called from bottom half context */
> -static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> - struct receive_queue *rq,
> - struct page *page, unsigned int offset,
> - unsigned int len, unsigned int truesize,
> - bool hdr_valid, unsigned int metasize,
> - unsigned int headroom)
> +static struct sk_buff *virtnet_big_page_to_skb(struct virtnet_page_info *pinfo)
> {
> + unsigned int len, offset, truesize;
> + struct receive_queue *rq;
> struct sk_buff *skb;
> - struct virtio_net_hdr_mrg_rxbuf *hdr;
> - unsigned int copy, hdr_len, hdr_padded_len;
> - struct page *page_to_free = NULL;
> - int tailroom, shinfo_size;
> - char *p, *hdr_p, *buf;
> + struct page *page;
>
> - p = page_address(page) + offset;
> - hdr_p = p;
> + /* save next page */
> + page = (struct page *)pinfo->page->private;
>
> - hdr_len = vi->hdr_len;
> - if (vi->mergeable_rx_bufs)
> - hdr_padded_len = sizeof(*hdr);
> - else
> - hdr_padded_len = sizeof(struct padded_vnet_hdr);
> -
> - /* If headroom is not 0, there is an offset between the beginning of the
> - * data and the allocated space, otherwise the data and the allocated
> - * space are aligned.
> - */
> - if (headroom) {
> - /* Buffers with headroom use PAGE_SIZE as alloc size,
> - * see add_recvbuf_mergeable() + get_mergeable_buf_len()
> - */
> - truesize = PAGE_SIZE;
> - tailroom = truesize - len - offset;
> - buf = page_address(page);
> - } else {
> - tailroom = truesize - len;
> - buf = p;
> - }
> -
> - len -= hdr_len;
> - offset += hdr_padded_len;
> - p += hdr_padded_len;
> -
> - shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> -
> - /* copy small packet so we can reuse these pages */
> - if (!NET_IP_ALIGN && len > GOOD_COPY_LEN && tailroom >= shinfo_size) {
> - skb = build_skb(buf, truesize);
> - if (unlikely(!skb))
> - return NULL;
> -
> - skb_reserve(skb, p - buf);
> - skb_put(skb, len);
> - goto ok;
> - }
> -
> - /* copy small packet so we can reuse these pages for small data */
> - skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
> + skb = virtnet_page_to_skb(pinfo);
> if (unlikely(!skb))
> return NULL;
>
> - /* Copy all frame if it fits skb->head, otherwise
> - * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
> - */
> - if (len <= skb_tailroom(skb))
> - copy = len;
> - else
> - copy = ETH_HLEN + metasize;
> - skb_put_data(skb, p, copy);
> + rq = pinfo->rq;
>
> - len -= copy;
> - offset += copy;
> + /* page has been used by build_skb() */
> + if (!pinfo->page)
> + goto end;
>
> - if (vi->mergeable_rx_bufs) {
> - if (len)
> - skb_add_rx_frag(skb, 0, page, offset, len, truesize);
> - else
> - page_to_free = page;
> - goto ok;
> - }
> + page = pinfo->page;
> + len = pinfo->len;
> + offset = pinfo->offset;
> + truesize = pinfo->buf_size;
>
> /*
> * Verify that we can indeed put this data into a skb.
> @@ -477,23 +533,10 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
> offset = 0;
> }
>
> +end:
> if (page)
> give_pages(rq, page);
>
> -ok:
> - /* hdr_valid means no XDP, so we can copy the vnet header */
> - if (hdr_valid) {
> - hdr = skb_vnet_hdr(skb);
> - memcpy(hdr, hdr_p, hdr_len);
> - }
> - if (page_to_free)
> - put_page(page_to_free);
> -
> - if (metasize) {
> - __skb_pull(skb, metasize);
> - skb_metadata_set(skb, metasize);
> - }
> -
> return skb;
> }
>
> @@ -654,17 +697,17 @@ static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
> */
> static struct page *xdp_linearize_page(struct receive_queue *rq,
> u16 *num_buf,
> - struct page *p,
> - int offset,
> + void *buf,
> int page_off,
> unsigned int *len)
> {
> struct page *page = alloc_page(GFP_ATOMIC);
> + struct page *p;
>
> if (!page)
> return NULL;
>
> - memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
> + memcpy(page_address(page) + page_off, buf, *len);
> page_off += *len;
>
> while (--*num_buf) {
> @@ -739,18 +782,18 @@ static struct sk_buff *receive_small(struct net_device *dev,
> goto err_xdp;
>
> if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) {
> - int offset = buf - page_address(page) + header_offset;
> unsigned int tlen = len + vi->hdr_len;
> u16 num_buf = 1;
>
> + buf += header_offset;
> +
> xdp_headroom = virtnet_get_headroom(vi);
> header_offset = VIRTNET_RX_PAD + xdp_headroom;
> headroom = vi->hdr_len + header_offset;
> buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
> SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
> - xdp_page = xdp_linearize_page(rq, &num_buf, page,
> - offset, header_offset,
> - &tlen);
> + xdp_page = xdp_linearize_page(rq, &num_buf, buf,
> + header_offset, &tlen);
> if (!xdp_page)
> goto err_xdp;
>
> @@ -842,9 +885,21 @@ static struct sk_buff *receive_big(struct net_device *dev,
> unsigned int len,
> struct virtnet_rq_stats *stats)
> {
> + struct virtnet_page_info pinfo;
> struct page *page = buf;
> - struct sk_buff *skb =
> - page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0, 0);
> + struct sk_buff *skb;
> +
> + pinfo.rq = rq;
> + pinfo.vi = vi;
> + pinfo.page = page;
> + pinfo.buf = page_address(page);
> + pinfo.buf_size = PAGE_SIZE;
> + pinfo.virtnet_hdr = pinfo.buf;
> + pinfo.packet = pinfo.virtnet_hdr + sizeof(struct padded_vnet_hdr);
> + pinfo.len = len - vi->hdr_len;
> + pinfo.metasize = 0;
> +
> + skb = virtnet_big_page_to_skb(&pinfo);
>
> stats->bytes += len - vi->hdr_len;
> if (unlikely(!skb))
> @@ -870,12 +925,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
> struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
> u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
> struct page *page = virt_to_head_page(buf);
> - int offset = buf - page_address(page);
> struct sk_buff *head_skb, *curr_skb;
> + struct virtnet_page_info pinfo;
> struct bpf_prog *xdp_prog;
> unsigned int truesize = mergeable_ctx_to_truesize(ctx);
> unsigned int headroom = mergeable_ctx_to_headroom(ctx);
> - unsigned int metasize = 0;
> unsigned int frame_sz;
> int err;
>
> @@ -887,8 +941,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
> if (xdp_prog) {
> struct xdp_frame *xdpf;
> struct page *xdp_page;
> + void *hard_start;
> struct xdp_buff xdp;
> - void *data;
> u32 act;
>
> /* Transient failure which in theory could occur if
> @@ -912,54 +966,47 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
> if (unlikely(num_buf > 1 ||
> headroom < virtnet_get_headroom(vi))) {
> /* linearize data for XDP */
> - xdp_page = xdp_linearize_page(rq, &num_buf,
> - page, offset,
> + xdp_page = xdp_linearize_page(rq, &num_buf, buf,
> VIRTIO_XDP_HEADROOM,
> &len);
> frame_sz = PAGE_SIZE;
>
> if (!xdp_page)
> goto err_xdp;
> - offset = VIRTIO_XDP_HEADROOM;
> +
> + hard_start = page_address(xdp_page) + vi->hdr_len;
> } else {
> xdp_page = page;
> + hard_start = buf + vi->hdr_len - VIRTIO_XDP_HEADROOM;
> }
>
> /* Allow consuming headroom but reserve enough space to push
> * the descriptor on if we get an XDP_TX return code.
> */
> - data = page_address(xdp_page) + offset;
> xdp_init_buff(&xdp, frame_sz - vi->hdr_len, &rq->xdp_rxq);
> - xdp_prepare_buff(&xdp, data - VIRTIO_XDP_HEADROOM + vi->hdr_len,
> - VIRTIO_XDP_HEADROOM, len - vi->hdr_len, true);
> + xdp_prepare_buff(&xdp, hard_start, VIRTIO_XDP_HEADROOM,
> + len - vi->hdr_len, true);
>
> act = bpf_prog_run_xdp(xdp_prog, &xdp);
> stats->xdp_packets++;
>
> switch (act) {
> case XDP_PASS:
> - metasize = xdp.data - xdp.data_meta;
> -
> - /* recalculate offset to account for any header
> - * adjustments and minus the metasize to copy the
> - * metadata in page_to_skb(). Note other cases do not
> - * build an skb and avoid using offset
> - */
> - offset = xdp.data - page_address(xdp_page) -
> - vi->hdr_len - metasize;
> -
> - /* recalculate len if xdp.data, xdp.data_end or
> - * xdp.data_meta were adjusted
> - */
> - len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
> + pinfo.rq = rq;
> + pinfo.vi = vi;
> + pinfo.page = xdp_page;
> + pinfo.buf = xdp.data_hard_start - vi->hdr_len;
> + pinfo.buf_size = PAGE_SIZE;
> + pinfo.virtnet_hdr = NULL;
> + pinfo.packet = xdp.data;
> + pinfo.len = xdp.data_end - xdp.data;
> + pinfo.metasize = xdp.data - xdp.data_meta;
> /* We can only create skb based on xdp_page. */
> if (unlikely(xdp_page != page)) {
> rcu_read_unlock();
> put_page(page);
> - head_skb = page_to_skb(vi, rq, xdp_page, offset,
> - len, PAGE_SIZE, false,
> - metasize, headroom);
> - return head_skb;
> +
> + return virtnet_merge_page_to_skb(&pinfo);
> }
> break;
> case XDP_TX:
> @@ -1005,8 +1052,22 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
> __free_pages(xdp_page, 0);
> goto err_xdp;
> }
> + rcu_read_unlock();
> +
> + /* pinfo has been filled inside XDP_PASS */
> + } else {
> + rcu_read_unlock();
> +
> + pinfo.rq = rq;
> + pinfo.vi = vi;
> + pinfo.page = page;
> + pinfo.buf = buf - headroom;
> + pinfo.buf_size = headroom ? PAGE_SIZE : truesize;
> + pinfo.virtnet_hdr = buf;
> + pinfo.packet = buf + sizeof(*hdr);
> + pinfo.len = len - sizeof(*hdr);
> + pinfo.metasize = 0;
> }
> - rcu_read_unlock();
>
> if (unlikely(len > truesize)) {
> pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
> @@ -1015,14 +1076,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
> goto err_skb;
> }
>
> - head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
> - metasize, headroom);
> + head_skb = virtnet_merge_page_to_skb(&pinfo);
> curr_skb = head_skb;
>
> if (unlikely(!curr_skb))
> goto err_skb;
> while (--num_buf) {
> int num_skb_frags;
> + int offset;
>
> buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
> if (unlikely(!buf)) {
Powered by blists - more mailing lists