Message-ID: <CAJaqyWcSakXs2GC5QkRtT7BjOK3Mzb-RxS198N+ePqKG9h_BhA@mail.gmail.com>
Date: Thu, 10 Jul 2025 13:56:01 +0200
From: Eugenio Perez Martin <eperezma@...hat.com>
To: Jason Wang <jasowang@...hat.com>
Cc: mst@...hat.com, kvm@...r.kernel.org, virtualization@...ts.linux.dev,
netdev@...r.kernel.org, linux-kernel@...r.kernel.org, jonah.palmer@...cle.com
Subject: Re: [PATCH net-next 2/2] vhost_net: basic in_order support
On Tue, Jul 8, 2025 at 8:48 AM Jason Wang <jasowang@...hat.com> wrote:
>
> This patch introduces basic in-order support for vhost-net. By
> recording the number of batched buffers in an array when calling
> `vhost_add_used_and_signal_n()`, we can reduce the number of userspace
> accesses. Note that the vhost-net batching logic is kept, since we
> still need to count the number of batched buffers there.
>
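
For anyone skimming the thread: the win is that with VIRTIO_F_IN_ORDER a
single used entry can describe a run of consecutive buffers, so a flush
touches the used ring once per run rather than once per buffer. A minimal
user-space sketch of that accounting (hypothetical put_used() helper and
values, not the actual vhost code):

/*
 * Sketch only: "heads"/"nheads" mirror the arrays the patch uses; a
 * flush does "count" writes instead of "done_idx" writes.
 */
#include <stdio.h>

struct used_elem {
	unsigned int id;	/* head id of the last buffer in the run */
	unsigned int len;	/* bytes written into the run */
};

/* Stand-in for the copy into the guest-visible used ring. */
static void put_used(const struct used_elem *e, unsigned int nbufs)
{
	printf("used entry: id=%u len=%u covers %u buffers\n",
	       e->id, e->len, nbufs);
}

static void flush(const struct used_elem *heads,
		  const unsigned short *nheads, unsigned int count)
{
	for (unsigned int i = 0; i < count; i++)
		put_used(&heads[i], nheads[i]);
}

int main(void)
{
	/* Two RX packets, 3 and 2 buffers each, all in order. */
	struct used_elem heads[] = { { .id = 2, .len = 4096 },
				     { .id = 4, .len = 1500 } };
	unsigned short nheads[] = { 3, 2 };

	flush(heads, nheads, 2);	/* 2 writes instead of 5 */
	return 0;
}
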
> Testing Results:
>
> With testpmd:
>
> - TX: txonly mode + vhost_net with XDP_DROP on TAP shows a 12.6%
> improvement, from 4.75 Mpps to 5.35 Mpps.
> - RX: No obvious improvements were observed.
>
> With virtio-ring in-order experimental code in the guest:
>
> - TX: pktgen in the guest + XDP_DROP on TAP shows a 19% improvement,
> from 5.2 Mpps to 6.2 Mpps.
> - RX: pktgen on TAP with vhost_net + XDP_DROP in the guest achieves a
> 4.0% improvement, from 3.47 Mpps to 3.61 Mpps.
>
> Signed-off-by: Jason Wang <jasowang@...hat.com>

Acked-by: Eugenio Pérez <eperezma@...hat.com>

Thanks!

> ---
> drivers/vhost/net.c | 86 ++++++++++++++++++++++++++++++++-------------
> 1 file changed, 61 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 4f9c67f17b49..8ac994b3228a 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -74,7 +74,8 @@ enum {
> (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
> (1ULL << VIRTIO_NET_F_MRG_RXBUF) |
> (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
> - (1ULL << VIRTIO_F_RING_RESET)
> + (1ULL << VIRTIO_F_RING_RESET) |
> + (1ULL << VIRTIO_F_IN_ORDER)
> };
>
> enum {
> @@ -450,7 +451,8 @@ static int vhost_net_enable_vq(struct vhost_net *n,
> return vhost_poll_start(poll, sock->file);
> }
>
> -static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
> +static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq,
> + unsigned int count)
> {
> struct vhost_virtqueue *vq = &nvq->vq;
> struct vhost_dev *dev = vq->dev;
> @@ -458,8 +460,8 @@ static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
> if (!nvq->done_idx)
> return;
>
> - vhost_add_used_and_signal_n(dev, vq, vq->heads, NULL,
> - nvq->done_idx);
> + vhost_add_used_and_signal_n(dev, vq, vq->heads,
> + vq->nheads, count);
> nvq->done_idx = 0;
> }
>
> @@ -468,6 +470,8 @@ static void vhost_tx_batch(struct vhost_net *net,
> struct socket *sock,
> struct msghdr *msghdr)
> {
> + struct vhost_virtqueue *vq = &nvq->vq;
> + bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
> struct tun_msg_ctl ctl = {
> .type = TUN_MSG_PTR,
> .num = nvq->batched_xdp,
> @@ -475,6 +479,11 @@ static void vhost_tx_batch(struct vhost_net *net,
> };
> int i, err;
>
> + if (in_order) {
> + vq->heads[0].len = 0;
> + vq->nheads[0] = nvq->done_idx;
> + }
> +
> if (nvq->batched_xdp == 0)
> goto signal_used;
>
> @@ -496,7 +505,7 @@ static void vhost_tx_batch(struct vhost_net *net,
> }
>
> signal_used:
> - vhost_net_signal_used(nvq);
> + vhost_net_signal_used(nvq, in_order ? 1 : nvq->done_idx);
> nvq->batched_xdp = 0;
> }
>
> @@ -758,6 +767,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
> int sent_pkts = 0;
> bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
> bool busyloop_intr;
> + bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
>
> do {
> busyloop_intr = false;
> @@ -794,11 +804,13 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
> break;
> }
>
> - /* We can't build XDP buff, go for single
> - * packet path but let's flush batched
> - * packets.
> - */
> - vhost_tx_batch(net, nvq, sock, &msg);
> + if (nvq->batched_xdp) {
> + /* We can't build XDP buff, go for single
> + * packet path but let's flush batched
> + * packets.
> + */
> + vhost_tx_batch(net, nvq, sock, &msg);
> + }
> msg.msg_control = NULL;
> } else {
> if (tx_can_batch(vq, total_len))
> @@ -819,8 +831,12 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
> pr_debug("Truncated TX packet: len %d != %zd\n",
> err, len);
> done:
> - vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
> - vq->heads[nvq->done_idx].len = 0;
> + if (in_order) {
> + vq->heads[0].id = cpu_to_vhost32(vq, head);
> + } else {
> + vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
> + vq->heads[nvq->done_idx].len = 0;
> + }
> ++nvq->done_idx;
> } while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
>
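
To spell out the TX accounting above: with in_order only slot 0 of
vq->heads is written, overwritten per packet, while done_idx keeps
counting; vhost_tx_batch() then publishes one entry for the whole batch.
A standalone toy trace (assumed head ids, not real vhost state):

#include <stdio.h>

int main(void)
{
	unsigned int heads0_id = 0, done_idx = 0;
	unsigned int pkt_head[] = { 7, 9, 12 };	/* assumed head ids */

	for (unsigned int i = 0; i < 3; i++) {
		heads0_id = pkt_head[i];	/* heads[0].id = head */
		done_idx++;			/* ++nvq->done_idx */
	}
	/* vhost_tx_batch(): heads[0].len = 0; nheads[0] = done_idx;
	 * then vhost_net_signal_used(nvq, 1).
	 */
	printf("one used entry: id=%u covers %u packets\n",
	       heads0_id, done_idx);
	return 0;
}
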
> @@ -999,7 +1015,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
> }
>
> static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
> - bool *busyloop_intr)
> + bool *busyloop_intr, unsigned int count)
> {
> struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
> struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
> @@ -1009,7 +1025,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
>
> if (!len && rvq->busyloop_timeout) {
> /* Flush batched heads first */
> - vhost_net_signal_used(rnvq);
> + vhost_net_signal_used(rnvq, count);
> /* Both tx vq and rx socket were polled here */
> vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);
>
> @@ -1021,7 +1037,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
>
> /* This is a multi-buffer version of vhost_get_desc, that works if
> * vq has read descriptors only.
> - * @vq - the relevant virtqueue
> + * @nvq - the relevant vhost_net virtqueue
> * @datalen - data length we'll be reading
> * @iovcount - returned count of io vectors we fill
> * @log - vhost log
> @@ -1029,14 +1045,17 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
> * @quota - headcount quota, 1 for big buffer
> * returns number of buffer heads allocated, negative on error
> */
> -static int get_rx_bufs(struct vhost_virtqueue *vq,
> +static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
> struct vring_used_elem *heads,
> + u16 *nheads,
> int datalen,
> unsigned *iovcount,
> struct vhost_log *log,
> unsigned *log_num,
> unsigned int quota)
> {
> + struct vhost_virtqueue *vq = &nvq->vq;
> + bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
> unsigned int out, in;
> int seg = 0;
> int headcount = 0;
> @@ -1073,14 +1092,16 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
> nlogs += *log_num;
> log += *log_num;
> }
> - heads[headcount].id = cpu_to_vhost32(vq, d);
> len = iov_length(vq->iov + seg, in);
> - heads[headcount].len = cpu_to_vhost32(vq, len);
> - datalen -= len;
> + if (!in_order) {
> + heads[headcount].id = cpu_to_vhost32(vq, d);
> + heads[headcount].len = cpu_to_vhost32(vq, len);
> + }
> ++headcount;
> + datalen -= len;
> seg += in;
> }
> - heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
> +
> *iovcount = seg;
> if (unlikely(log))
> *log_num = nlogs;
> @@ -1090,6 +1111,15 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
> r = UIO_MAXIOV + 1;
> goto err;
> }
> +
> + if (!in_order)
> + heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
> + else {
> + heads[0].len = cpu_to_vhost32(vq, len + datalen);
> + heads[0].id = cpu_to_vhost32(vq, d);
> + nheads[0] = headcount;
> + }
> +
> return headcount;
> err:
> vhost_discard_vq_desc(vq, headcount);
> @@ -1102,6 +1132,8 @@ static void handle_rx(struct vhost_net *net)
> {
> struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
> struct vhost_virtqueue *vq = &nvq->vq;
> + bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
> + unsigned int count = 0;
> unsigned in, log;
> struct vhost_log *vq_log;
> struct msghdr msg = {
> @@ -1149,12 +1181,13 @@ static void handle_rx(struct vhost_net *net)
>
> do {
> sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
> - &busyloop_intr);
> + &busyloop_intr, count);
> if (!sock_len)
> break;
> sock_len += sock_hlen;
> vhost_len = sock_len + vhost_hlen;
> - headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
> + headcount = get_rx_bufs(nvq, vq->heads + count,
> + vq->nheads + count,
> vhost_len, &in, vq_log, &log,
> likely(mergeable) ? UIO_MAXIOV : 1);
> /* On error, stop handling until the next kick. */
> @@ -1230,8 +1263,11 @@ static void handle_rx(struct vhost_net *net)
> goto out;
> }
> nvq->done_idx += headcount;
> - if (nvq->done_idx > VHOST_NET_BATCH)
> - vhost_net_signal_used(nvq);
> + count += in_order ? 1 : headcount;
> + if (nvq->done_idx > VHOST_NET_BATCH) {
> + vhost_net_signal_used(nvq, count);
> + count = 0;
> + }
> if (unlikely(vq_log))
> vhost_log_write(vq, vq_log, log, vhost_len,
> vq->iov, in);
> @@ -1243,7 +1279,7 @@ static void handle_rx(struct vhost_net *net)
> else if (!sock_len)
> vhost_net_enable_vq(net, vq);
> out:
> - vhost_net_signal_used(nvq);
> + vhost_net_signal_used(nvq, count);
> mutex_unlock(&vq->mutex);
> }
>
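
Same idea on the RX side: done_idx counts buffers handed to the guest,
while count counts used entries to flush, so with in_order each packet
contributes a single entry no matter how many buffers it spans. A
standalone toy of that bookkeeping (assumed per-packet buffer counts):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	bool in_order = true;
	unsigned int done_idx = 0, count = 0;
	unsigned int headcount[] = { 3, 1, 2 };	/* buffers per packet */

	for (unsigned int i = 0; i < 3; i++) {
		done_idx += headcount[i];
		count += in_order ? 1 : headcount[i];
	}
	/* in_order: 6 buffers but only 3 used entries to write */
	printf("done_idx=%u count=%u\n", done_idx, count);
	return 0;
}
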
> --
> 2.31.1
>