lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACGkMEuc2pNao96nTrVQD0VOdpYHk_gbS+K8X5n_J_iiJUVqkg@mail.gmail.com>
Date: Mon, 29 Dec 2025 15:15:06 +0800
From: Jason Wang <jasowang@...hat.com>
To: "Michael S. Tsirkin" <mst@...hat.com>
Cc: xuanzhuo@...ux.alibaba.com, eperezma@...hat.com, 
	virtualization@...ts.linux.dev, linux-kernel@...r.kernel.org
Subject: Re: [PATCH V11 19/19] virtio_ring: add in order support

On Sat, Dec 27, 2025 at 6:09 PM Michael S. Tsirkin <mst@...hat.com> wrote:
>
> On Fri, Dec 26, 2025 at 02:57:03PM -0500, Michael S. Tsirkin wrote:
> > On Thu, Dec 25, 2025 at 12:26:08PM +0800, Jason Wang wrote:
> > > This patch implements in order support for both split virtqueue and
> > > > packed virtqueue. Performance can be gained for devices where memory
> > > > access is expensive (e.g. vhost-net or a real PCI device):
> > >
> > > Benchmark with KVM guest:
> > >
> > > Vhost-net on the host: (pktgen + XDP_DROP):
> > >
> > >          in_order=off | in_order=on | +%
> > >     TX:  4.51Mpps     | 5.30Mpps    | +17%
> > >     RX:  3.47Mpps     | 3.61Mpps    | + 4%
> > >
> > > Vhost-user(testpmd) on the host: (pktgen/XDP_DROP):
> > >
> > > For split virtqueue:
> > >
> > >          in_order=off | in_order=on | +%
> > >     TX:  5.60Mpps     | 5.60Mpps    | +0.0%
> > >     RX:  9.16Mpps     | 9.61Mpps    | +4.9%
> > >
> > > For packed virtqueue:
> > >
> > >          in_order=off | in_order=on | +%
> > >     TX:  5.60Mpps     | 5.70Mpps    | +1.7%
> > >     RX:  10.6Mpps     | 10.8Mpps    | +1.8%
> > >
> > > > Benchmarks also show no performance impact for in_order=off with
> > > > queue sizes of 256 and 1024.
> > >
> > > Reviewed-by: Eugenio Pérez <eperezma@...hat.com>
> > > Signed-off-by: Jason Wang <jasowang@...hat.com>
> > > ---
> > >  drivers/virtio/virtio_ring.c | 455 +++++++++++++++++++++++++++++++++--
> > >  1 file changed, 432 insertions(+), 23 deletions(-)
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index 61884e031b94..d1bcd1d8c66b 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -70,6 +70,8 @@
> > >  enum vq_layout {
> > >     SPLIT = 0,
> > >     PACKED,
> > > +   SPLIT_IN_ORDER,
> > > +   PACKED_IN_ORDER,
> > >     VQ_TYPE_MAX,
> > >  };
> > >
> > > @@ -80,6 +82,7 @@ struct vring_desc_state_split {
> > >      * allocated together. So we won't stress more to the memory allocator.
> > >      */
> > >     struct vring_desc *indir_desc;
> > > +   u32 total_in_len;
> > >  };
> > >
> > >  struct vring_desc_state_packed {
> > > @@ -91,6 +94,7 @@ struct vring_desc_state_packed {
> > >     struct vring_packed_desc *indir_desc;
> > >     u16 num;                        /* Descriptor list length. */
> > >     u16 last;                       /* The last desc state in a list. */
> > > +   u32 total_in_len;
> > >  };
> > >
> >
> > So let's be clear. Is this the device-written length or the driver supplied
> > length?
> >
> >
> > >  struct vring_desc_extra {
> > > @@ -205,8 +209,24 @@ struct vring_virtqueue {
> > >
> > >     enum vq_layout layout;
> > >
> > > -   /* Head of free buffer list. */
> > > +   /*
> > > +    * Without IN_ORDER it's the head of free buffer list. With
> > > +    * IN_ORDER and SPLIT, it's the next available buffer
> > > +    * index. With IN_ORDER and PACKED, it's unused.
> > > +    */
> > >     unsigned int free_head;
> > > +
> > > +   /*
> > > > +    * With IN_ORDER, once we see an in-order batch, this stores
> > > > +    * the batch's last used entry until we return the last buffer
> > > > +    * of that batch. After that, id is set to UINT_MAX to mark it
> > > > +    * invalid. Unused without IN_ORDER.
> > > +    */
> > > +   struct used_entry {
> > > +           u32 id;
> > > +           u32 len;
> > > +   } batch_last;
> > > +
> > >     /* Number we've added since last sync. */
> > >     unsigned int num_added;
> > >
> > > @@ -218,6 +238,11 @@ struct vring_virtqueue {
> > >      */
> > >     u16 last_used_idx;
> > >
> > > > +   /* With IN_ORDER and SPLIT, the last descriptor id we used to
> > > > +    * detach a buffer.
> > > +    */
> > > +   u16 last_used;
> > > +
> > >     /* Hint for event idx: already triggered no need to disable. */
> > >     bool event_triggered;
> > >
> > > @@ -259,7 +284,12 @@ static void vring_free(struct virtqueue *_vq);
> > >
> > >  static inline bool virtqueue_is_packed(const struct vring_virtqueue *vq)
> > >  {
> > > -   return vq->layout == PACKED;
> > > +   return vq->layout == PACKED || vq->layout == PACKED_IN_ORDER;
> > > +}
> > > +
> > > +static inline bool virtqueue_is_in_order(const struct vring_virtqueue *vq)
> > > +{
> > > +   return vq->layout == SPLIT_IN_ORDER || vq->layout == PACKED_IN_ORDER;
> > >  }
> > >
> > >  static bool virtqueue_use_indirect(const struct vring_virtqueue *vq,
> > > @@ -469,6 +499,8 @@ static void virtqueue_init(struct vring_virtqueue *vq, u32 num)
> > >     else
> > >             vq->last_used_idx = 0;
> > >
> > > +   vq->last_used = 0;
> > > +
> > >     vq->event_triggered = false;
> > >     vq->num_added = 0;
> > >
> > > @@ -576,6 +608,8 @@ static inline int virtqueue_add_split(struct vring_virtqueue *vq,
> > >     struct scatterlist *sg;
> > >     struct vring_desc *desc;
> > >     unsigned int i, n, avail, descs_used, err_idx, sg_count = 0;
> > > +   /* Total length for in-order */
> > > +   unsigned int total_in_len = 0;
> > >     int head;
> > >     bool indirect;
> > >
> > > @@ -667,6 +701,7 @@ static inline int virtqueue_add_split(struct vring_virtqueue *vq,
> > >                      */
> > >                     i = virtqueue_add_desc_split(vq, desc, extra, i, addr,
> > >                                                  len, flags, premapped);
> > > +                   total_in_len += len;
> > >             }
> > >     }
> > >
> > > @@ -689,7 +724,12 @@ static inline int virtqueue_add_split(struct vring_virtqueue *vq,
> > >     vq->vq.num_free -= descs_used;
> > >
> > >     /* Update free pointer */
> > > -   if (indirect)
> > > +   if (virtqueue_is_in_order(vq)) {
> > > +           vq->free_head += descs_used;
> > > +           if (vq->free_head >= vq->split.vring.num)
> > > +                   vq->free_head -= vq->split.vring.num;
> > > +           vq->split.desc_state[head].total_in_len = total_in_len;
> >
> >
> > Looks like total_in_len is the driver supplied length?
> >
> > > +   } else if (indirect)
> > >             vq->free_head = vq->split.desc_extra[head].next;
> > >     else
> > >             vq->free_head = i;
> > > @@ -862,6 +902,14 @@ static bool more_used_split(const struct vring_virtqueue *vq)
> > >     return virtqueue_poll_split(vq, vq->last_used_idx);
> > >  }
> > >
> > > +static bool more_used_split_in_order(const struct vring_virtqueue *vq)
> > > +{
> > > +   if (vq->batch_last.id != UINT_MAX)
> > > +           return true;
> > > +
> > > +   return virtqueue_poll_split(vq, vq->last_used_idx);
> > > +}
> > > +
> > >  static void *virtqueue_get_buf_ctx_split(struct vring_virtqueue *vq,
> > >                                      unsigned int *len,
> > >                                      void **ctx)
> > > @@ -919,6 +967,76 @@ static void *virtqueue_get_buf_ctx_split(struct vring_virtqueue *vq,
> > >     return ret;
> > >  }
> > >
> > > +static void *virtqueue_get_buf_ctx_split_in_order(struct vring_virtqueue *vq,
> > > +                                             unsigned int *len,
> > > +                                             void **ctx)
> > > +{
> > > +   void *ret;
> > > +   unsigned int num = vq->split.vring.num;
> > > +   unsigned int num_free = vq->vq.num_free;
> > > +   u16 last_used, last_used_idx;
> > > +
> > > +   START_USE(vq);
> > > +
> > > +   if (unlikely(vq->broken)) {
> > > +           END_USE(vq);
> > > +           return NULL;
> > > +   }
> > > +
> > > +   last_used = vq->last_used & (num - 1);
> > > +   last_used_idx = vq->last_used_idx & (num - 1);
> > > +
> > > +   if (vq->batch_last.id == UINT_MAX) {
> > > +           if (!more_used_split_in_order(vq)) {
> > > +                   pr_debug("No more buffers in queue\n");
> > > +                   END_USE(vq);
> > > +                   return NULL;
> > > +           }
> > > +
> > > +           /*
> > > +            * Only get used array entries after they have been
> > > +            * exposed by host.
> > > +            */
> > > +           virtio_rmb(vq->weak_barriers);
> > > +
> > > +           vq->batch_last.id = virtio32_to_cpu(vq->vq.vdev,
> > > +                               vq->split.vring.used->ring[last_used_idx].id);
> > > +           vq->batch_last.len = virtio32_to_cpu(vq->vq.vdev,
> > > +                                vq->split.vring.used->ring[last_used_idx].len);
> > > +   }
> > > +
> > > +   if (vq->batch_last.id == last_used) {
> > > +           vq->batch_last.id = UINT_MAX;
> > > +           *len = vq->batch_last.len;
> > > +   } else {
> > > +           *len = vq->split.desc_state[last_used].total_in_len;
> > > +   }
> >
> >
> > but now we return this as buffer length? I think the expected value
> > here is the used length, not the driver supplied one?
> >
> >
> >
> > Same questions apply to packed.
> >
>
> Ah, I got it. These are the skipped buffers:
>
>         The skipped buffers (for which no used descriptor was written)
>         are assumed to have been used (read or written) by the
>         device completely.
>
> maybe a comment won't hurt here. Can be a patch on top.
>

I've fixed this in the new version.

Thanks


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ