Message-ID: <CACGkMEsTwcKHQfp5skDHE6mp-tdK88oKteU2ZtKY19ik8HgN0A@mail.gmail.com>
Date: Fri, 16 May 2025 09:30:01 +0800
From: Jason Wang <jasowang@...hat.com>
To: "Michael S. Tsirkin" <mst@...hat.com>
Cc: xuanzhuo@...ux.alibaba.com, eperezma@...hat.com,
virtualization@...ts.linux.dev, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 13/19] virtio_ring: introduce virtqueue ops
On Wed, May 14, 2025 at 10:24 PM Michael S. Tsirkin <mst@...hat.com> wrote:
>
> On Wed, May 14, 2025 at 10:19:05AM -0400, Michael S. Tsirkin wrote:
> > On Wed, Apr 09, 2025 at 12:06:03PM +0800, Jason Wang wrote:
> > > On Tue, Apr 8, 2025 at 7:37 PM Michael S. Tsirkin <mst@...hat.com> wrote:
> > > >
> > > > On Tue, Apr 08, 2025 at 03:02:35PM +0800, Jason Wang wrote:
> > > > > On Mon, Apr 7, 2025 at 4:20 PM Michael S. Tsirkin <mst@...hat.com> wrote:
> > > > > >
> > > > > > On Mon, Mar 24, 2025 at 02:01:21PM +0800, Jason Wang wrote:
> > > > > > > This patch introduces virtqueue ops, a set of callbacks that will be
> > > > > > > called depending on the queue layout or features. This helps avoid
> > > > > > > branching on split/packed and will ease future implementations such
> > > > > > > as in-order.
> > > > > > >
> > > > > > > Signed-off-by: Jason Wang <jasowang@...hat.com>
> > > > > >
> > > > > > > ---
> > > > > > > drivers/virtio/virtio_ring.c | 96 +++++++++++++++++++++++++-----------
> > > > > > > 1 file changed, 67 insertions(+), 29 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > index a2884eae14d9..ce1dc90ee89d 100644
> > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > @@ -159,9 +159,30 @@ struct vring_virtqueue_packed {
> > > > > > > size_t event_size_in_bytes;
> > > > > > > };
> > > > > > >
> > > > > > > +struct vring_virtqueue;
> > > > > > > +
> > > > > > > +struct virtqueue_ops {
> > > > > > > +	int (*add)(struct vring_virtqueue *_vq, struct scatterlist *sgs[],
> > > > > > > +		   unsigned int total_sg, unsigned int out_sgs,
> > > > > > > +		   unsigned int in_sgs, void *data,
> > > > > > > +		   void *ctx, bool premapped, gfp_t gfp);
> > > > > > > +	void *(*get)(struct vring_virtqueue *vq, unsigned int *len, void **ctx);
> > > > > > > +	bool (*kick_prepare)(struct vring_virtqueue *vq);
> > > > > > > +	void (*disable_cb)(struct vring_virtqueue *vq);
> > > > > > > +	bool (*enable_cb_delayed)(struct vring_virtqueue *vq);
> > > > > > > +	unsigned int (*enable_cb_prepare)(struct vring_virtqueue *vq);
> > > > > > > +	bool (*poll)(const struct vring_virtqueue *vq, u16 last_used_idx);
> > > > > > > +	void *(*detach_unused_buf)(struct vring_virtqueue *vq);
> > > > > > > +	bool (*more_used)(const struct vring_virtqueue *vq);
> > > > > > > +	int (*resize)(struct vring_virtqueue *vq, u32 num);
> > > > > > > +	void (*reset)(struct vring_virtqueue *vq);
> > > > > > > +};
> > > > > >
> > > > > > I like that it's organized, but
> > > > > > I worry about the overhead of the indirect calls here.
> > > > >
> > > > > We can switch to using INDIRECT_CALL_X() here
> > > >
> > > > If you think it's cleaner... but INDIRECT_CALL is all chained
> > >
> > > Yes, and it would be problematic as the number of ops increases.
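For reference, the chaining looks roughly like this (a sketch of what
the INDIRECT_CALL_2() helper from include/linux/indirect_call_wrapper.h
boils down to, not the exact macro text):

	#define INDIRECT_CALL_2(f, f2, f1, ...)			\
	({							\
		/* compare the pointer against known targets,	\
		 * falling back to a real indirect call */	\
		likely(f == f2) ? f2(__VA_ARGS__) :		\
		likely(f == f1) ? f1(__VA_ARGS__) :		\
				  f(__VA_ARGS__);		\
	})

So every call site becomes a linear chain of pointer compares, and with
four ops structures each wrapper would need an INDIRECT_CALL_4().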
> > >
> > > > while a switch can do a binary search.
> > > >
> > >
> > > Do you mean a nested switch?
> >
> > Not sure what is nested. gcc does a decent job of optimizing
> > switches. You have 4 types of ops:
> > packed/packed in order/split/split in order
> >
> > So:
> >
> > enum {
> > 	VQ_SPLIT,
> > 	VQ_SPLIT_IN_ORDER,
> > 	VQ_PACKED,
> > 	VQ_PACKED_IN_ORDER,
> > };
> >
> >
> > I do not see how it is worse?
> >
>
> Actually, here is an idea - create an array of ops:
>
> enum vqtype {
> 	SPLIT,
> 	SPLIT_IN_ORDER,
> 	PACKED,
> 	PACKED_IN_ORDER,
> 	MAX
> };
>
> struct ops {
> 	int (*add)(int bar);
> };
>
> extern int packed(int);
> extern int packedinorder(int);
> extern int split(int);
> extern int splitinorder(int);
>
> const struct ops allops[MAX] = {
> 	[SPLIT] = { split },
> 	[SPLIT_IN_ORDER] = { splitinorder },
> 	[PACKED] = { packed },
> 	[PACKED_IN_ORDER] = { packedinorder },
> };
>
> int main(int argc, char **argv)
> {
> 	switch (argc) {
> 	case 0:
> 		return allops[PACKED].add(argc);
> 	case 1:
> 		return allops[SPLIT].add(argc);
> 	default:
> 		return allops[PACKED_IN_ORDER].add(argc);
This still looks like an indirect call: we don't call the symbol
directly but have to load the function address into a register first.
> 	}
> }
>
>
> I tested this and the compiler is able to elide the indirect calls.
I've tried the following:
struct virtqueue_ops split_ops = {
	.add = virtqueue_add_split,
	.get = virtqueue_get_buf_ctx_split,
	.kick_prepare = virtqueue_kick_prepare_split,
	.disable_cb = virtqueue_disable_cb_split,
	.enable_cb_delayed = virtqueue_enable_cb_delayed_split,
	.enable_cb_prepare = virtqueue_enable_cb_prepare_split,
	.poll = virtqueue_poll_split,
	.detach_unused_buf = virtqueue_detach_unused_buf_split,
	.more_used = more_used_split,
	.resize = virtqueue_resize_split,
	.reset = virtqueue_reset_split,
};

struct virtqueue_ops packed_ops = {
	.add = virtqueue_add_packed,
	.get = virtqueue_get_buf_ctx_packed,
	.kick_prepare = virtqueue_kick_prepare_packed,
	.disable_cb = virtqueue_disable_cb_packed,
	.enable_cb_delayed = virtqueue_enable_cb_delayed_packed,
	.enable_cb_prepare = virtqueue_enable_cb_prepare_packed,
	.poll = virtqueue_poll_packed,
	.detach_unused_buf = virtqueue_detach_unused_buf_packed,
	.more_used = more_used_packed,
	.resize = virtqueue_resize_packed,
	.reset = virtqueue_reset_packed,
};
const struct virtqueue_ops *all_ops[VQ_TYPE_MAX] = {
	[SPLIT] = &split_ops,
	[PACKED] = &packed_ops,
};
unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (vq->event_triggered)
		vq->event_triggered = false;

	switch (vq->layout) {
	case SPLIT:
		return all_ops[SPLIT]->enable_cb_prepare(vq);
	case PACKED:
		return all_ops[PACKED]->enable_cb_prepare(vq);
	default:
		BUG();
	}

	return -EFAULT;
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
The compiler gives me (with RETPOLINE enabled):
ffffffff8193a870 <virtqueue_enable_cb_prepare>:
ffffffff8193a870:       f3 0f 1e fa             endbr64
ffffffff8193a874:       e8 47 68 93 ff          callq  ffffffff812710c0 <__fentry__>
ffffffff8193a879:       80 bf 8e 00 00 00 00    cmpb   $0x0,0x8e(%rdi)
ffffffff8193a880:       74 07                   je     ffffffff8193a889 <virtqueue_enable_cb_prepare+0x19>
ffffffff8193a882:       c6 87 8e 00 00 00 00    movb   $0x0,0x8e(%rdi)
ffffffff8193a889:       8b 87 80 00 00 00       mov    0x80(%rdi),%eax
ffffffff8193a88f:       85 c0                   test   %eax,%eax
ffffffff8193a891:       74 15                   je     ffffffff8193a8a8 <virtqueue_enable_cb_prepare+0x38>
ffffffff8193a893:       83 f8 01                cmp    $0x1,%eax
ffffffff8193a896:       75 20                   jne    ffffffff8193a8b8 <virtqueue_enable_cb_prepare+0x48>
ffffffff8193a898:       48 8b 05 49 03 4a 01    mov    0x14a0349(%rip),%rax        # ffffffff82ddabe8 <all_ops+0x8>
ffffffff8193a89f:       48 8b 40 28             mov    0x28(%rax),%rax
ffffffff8193a8a3:       e9 b8 d8 9b 00          jmpq   ffffffff822f8160 <__x86_indirect_thunk_array>
ffffffff8193a8a8:       48 8b 05 31 03 4a 01    mov    0x14a0331(%rip),%rax        # ffffffff82ddabe0 <all_ops>
ffffffff8193a8af:       48 8b 40 28             mov    0x28(%rax),%rax
ffffffff8193a8b3:       e9 a8 d8 9b 00          jmpq   ffffffff822f8160 <__x86_indirect_thunk_array>
ffffffff8193a8b8:       0f 0b                   ud2
ffffffff8193a8ba:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
The indirect call is still being mitigated via the retpoline thunk.
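(I suspect the difference from your userspace test is that there
allops[] is const and its initializers are visible in the same
translation unit, so gcc can fold e.g.

	return allops[PACKED].add(argc);

into a direct

	return packed(argc);

Here split_ops/packed_ops and the all_ops[] pointer array are not
const, so the compiler has to assume the pointers may change and keeps
the indirect call.)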
The only way I can see so far is something like the following, which
passes the function symbols to a macro:
#define VIRTQUEUE_CALL(vq, split_fn, packed_fn, ...)		\
({								\
	typeof(split_fn(vq, ##__VA_ARGS__)) ret;		\
								\
	switch ((vq)->layout) {					\
	case SPLIT:						\
		ret = split_fn(vq, ##__VA_ARGS__);		\
		break;						\
	case PACKED:						\
		ret = packed_fn(vq, ##__VA_ARGS__);		\
		break;						\
	default:						\
		BUG();						\
		ret = (typeof(ret))-EFAULT;			\
		break;						\
	}							\
	ret;							\
})
With this, I no longer see retpoline thunks for the calls.
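For example, the wrapper above would then read (a sketch only, reusing
the per-layout functions from this patch):

	unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
	{
		struct vring_virtqueue *vq = to_vvq(_vq);

		if (vq->event_triggered)
			vq->event_triggered = false;

		/* expands to a switch over vq->layout with two
		 * direct calls, no function pointers involved */
		return VIRTQUEUE_CALL(vq, virtqueue_enable_cb_prepare_split,
				      virtqueue_enable_cb_prepare_packed);
	}

and the compiler emits two direct calls behind the layout switch.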
Thanks
> > > >
> > > > > (but I'm not sure we
> > > > > should worry about it too much as ndo_ops or qdiscs don't use that).
> > > >
> > > >
> > > > And that's why we ended up with XDP, no? The stack's too heavy ...
> > > >
> > > > > > How about a switch statement instead?
> > > > > >
> > > > > > struct vring_virtqueue {
> > > > > > 	enum vring_virtqueue_ops ops;
> > > > > >
> > > > > > };
> > > > > >
> > > > > >
> > > > > > @@ -2248,10 +2303,8 @@ static inline int virtqueue_add(struct virtqueue *_vq,
> > > > > > {
> > > > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > >
> > > > > > 	switch (vq->ops) {
> > > > > > 	case VQ_PACKED:
> > > > > > 	case VQ_SPLIT:
> > > > > > 	case VQ_IN_ORDER:
> > > > > > 	}
> > > > > >
> > > > > >
> > > > > > }
> > > > > >
> > > > > >
> > > > > > What do you think?
> > > > >
> > > > > Actually, the matrix will be 2x2:
> > > > >
> > > > > PACKED, SPLIT, PACKED_IN_ORDER, SPLIT_IN_ORDER
> > > >
> > > > Confused. Same number of enum values as ops structures in your approach, no?
> > >
> > > I meant that in this series we will have 4 ops, not 3.
> > >
> > > >
> > > >
> > > > > And will be doubled if a new layout is implemented.
> > > > >
> > > > > If we open them such a switch will spread in a lot of places in the code.
> > > > >
> > > > > Thanks
> > >
> > > Thanks
> > >
> > > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > > > +
> > > > > > > struct vring_virtqueue {
> > > > > > > struct virtqueue vq;
> > > > > > >
> > > > > > > + struct virtqueue_ops *ops;
> > > > > > > +
> > > > > > > /* Is this a packed ring? */
> > > > > > > bool packed_ring;
> > > > > > >
> > > > > > > @@ -1116,6 +1137,8 @@ static int vring_alloc_queue_split(struct vring_virtqueue_split *vring_split,
> > > > > > > return 0;
> > > > > > > }
> > > > > > >
> > > > > > > +struct virtqueue_ops split_ops;
> > > > > > > +
> > > > > > > static struct virtqueue *__vring_new_virtqueue_split(unsigned int index,
> > > > > > > struct vring_virtqueue_split *vring_split,
> > > > > > > struct virtio_device *vdev,
> > > > > > > @@ -1134,6 +1157,7 @@ static struct virtqueue *__vring_new_virtqueue_split(unsigned int index,
> > > > > > > return NULL;
> > > > > > >
> > > > > > > vq->packed_ring = false;
> > > > > > > + vq->ops = &split_ops;
> > > > > > > vq->vq.callback = callback;
> > > > > > > vq->vq.vdev = vdev;
> > > > > > > vq->vq.name = name;
> > > > > > > @@ -2076,6 +2100,8 @@ static void virtqueue_reset_packed(struct vring_virtqueue *vq)
> > > > > > > virtqueue_vring_init_packed(&vq->packed, !!vq->vq.callback);
> > > > > > > }
> > > > > > >
> > > > > > > +struct virtqueue_ops packed_ops;
> > > > > > > +
> > > > > > > static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index,
> > > > > > > struct vring_virtqueue_packed *vring_packed,
> > > > > > > struct virtio_device *vdev,
> > > > > > > @@ -2107,6 +2133,7 @@ static struct virtqueue *__vring_new_virtqueue_packed(unsigned int index,
> > > > > > > vq->broken = false;
> > > > > > > #endif
> > > > > > > vq->packed_ring = true;
> > > > > > > + vq->ops = &packed_ops;
> > > > > > > vq->dma_dev = dma_dev;
> > > > > > > vq->use_dma_api = vring_use_dma_api(vdev);
> > > > > > >
> > > > > > > @@ -2194,6 +2221,34 @@ static int virtqueue_resize_packed(struct vring_virtqueue *vq, u32 num)
> > > > > > > return -ENOMEM;
> > > > > > > }
> > > > > > >
> > > > > > > +struct virtqueue_ops split_ops = {
> > > > > > > +	.add = virtqueue_add_split,
> > > > > > > +	.get = virtqueue_get_buf_ctx_split,
> > > > > > > +	.kick_prepare = virtqueue_kick_prepare_split,
> > > > > > > +	.disable_cb = virtqueue_disable_cb_split,
> > > > > > > +	.enable_cb_delayed = virtqueue_enable_cb_delayed_split,
> > > > > > > +	.enable_cb_prepare = virtqueue_enable_cb_prepare_split,
> > > > > > > +	.poll = virtqueue_poll_split,
> > > > > > > +	.detach_unused_buf = virtqueue_detach_unused_buf_split,
> > > > > > > +	.more_used = more_used_split,
> > > > > > > +	.resize = virtqueue_resize_split,
> > > > > > > +	.reset = virtqueue_reset_split,
> > > > > > > +};
> > > > > > > +
> > > > > > > +struct virtqueue_ops packed_ops = {
> > > > > > > +	.add = virtqueue_add_packed,
> > > > > > > +	.get = virtqueue_get_buf_ctx_packed,
> > > > > > > +	.kick_prepare = virtqueue_kick_prepare_packed,
> > > > > > > +	.disable_cb = virtqueue_disable_cb_packed,
> > > > > > > +	.enable_cb_delayed = virtqueue_enable_cb_delayed_packed,
> > > > > > > +	.enable_cb_prepare = virtqueue_enable_cb_prepare_packed,
> > > > > > > +	.poll = virtqueue_poll_packed,
> > > > > > > +	.detach_unused_buf = virtqueue_detach_unused_buf_packed,
> > > > > > > +	.more_used = more_used_packed,
> > > > > > > +	.resize = virtqueue_resize_packed,
> > > > > > > +	.reset = virtqueue_reset_packed,
> > > > > > > +};
> > > > > > > +
> > > > > > > static int virtqueue_disable_and_recycle(struct virtqueue *_vq,
> > > > > > > void (*recycle)(struct virtqueue *vq, void *buf))
> > > > > > > {
> > > > > > > @@ -2248,10 +2303,8 @@ static inline int virtqueue_add(struct virtqueue *_vq,
> > > > > > > {
> > > > > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > >
> > > > > > > - return vq->packed_ring ? virtqueue_add_packed(vq, sgs, total_sg,
> > > > > > > - out_sgs, in_sgs, data, ctx, premapped, gfp) :
> > > > > > > - virtqueue_add_split(vq, sgs, total_sg,
> > > > > > > - out_sgs, in_sgs, data, ctx, premapped, gfp);
> > > > > > > + return vq->ops->add(vq, sgs, total_sg,
> > > > > > > + out_sgs, in_sgs, data, ctx, premapped, gfp);
> > > > > > > }
> > > > > > >
> > > > > > > /**
> > > > > > > @@ -2437,8 +2490,7 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq)
> > > > > > > {
> > > > > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > >
> > > > > > > - return vq->packed_ring ? virtqueue_kick_prepare_packed(vq) :
> > > > > > > - virtqueue_kick_prepare_split(vq);
> > > > > > > + return vq->ops->kick_prepare(vq);
> > > > > > > }
> > > > > > > EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
> > > > > > >
> > > > > > > @@ -2508,8 +2560,7 @@ void *virtqueue_get_buf_ctx(struct virtqueue *_vq, unsigned int *len,
> > > > > > > {
> > > > > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > >
> > > > > > > - return vq->packed_ring ? virtqueue_get_buf_ctx_packed(vq, len, ctx) :
> > > > > > > - virtqueue_get_buf_ctx_split(vq, len, ctx);
> > > > > > > + return vq->ops->get(vq, len, ctx);
> > > > > > > }
> > > > > > > EXPORT_SYMBOL_GPL(virtqueue_get_buf_ctx);
> > > > > > >
> > > > > > > @@ -2531,10 +2582,7 @@ void virtqueue_disable_cb(struct virtqueue *_vq)
> > > > > > > {
> > > > > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > >
> > > > > > > - if (vq->packed_ring)
> > > > > > > - virtqueue_disable_cb_packed(vq);
> > > > > > > - else
> > > > > > > - virtqueue_disable_cb_split(vq);
> > > > > > > + return vq->ops->disable_cb(vq);
> > > > > > > }
> > > > > > > EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
> > > > > > >
> > > > > > > @@ -2557,8 +2605,7 @@ unsigned int virtqueue_enable_cb_prepare(struct virtqueue *_vq)
> > > > > > > if (vq->event_triggered)
> > > > > > > vq->event_triggered = false;
> > > > > > >
> > > > > > > - return vq->packed_ring ? virtqueue_enable_cb_prepare_packed(vq) :
> > > > > > > - virtqueue_enable_cb_prepare_split(vq);
> > > > > > > + return vq->ops->enable_cb_prepare(vq);
> > > > > > > }
> > > > > > > EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
> > > > > > >
> > > > > > > @@ -2579,8 +2626,7 @@ bool virtqueue_poll(struct virtqueue *_vq, unsigned int last_used_idx)
> > > > > > > return false;
> > > > > > >
> > > > > > > virtio_mb(vq->weak_barriers);
> > > > > > > - return vq->packed_ring ? virtqueue_poll_packed(vq, last_used_idx) :
> > > > > > > - virtqueue_poll_split(vq, last_used_idx);
> > > > > > > + return vq->ops->poll(vq, last_used_idx);
> > > > > > > }
> > > > > > > EXPORT_SYMBOL_GPL(virtqueue_poll);
> > > > > > >
> > > > > > > @@ -2623,8 +2669,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
> > > > > > > if (vq->event_triggered)
> > > > > > > vq->event_triggered = false;
> > > > > > >
> > > > > > > - return vq->packed_ring ? virtqueue_enable_cb_delayed_packed(vq) :
> > > > > > > - virtqueue_enable_cb_delayed_split(vq);
> > > > > > > + return vq->ops->enable_cb_delayed(vq);
> > > > > > > }
> > > > > > > EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
> > > > > > >
> > > > > > > @@ -2640,14 +2685,13 @@ void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
> > > > > > > {
> > > > > > > struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > >
> > > > > > > - return vq->packed_ring ? virtqueue_detach_unused_buf_packed(vq) :
> > > > > > > - virtqueue_detach_unused_buf_split(vq);
> > > > > > > + return vq->ops->detach_unused_buf(vq);
> > > > > > > }
> > > > > > > EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
> > > > > > >
> > > > > > > static inline bool more_used(const struct vring_virtqueue *vq)
> > > > > > > {
> > > > > > > - return vq->packed_ring ? more_used_packed(vq) : more_used_split(vq);
> > > > > > > + return vq->ops->more_used(vq);
> > > > > > > }
> > > > > > >
> > > > > > > /**
> > > > > > > @@ -2785,10 +2829,7 @@ int virtqueue_resize(struct virtqueue *_vq, u32 num,
> > > > > > > if (recycle_done)
> > > > > > > recycle_done(_vq);
> > > > > > >
> > > > > > > - if (vq->packed_ring)
> > > > > > > - err = virtqueue_resize_packed(vq, num);
> > > > > > > - else
> > > > > > > - err = virtqueue_resize_split(vq, num);
> > > > > > > + err = vq->ops->resize(vq, num);
> > > > > > >
> > > > > > > return virtqueue_enable_after_reset(_vq);
> > > > > > > }
> > > > > > > @@ -2822,10 +2863,7 @@ int virtqueue_reset(struct virtqueue *_vq,
> > > > > > > if (recycle_done)
> > > > > > > recycle_done(_vq);
> > > > > > >
> > > > > > > - if (vq->packed_ring)
> > > > > > > - virtqueue_reset_packed(vq);
> > > > > > > - else
> > > > > > > - virtqueue_reset_split(vq);
> > > > > > > + vq->ops->reset(vq);
> > > > > > >
> > > > > > > return virtqueue_enable_after_reset(_vq);
> > > > > > > }
> > > > > > > --
> > > > > > > 2.42.0
> > > > > >
> > > >
>