Message-ID: <CACGkMEt19u07b_2GkT_tEBhpKJj97VoF-jcSqoaTyEULoWvdFw@mail.gmail.com>
Date: Wed, 11 Sep 2024 11:54:25 +0800
From: Jason Wang <jasowang@...hat.com>
To: Xuan Zhuo <xuanzhuo@...ux.alibaba.com>
Cc: netdev@...r.kernel.org, "Michael S. Tsirkin" <mst@...hat.com>,
Eugenio Pérez <eperezma@...hat.com>,
"David S. Miller" <davem@...emloft.net>, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>, Jesper Dangaard Brouer <hawk@...nel.org>,
John Fastabend <john.fastabend@...il.com>, virtualization@...ts.linux.dev,
bpf@...r.kernel.org
Subject: Re: [PATCH net-next 04/13] virtio_ring: perform premapped operations
 based on per-buffer

On Tue, Aug 20, 2024 at 3:33 PM Xuan Zhuo <xuanzhuo@...ux.alibaba.com> wrote:
>
> The current configuration sets the virtqueue (vq) to premapped mode,
> implying that all buffers submitted to this queue must be mapped ahead
> of time. This presents a challenge for the virtnet send queue (sq): the
> virtnet driver would be required to keep track of DMA information for
> vq size * 17 entries, which can be substantial. However, if the
> premapped mode were
> applied on a per-buffer basis, the complexity would be greatly reduced.
> With AF_XDP enabled, AF_XDP buffers would become premapped, while kernel
> skb buffers could remain unmapped.
Is this only applied to TX, or to both TX and RX?
>
> We can distinguish them by sg_page(sg): when sg_page(sg) is NULL, this
> indicates that the driver has performed DMA mapping in advance, allowing
> the virtio core to directly use sg_dma_address(sg) without
> conducting any internal DMA mapping.
This seems to conflict with the code below?
#define sg_is_premapped(sg) (!sg_page(sg))
And rethinking the design, a question: is there a chance that a
VM_PFNMAP area could be used for virtio-net? If that is true, the
sg_page() trick cannot work?
A quick glance tells me AF_XDP seems to be safe as it uses
pin_user_pages(), but we need to check other possibilities.
Or we need to fall back to our previous idea of having new APIs.
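
For reference, a minimal driver-side sketch of submitting one premapped
buffer under the sg_page(sg) == NULL convention (hypothetical code, not
taken from this patch; it assumes dma_addr and len were already mapped
by the driver, e.g. from an AF_XDP pool):

    /* The driver mapped the buffer itself; "premapped" is signalled by
     * leaving sg_page(sg) NULL and filling in the DMA address directly.
     */
    struct scatterlist sg;

    sg_init_table(&sg, 1);          /* zeroed entry: sg_page() is NULL */
    sg_dma_address(&sg) = dma_addr; /* address mapped by the driver */
    sg_dma_len(&sg) = len;
    sg.length = len;                /* the vring core reads sg->length */

    /* sg_is_premapped(&sg) is true, so the core skips map and unmap */
    err = virtqueue_add_outbuf(vq, &sg, 1, data, GFP_ATOMIC);
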
> Additionally, DMA unmap operations
> for this buffer will be bypassed.
>
> Suggested-by: Jason Wang <jasowang@...hat.com>
> Signed-off-by: Xuan Zhuo <xuanzhuo@...ux.alibaba.com>
> ---
> drivers/virtio/virtio_ring.c | 70 +++++++++++++++++++++---------------
> 1 file changed, 41 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index b43eca93015c..7efddc71af67 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -235,6 +235,7 @@ static void vring_free(struct virtqueue *_vq);
> */
>
> #define to_vvq(_vq) container_of_const(_vq, struct vring_virtqueue, vq)
> +#define sg_is_premapped(sg) (!sg_page(sg))
>
> static bool virtqueue_use_indirect(const struct vring_virtqueue *vq,
> unsigned int total_sg)
> @@ -292,9 +293,10 @@ static bool vring_use_dma_api(const struct virtio_device *vdev)
> return false;
> }
>
> -static bool vring_need_unmap_buffer(const struct vring_virtqueue *vring)
> +static bool vring_need_unmap_buffer(const struct vring_virtqueue *vring,
> + const struct vring_desc_extra *extra)
> {
> - return vring->use_dma_api && !vring->premapped;
> + return vring->use_dma_api && (extra->addr != DMA_MAPPING_ERROR);
> }
>
> size_t virtio_max_dma_size(const struct virtio_device *vdev)
> @@ -366,7 +368,7 @@ static struct device *vring_dma_dev(const struct vring_virtqueue *vq)
> static int vring_map_one_sg(const struct vring_virtqueue *vq, struct scatterlist *sg,
> enum dma_data_direction direction, dma_addr_t *addr)
> {
> - if (vq->premapped) {
> + if (sg_is_premapped(sg)) {
> *addr = sg_dma_address(sg);
> return 0;
> }
> @@ -457,7 +459,7 @@ static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq,
> (flags & VRING_DESC_F_WRITE) ?
> DMA_FROM_DEVICE : DMA_TO_DEVICE);
> } else {
> - if (!vring_need_unmap_buffer(vq))
> + if (!vring_need_unmap_buffer(vq, extra))
> goto out;
>
> dma_unmap_page(vring_dma_dev(vq),
> @@ -510,7 +512,7 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
> dma_addr_t addr,
> unsigned int len,
> u16 flags,
> - bool indirect)
> + bool indirect, bool premapped)
> {
> u16 next;
>
> @@ -518,7 +520,7 @@ static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq,
> desc[i].addr = cpu_to_virtio64(vq->vdev, addr);
> desc[i].len = cpu_to_virtio32(vq->vdev, len);
>
> - extra[i].addr = addr;
> + extra[i].addr = premapped ? DMA_MAPPING_ERROR : addr;
> extra[i].len = len;
> extra[i].flags = flags;
>
> @@ -611,7 +613,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> */
> i = virtqueue_add_desc_split(_vq, desc, extra, i, addr, sg->length,
> VRING_DESC_F_NEXT,
> - indirect);
> + indirect, sg_is_premapped(sg));
> }
> }
> for (; n < (out_sgs + in_sgs); n++) {
> @@ -629,12 +631,12 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> sg->length,
> VRING_DESC_F_NEXT |
> VRING_DESC_F_WRITE,
> - indirect);
> + indirect, sg_is_premapped(sg));
> }
> }
> /* Last one doesn't continue. */
> desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
> - if (!indirect && vring_need_unmap_buffer(vq))
> + if (!indirect && vring_need_unmap_buffer(vq, &extra[prev]))
> vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &=
> ~VRING_DESC_F_NEXT;
>
> @@ -643,19 +645,15 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> dma_addr_t addr = vring_map_single(
> vq, desc, total_sg * sizeof(struct vring_desc),
> DMA_TO_DEVICE);
> - if (vring_mapping_error(vq, addr)) {
> - if (vq->premapped)
> - goto free_indirect;
> -
> + if (vring_mapping_error(vq, addr))
> goto unmap_release;
> - }
>
> virtqueue_add_desc_split(_vq, vq->split.vring.desc,
> vq->split.desc_extra,
> head, addr,
> total_sg * sizeof(struct vring_desc),
> VRING_DESC_F_INDIRECT,
> - false);
> + false, false);
> }
>
> /* We're using some buffers from the free list. */
> @@ -712,7 +710,6 @@ static inline int virtqueue_add_split(struct virtqueue *_vq,
> i = vring_unmap_one_split(vq, &extra[i]);
> }
>
> -free_indirect:
> if (indirect)
> kfree(desc);
>
> @@ -794,7 +791,7 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head,
> VRING_DESC_F_INDIRECT));
> BUG_ON(len == 0 || len % sizeof(struct vring_desc));
>
> - if (vring_need_unmap_buffer(vq)) {
> + if (vq->use_dma_api) {
> for (j = 0; j < len / sizeof(struct vring_desc); j++)
> vring_unmap_one_split(vq, &extra[j]);
> }
> @@ -1228,7 +1225,7 @@ static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
> (flags & VRING_DESC_F_WRITE) ?
> DMA_FROM_DEVICE : DMA_TO_DEVICE);
> } else {
> - if (!vring_need_unmap_buffer(vq))
> + if (!vring_need_unmap_buffer(vq, extra))
> return;
>
> dma_unmap_page(vring_dma_dev(vq),
> @@ -1309,7 +1306,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
> i++;
>
> if (unlikely(vq->use_dma_api)) {
> - extra[i].addr = addr;
> + extra[i].addr = sg_is_premapped(sg) ? DMA_MAPPING_ERROR : addr;
> extra[i].len = sg->length;
> extra[i].flags = n < out_sgs ? 0 : VRING_DESC_F_WRITE;
> }
> @@ -1320,12 +1317,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
> addr = vring_map_single(vq, desc,
> total_sg * sizeof(struct vring_packed_desc),
> DMA_TO_DEVICE);
> - if (vring_mapping_error(vq, addr)) {
> - if (vq->premapped)
> - goto free_desc;
> -
> + if (vring_mapping_error(vq, addr))
> goto unmap_release;
> - }
>
> vq->packed.vring.desc[head].addr = cpu_to_le64(addr);
> vq->packed.vring.desc[head].len = cpu_to_le32(total_sg *
> @@ -1383,7 +1376,6 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq,
> for (i = 0; i < err_idx; i++)
> vring_unmap_extra_packed(vq, &extra[i]);
>
> -free_desc:
> kfree(desc);
>
> END_USE(vq);
> @@ -1474,7 +1466,8 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq,
> desc[i].id = cpu_to_le16(id);
>
> if (unlikely(vq->use_dma_api)) {
> - vq->packed.desc_extra[curr].addr = addr;
> + vq->packed.desc_extra[curr].addr = sg_is_premapped(sg) ?
> + DMA_MAPPING_ERROR : addr;
> vq->packed.desc_extra[curr].len = sg->length;
> vq->packed.desc_extra[curr].flags =
> le16_to_cpu(flags);
> @@ -1625,10 +1618,9 @@ static void detach_buf_packed(struct vring_virtqueue *vq,
> if (!extra)
> return;
>
> - if (vring_need_unmap_buffer(vq)) {
> + if (vq->use_dma_api) {
> len = vq->packed.desc_extra[id].len;
> - for (i = 0; i < len / sizeof(struct vring_packed_desc);
> - i++)
> + for (i = 0; i < len / sizeof(struct vring_packed_desc); i++)
Unnecessary changes?
> vring_unmap_extra_packed(vq, &extra[i]);
> }
> kfree(desc);
> @@ -2212,6 +2204,11 @@ static inline int virtqueue_add(struct virtqueue *_vq,
> * @data: the token identifying the buffer.
> * @gfp: how to do memory allocations (if necessary).
> *
> + * When sg_page(sg) is NULL, this indicates that the driver has performed DMA
> + * mapping in advance, allowing the virtio core to directly utilize
> + * sg_dma_address(sg) without conducting any internal DMA mapping. Additionally,
> + * DMA unmap operations for this buffer will be bypassed.
> + *
> * Caller must ensure we don't call this with other virtqueue operations
> * at the same time (except where noted).
> *
> @@ -2246,6 +2243,11 @@ EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
> * @data: the token identifying the buffer.
> * @gfp: how to do memory allocations (if necessary).
> *
> + * When sg_page(sg) is NULL, this indicates that the driver has performed DMA
> + * mapping in advance, allowing the virtio core to directly utilize
> + * sg_dma_address(sg) without conducting any internal DMA mapping. Additionally,
> + * DMA unmap operations for this buffer will be bypassed.
> + *
> * Caller must ensure we don't call this with other virtqueue operations
> * at the same time (except where noted).
> *
> @@ -2268,6 +2270,11 @@ EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
> * @data: the token identifying the buffer.
> * @gfp: how to do memory allocations (if necessary).
> *
> + * When sg_page(sg) is NULL, this indicates that the driver has performed DMA
> + * mapping in advance, allowing the virtio core to directly utilize
> + * sg_dma_address(sg) without conducting any internal DMA mapping. Additionally,
> + * DMA unmap operations for this buffer will be bypassed.
> + *
> * Caller must ensure we don't call this with other virtqueue operations
> * at the same time (except where noted).
> *
> @@ -2291,6 +2298,11 @@ EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
> * @ctx: extra context for the token
> * @gfp: how to do memory allocations (if necessary).
> *
> + * When sg_page(sg) is NULL, this indicates that the driver has performed DMA
> + * mapping in advance, allowing the virtio core to directly utilize
> + * sg_dma_address(sg) without conducting any internal DMA mapping. Additionally,
> + * DMA unmap operations for this buffer will be bypassed.
> + *
> * Caller must ensure we don't call this with other virtqueue operations
> * at the same time (except where noted).
> *
> --
> 2.32.0.3.g01195cf9f
>
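
To summarize the mechanism for other reviewers: the patch replaces the
per-vq premapped flag with a per-descriptor sentinel, so the unmap
decision becomes per-buffer. A condensed restatement of the two key
pieces from the diff above (simplified, not the literal patched file):

    /* Add path: a premapped buffer (sg_page(sg) == NULL) records the
     * DMA_MAPPING_ERROR sentinel in desc_extra instead of a real address.
     */
    extra[i].addr = sg_is_premapped(sg) ? DMA_MAPPING_ERROR : addr;

    /* Unmap path: the sentinel makes this false for exactly that buffer,
     * so only buffers mapped by the virtio core itself are unmapped.
     */
    static bool vring_need_unmap_buffer(const struct vring_virtqueue *vring,
                                        const struct vring_desc_extra *extra)
    {
            return vring->use_dma_api && (extra->addr != DMA_MAPPING_ERROR);
    }
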
Thanks