[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAJaqyWe=UGMmL=AdDZpkQR5Og9bmuYUEDFN9L5cd1BLGmEFQaw@mail.gmail.com>
Date: Fri, 18 Mar 2022 08:35:14 +0100
From: Eugenio Perez Martin <eperezma@...hat.com>
To: Gautam Dawar <gautam.dawar@...inx.com>
Cc: Gautam Dawar <gdawar@...inx.com>,
Martin Petrus Hubertus Habets <martinh@...inx.com>,
Harpreet Singh Anand <hanand@...inx.com>,
Tanuj Murlidhar Kamde <tanujk@...inx.com>,
Jason Wang <jasowang@...hat.com>,
"Michael S. Tsirkin" <mst@...hat.com>,
Zhu Lingshan <lingshan.zhu@...el.com>,
Stefano Garzarella <sgarzare@...hat.com>,
Xie Yongji <xieyongji@...edance.com>,
Eli Cohen <elic@...dia.com>,
Si-Wei Liu <si-wei.liu@...cle.com>,
Parav Pandit <parav@...dia.com>,
Longpeng <longpeng2@...wei.com>,
virtualization <virtualization@...ts.linux-foundation.org>,
linux-kernel@...r.kernel.org, kvm list <kvm@...r.kernel.org>,
netdev@...r.kernel.org
Subject: Re: [RFC PATCH v2 19/19] vdpasim: control virtqueue support
On Thu, Feb 24, 2022 at 10:29 PM Gautam Dawar <gautam.dawar@...inx.com> wrote:
>
> This patch introduces the control virtqueue support for vDPA
> simulator. This is a requirement for supporting advanced features like
> multiqueue.
>
> A requirement for the control virtqueue is to isolate its memory access
> from the rx/tx virtqueues. This is because when using a vDPA device
> for a VM, the control virtqueue is not directly assigned to the VM.
> Userspace (Qemu) will present a shadow control virtqueue instead, so
> that it can record the device state.
>
> The isolation is done via the virtqueue groups and ASID support in
> vDPA through vhost-vdpa. The simulator is extended to have:
>
> 1) three virtqueues: RXVQ, TXVQ and CVQ (control virtqueue)
> 2) two virtqueue groups: group 0 contains RXVQ and TXVQ; group 1
> contains CVQ
> 3) two address spaces and the simulator simply implements the address
> spaces by mapping it 1:1 to IOTLB.
>
> For the VM use cases, userspace(Qemu) may set AS 0 to group 0 and AS 1
> to group 1. So we have:
>
> 1) The IOTLB for virtqueue group 0 contains the mappings of guest, so
> RX and TX can be assigned to guest directly.
> 2) The IOTLB for virtqueue group 1 contains the mappings of CVQ, which
> are the buffers allocated and managed by the VMM only. So the CVQ of
> vhost-vdpa is visible to the VMM only, and the guest cannot access the
> CVQ of vhost-vdpa.
>
> For the other use cases, since AS 0 is associated with all virtqueue
> groups by default, all virtqueues share the same mapping.
>
> To demonstrate the function, VIRTIO_NET_F_CTRL_MAC_ADDR is
> implemented in the simulator so that the driver can set the MAC address.
>
> Signed-off-by: Jason Wang <jasowang@...hat.com>
> Signed-off-by: Gautam Dawar <gdawar@...inx.com>
> ---
> drivers/vdpa/vdpa_sim/vdpa_sim.c | 91 ++++++++++++++++++++++------
> drivers/vdpa/vdpa_sim/vdpa_sim.h | 2 +
> drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 88 ++++++++++++++++++++++++++-
> 3 files changed, 161 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> index 659e2e2e4b0c..59611f18a3a8 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> @@ -96,11 +96,17 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
> {
> int i;
>
> - for (i = 0; i < vdpasim->dev_attr.nvqs; i++)
> + spin_lock(&vdpasim->iommu_lock);
> +
> + for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
> vdpasim_vq_reset(vdpasim, &vdpasim->vqs[i]);
> + vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0],
> + &vdpasim->iommu_lock);
> + }
> +
> + for (i = 0; i < vdpasim->dev_attr.nas; i++)
> + vhost_iotlb_reset(&vdpasim->iommu[i]);
>
> - spin_lock(&vdpasim->iommu_lock);
> - vhost_iotlb_reset(vdpasim->iommu);
> spin_unlock(&vdpasim->iommu_lock);
>
> vdpasim->features = 0;
> @@ -145,7 +151,7 @@ static dma_addr_t vdpasim_map_range(struct vdpasim *vdpasim, phys_addr_t paddr,
> dma_addr = iova_dma_addr(&vdpasim->iova, iova);
>
> spin_lock(&vdpasim->iommu_lock);
> - ret = vhost_iotlb_add_range(vdpasim->iommu, (u64)dma_addr,
> + ret = vhost_iotlb_add_range(&vdpasim->iommu[0], (u64)dma_addr,
> (u64)dma_addr + size - 1, (u64)paddr, perm);
> spin_unlock(&vdpasim->iommu_lock);
>
> @@ -161,7 +167,7 @@ static void vdpasim_unmap_range(struct vdpasim *vdpasim, dma_addr_t dma_addr,
> size_t size)
> {
> spin_lock(&vdpasim->iommu_lock);
> - vhost_iotlb_del_range(vdpasim->iommu, (u64)dma_addr,
> + vhost_iotlb_del_range(&vdpasim->iommu[0], (u64)dma_addr,
> (u64)dma_addr + size - 1);
> spin_unlock(&vdpasim->iommu_lock);
>
> @@ -250,8 +256,9 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
> else
> ops = &vdpasim_config_ops;
>
> - vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, 1,
> - 1, dev_attr->name, false);
> + vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
> + dev_attr->ngroups, dev_attr->nas,
> + dev_attr->name, false);
> if (IS_ERR(vdpasim)) {
> ret = PTR_ERR(vdpasim);
> goto err_alloc;
> @@ -278,16 +285,20 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
> if (!vdpasim->vqs)
> goto err_iommu;
>
> - vdpasim->iommu = vhost_iotlb_alloc(max_iotlb_entries, 0);
> + vdpasim->iommu = kmalloc_array(vdpasim->dev_attr.nas,
> + sizeof(*vdpasim->iommu), GFP_KERNEL);
> if (!vdpasim->iommu)
> goto err_iommu;
>
> + for (i = 0; i < vdpasim->dev_attr.nas; i++)
> + vhost_iotlb_init(&vdpasim->iommu[i], 0, 0);
> +
> vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL);
> if (!vdpasim->buffer)
> goto err_iommu;
>
> for (i = 0; i < dev_attr->nvqs; i++)
> - vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu,
> + vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0],
> &vdpasim->iommu_lock);
>
> ret = iova_cache_get();
> @@ -401,7 +412,11 @@ static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa)
>
> static u32 vdpasim_get_vq_group(struct vdpa_device *vdpa, u16 idx)
> {
> - return 0;
> + /* RX and TX belongs to group 0, CVQ belongs to group 1 */
> + if (idx == 2)
> + return 1;
> + else
> + return 0;
> }
>
> static u64 vdpasim_get_device_features(struct vdpa_device *vdpa)
> @@ -539,20 +554,53 @@ static struct vdpa_iova_range vdpasim_get_iova_range(struct vdpa_device *vdpa)
> return range;
> }
>
> +static int vdpasim_set_group_asid(struct vdpa_device *vdpa, unsigned int group,
> + unsigned int asid)
> +{
> + struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> + struct vhost_iotlb *iommu;
> + int i;
> +
> + if (group > vdpasim->dev_attr.ngroups)
> + return -EINVAL;
> +
> + if (asid > vdpasim->dev_attr.nas)
> + return -EINVAL;
> +
> + iommu = &vdpasim->iommu[asid];
> +
> + spin_lock(&vdpasim->lock);
> +
> + for (i = 0; i < vdpasim->dev_attr.nvqs; i++)
> + if (vdpasim_get_vq_group(vdpa, i) == group)
> + vringh_set_iotlb(&vdpasim->vqs[i].vring, &vdpasim->iommu[0],
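The second argument to the "vringh_set_iotlb" call must simply be "iommu"
(the &vdpasim->iommu[asid] pointer computed above). Otherwise we are
effectively assigning address space 0 to every virtqueue that belongs to
"group", no matter which asid was requested, making it impossible to
change it.
Thanks!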
> + &vdpasim->iommu_lock);
> +
> + spin_unlock(&vdpasim->lock);
> +
> + return 0;
> +}
> +
> static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid,
> struct vhost_iotlb *iotlb)
> {
> struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> struct vhost_iotlb_map *map;
> + struct vhost_iotlb *iommu;
> u64 start = 0ULL, last = 0ULL - 1;
> int ret;
>
> + if (asid >= vdpasim->dev_attr.nas)
> + return -EINVAL;
> +
> spin_lock(&vdpasim->iommu_lock);
> - vhost_iotlb_reset(vdpasim->iommu);
> +
> + iommu = &vdpasim->iommu[asid];
> + vhost_iotlb_reset(iommu);
>
> for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
> map = vhost_iotlb_itree_next(map, start, last)) {
> - ret = vhost_iotlb_add_range(vdpasim->iommu, map->start,
> + ret = vhost_iotlb_add_range(iommu, map->start,
> map->last, map->addr, map->perm);
> if (ret)
> goto err;
> @@ -561,7 +609,7 @@ static int vdpasim_set_map(struct vdpa_device *vdpa, unsigned int asid,
> return 0;
>
> err:
> - vhost_iotlb_reset(vdpasim->iommu);
> + vhost_iotlb_reset(iommu);
> spin_unlock(&vdpasim->iommu_lock);
> return ret;
> }
> @@ -573,9 +621,12 @@ static int vdpasim_dma_map(struct vdpa_device *vdpa, unsigned int asid,
> struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
> int ret;
>
> + if (asid >= vdpasim->dev_attr.nas)
> + return -EINVAL;
> +
> spin_lock(&vdpasim->iommu_lock);
> - ret = vhost_iotlb_add_range_ctx(vdpasim->iommu, iova, iova + size - 1,
> - pa, perm, opaque);
> + ret = vhost_iotlb_add_range_ctx(&vdpasim->iommu[asid], iova,
> + iova + size - 1, pa, perm, opaque);
> spin_unlock(&vdpasim->iommu_lock);
>
> return ret;
> @@ -586,8 +637,11 @@ static int vdpasim_dma_unmap(struct vdpa_device *vdpa, unsigned int asid,
> {
> struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
>
> + if (asid >= vdpasim->dev_attr.nas)
> + return -EINVAL;
> +
> spin_lock(&vdpasim->iommu_lock);
> - vhost_iotlb_del_range(vdpasim->iommu, iova, iova + size - 1);
> + vhost_iotlb_del_range(&vdpasim->iommu[asid], iova, iova + size - 1);
> spin_unlock(&vdpasim->iommu_lock);
>
> return 0;
> @@ -611,8 +665,7 @@ static void vdpasim_free(struct vdpa_device *vdpa)
> }
>
> kvfree(vdpasim->buffer);
> - if (vdpasim->iommu)
> - vhost_iotlb_free(vdpasim->iommu);
> + vhost_iotlb_free(vdpasim->iommu);
> kfree(vdpasim->vqs);
> kfree(vdpasim->config);
> }
> @@ -643,6 +696,7 @@ static const struct vdpa_config_ops vdpasim_config_ops = {
> .set_config = vdpasim_set_config,
> .get_generation = vdpasim_get_generation,
> .get_iova_range = vdpasim_get_iova_range,
> + .set_group_asid = vdpasim_set_group_asid,
> .dma_map = vdpasim_dma_map,
> .dma_unmap = vdpasim_dma_unmap,
> .free = vdpasim_free,
> @@ -674,6 +728,7 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = {
> .set_config = vdpasim_set_config,
> .get_generation = vdpasim_get_generation,
> .get_iova_range = vdpasim_get_iova_range,
> + .set_group_asid = vdpasim_set_group_asid,
> .set_map = vdpasim_set_map,
> .free = vdpasim_free,
> };
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
> index 0be7c1e7ef80..622782e92239 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
> @@ -41,6 +41,8 @@ struct vdpasim_dev_attr {
> size_t buffer_size;
> int nvqs;
> u32 id;
> + u32 ngroups;
> + u32 nas;
>
> work_func_t work_fn;
> void (*get_config)(struct vdpasim *vdpasim, void *config);
> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
> index ed5ade4ae570..513970c05af2 100644
> --- a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c
> @@ -26,10 +26,15 @@
> #define DRV_LICENSE "GPL v2"
>
> #define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \
> + (1ULL << VIRTIO_NET_F_MTU) | \
> (1ULL << VIRTIO_NET_F_MAC) | \
> - (1ULL << VIRTIO_NET_F_MTU));
> + (1ULL << VIRTIO_NET_F_CTRL_VQ) | \
> + (1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR));
>
> -#define VDPASIM_NET_VQ_NUM 2
> +/* 3 virtqueues, 2 address spaces, 2 virtqueue groups */
> +#define VDPASIM_NET_VQ_NUM 3
> +#define VDPASIM_NET_AS_NUM 2
> +#define VDPASIM_NET_GROUP_NUM 2
>
> static void vdpasim_net_complete(struct vdpasim_virtqueue *vq, size_t len)
> {
> @@ -63,6 +68,81 @@ static bool receive_filter(struct vdpasim *vdpasim, size_t len)
> return false;
> }
>
> +static virtio_net_ctrl_ack vdpasim_handle_ctrl_mac(struct vdpasim *vdpasim,
> + u8 cmd)
> +{
> + struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2];
> + virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> + size_t read;
> +
> + switch (cmd) {
> + case VIRTIO_NET_CTRL_MAC_ADDR_SET:
> + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov,
> + (void *)vdpasim->config.mac,
> + ETH_ALEN);
> + if (read == ETH_ALEN)
> + status = VIRTIO_NET_OK;
> + break;
> + default:
> + break;
> + }
> +
> + return status;
> +}
> +
> +static void vdpasim_handle_cvq(struct vdpasim *vdpasim)
> +{
> + struct vdpasim_virtqueue *cvq = &vdpasim->vqs[2];
> + virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
> + struct virtio_net_ctrl_hdr ctrl;
> + size_t read, write;
> + int err;
> +
> + if (!(vdpasim->features & (1ULL << VIRTIO_NET_F_CTRL_VQ)))
> + return;
> +
> + if (!cvq->ready)
> + return;
> +
> + while (true) {
> + err = vringh_getdesc_iotlb(&cvq->vring, &cvq->in_iov,
> + &cvq->out_iov,
> + &cvq->head, GFP_ATOMIC);
> + if (err <= 0)
> + break;
> +
> + read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->in_iov, &ctrl,
> + sizeof(ctrl));
> + if (read != sizeof(ctrl))
> + break;
> +
> + switch (ctrl.class) {
> + case VIRTIO_NET_CTRL_MAC:
> + status = vdpasim_handle_ctrl_mac(vdpasim, ctrl.cmd);
> + break;
> + default:
> + break;
> + }
> +
> + /* Make sure data is wrote before advancing index */
> + smp_wmb();
> +
> + write = vringh_iov_push_iotlb(&cvq->vring, &cvq->out_iov,
> + &status, sizeof(status));
> + vringh_complete_iotlb(&cvq->vring, cvq->head, write);
> + vringh_kiov_cleanup(&cvq->in_iov);
> + vringh_kiov_cleanup(&cvq->out_iov);
> +
> + /* Make sure used is visible before rasing the interrupt. */
> + smp_wmb();
> +
> + local_bh_disable();
> + if (cvq->cb)
> + cvq->cb(cvq->private);
> + local_bh_enable();
> + }
> +}
> +
> static void vdpasim_net_work(struct work_struct *work)
> {
> struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
> @@ -77,6 +157,8 @@ static void vdpasim_net_work(struct work_struct *work)
> if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
> goto out;
>
> + vdpasim_handle_cvq(vdpasim);
> +
> if (!txq->ready || !rxq->ready)
> goto out;
>
> @@ -162,6 +244,8 @@ static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
> dev_attr.id = VIRTIO_ID_NET;
> dev_attr.supported_features = VDPASIM_NET_FEATURES;
> dev_attr.nvqs = VDPASIM_NET_VQ_NUM;
> + dev_attr.ngroups = VDPASIM_NET_GROUP_NUM;
> + dev_attr.nas = VDPASIM_NET_AS_NUM;
> dev_attr.config_size = sizeof(struct virtio_net_config);
> dev_attr.get_config = vdpasim_net_get_config;
> dev_attr.work_fn = vdpasim_net_work;
> --
> 2.25.0
>
Powered by blists - more mailing lists