[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACycT3s7MMWDiwOC2XFSupbG9-f3WqtxzS4yfyYKhbC39JyF9g@mail.gmail.com>
Date: Thu, 4 Mar 2021 13:40:58 +0800
From: Yongji Xie <xieyongji@...edance.com>
To: Jason Wang <jasowang@...hat.com>
Cc: "Michael S. Tsirkin" <mst@...hat.com>,
Stefan Hajnoczi <stefanha@...hat.com>,
Stefano Garzarella <sgarzare@...hat.com>,
Parav Pandit <parav@...dia.com>, Bob Liu <bob.liu@...cle.com>,
Christoph Hellwig <hch@...radead.org>,
Randy Dunlap <rdunlap@...radead.org>,
Matthew Wilcox <willy@...radead.org>, viro@...iv.linux.org.uk,
Jens Axboe <axboe@...nel.dk>, bcrl@...ck.org,
Jonathan Corbet <corbet@....net>,
virtualization@...ts.linux-foundation.org, netdev@...r.kernel.org,
kvm@...r.kernel.org, linux-aio@...ck.org,
linux-fsdevel@...r.kernel.org
Subject: Re: Re: [RFC v4 05/11] vdpa: Support transferring virtual addressing
during DMA mapping
On Thu, Mar 4, 2021 at 11:07 AM Jason Wang <jasowang@...hat.com> wrote:
>
>
> On 2021/2/23 7:50 下午, Xie Yongji wrote:
> > This patch introduces an attribute for a vDPA device to indicate
> > whether virtual addresses can be used. If the vDPA device driver
> > sets it, the vhost-vdpa bus driver will not pin user pages and will
> > transfer userspace virtual addresses instead of physical addresses
> > during DMA mapping. The corresponding vma->vm_file and offset will
> > also be passed as an opaque pointer.
> >
> > Suggested-by: Jason Wang <jasowang@...hat.com>
> > Signed-off-by: Xie Yongji <xieyongji@...edance.com>
> > ---
> > drivers/vdpa/ifcvf/ifcvf_main.c | 2 +-
> > drivers/vdpa/mlx5/net/mlx5_vnet.c | 2 +-
> > drivers/vdpa/vdpa.c | 9 +++-
> > drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 +-
> > drivers/vhost/vdpa.c | 104 +++++++++++++++++++++++++++++++-------
> > include/linux/vdpa.h | 20 ++++++--
> > 6 files changed, 113 insertions(+), 26 deletions(-)
> >
> > diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c
> > index 7c8bbfcf6c3e..228b9f920fea 100644
> > --- a/drivers/vdpa/ifcvf/ifcvf_main.c
> > +++ b/drivers/vdpa/ifcvf/ifcvf_main.c
> > @@ -432,7 +432,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> >
> > adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
> > dev, &ifc_vdpa_ops,
> > - IFCVF_MAX_QUEUE_PAIRS * 2, NULL);
> > + IFCVF_MAX_QUEUE_PAIRS * 2, NULL, false);
> > if (adapter == NULL) {
> > IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
> > return -ENOMEM;
> > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > index 029822060017..54290438da28 100644
> > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > @@ -1964,7 +1964,7 @@ static int mlx5v_probe(struct auxiliary_device *adev,
> > max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
> >
> > ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
> > - 2 * mlx5_vdpa_max_qps(max_vqs), NULL);
> > + 2 * mlx5_vdpa_max_qps(max_vqs), NULL, false);
> > if (IS_ERR(ndev))
> > return PTR_ERR(ndev);
> >
> > diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
> > index 9700a0adcca0..fafc0ee5eb05 100644
> > --- a/drivers/vdpa/vdpa.c
> > +++ b/drivers/vdpa/vdpa.c
> > @@ -72,6 +72,7 @@ static void vdpa_release_dev(struct device *d)
> > * @nvqs: number of virtqueues supported by this device
> > * @size: size of the parent structure that contains private data
> > * @name: name of the vdpa device; optional.
> > + * @use_va: indicate whether virtual address can be used by this device
>
>
> I think "use_va" means va must be used instead of "can be" here.
>
Right.
>
> > *
> > * Driver should use vdpa_alloc_device() wrapper macro instead of
> > * using this directly.
> > @@ -81,7 +82,8 @@ static void vdpa_release_dev(struct device *d)
> > */
> > struct vdpa_device *__vdpa_alloc_device(struct device *parent,
> > const struct vdpa_config_ops *config,
> > - int nvqs, size_t size, const char *name)
> > + int nvqs, size_t size, const char *name,
> > + bool use_va)
> > {
> > struct vdpa_device *vdev;
> > int err = -EINVAL;
> > @@ -92,6 +94,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
> > if (!!config->dma_map != !!config->dma_unmap)
> > goto err;
> >
> > + /* It should only work for a device that uses an on-chip IOMMU */
> > + if (use_va && !(config->dma_map || config->set_map))
> > + goto err;
> > +
> > err = -ENOMEM;
> > vdev = kzalloc(size, GFP_KERNEL);
> > if (!vdev)
> > @@ -108,6 +114,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
> > vdev->config = config;
> > vdev->features_valid = false;
> > vdev->nvqs = nvqs;
> > + vdev->use_va = use_va;
> >
> > if (name)
> > err = dev_set_name(&vdev->dev, "%s", name);
> > diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> > index 5cfc262ce055..3a9a2dd4e987 100644
> > --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
> > +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
> > @@ -235,7 +235,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
> > ops = &vdpasim_config_ops;
> >
> > vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
> > - dev_attr->nvqs, dev_attr->name);
> > + dev_attr->nvqs, dev_attr->name, false);
> > if (!vdpasim)
> > goto err_alloc;
> >
> > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> > index 70857fe3263c..93769ace34df 100644
> > --- a/drivers/vhost/vdpa.c
> > +++ b/drivers/vhost/vdpa.c
> > @@ -480,21 +480,31 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
> > static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
> > {
> > struct vhost_dev *dev = &v->vdev;
> > + struct vdpa_device *vdpa = v->vdpa;
> > struct vhost_iotlb *iotlb = dev->iotlb;
> > struct vhost_iotlb_map *map;
> > + struct vdpa_map_file *map_file;
> > struct page *page;
> > unsigned long pfn, pinned;
> >
> > while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
> > - pinned = map->size >> PAGE_SHIFT;
> > - for (pfn = map->addr >> PAGE_SHIFT;
> > - pinned > 0; pfn++, pinned--) {
> > - page = pfn_to_page(pfn);
> > - if (map->perm & VHOST_ACCESS_WO)
> > - set_page_dirty_lock(page);
> > - unpin_user_page(page);
> > + if (!vdpa->use_va) {
> > + pinned = map->size >> PAGE_SHIFT;
> > + for (pfn = map->addr >> PAGE_SHIFT;
> > + pinned > 0; pfn++, pinned--) {
> > + page = pfn_to_page(pfn);
> > + if (map->perm & VHOST_ACCESS_WO)
> > + set_page_dirty_lock(page);
> > + unpin_user_page(page);
> > + }
> > + atomic64_sub(map->size >> PAGE_SHIFT,
> > + &dev->mm->pinned_vm);
> > + } else {
> > + map_file = (struct vdpa_map_file *)map->opaque;
> > + if (map_file->file)
> > + fput(map_file->file);
> > + kfree(map_file);
> > }
> > - atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
> > vhost_iotlb_map_free(iotlb, map);
> > }
> > }
> > @@ -530,21 +540,21 @@ static int perm_to_iommu_flags(u32 perm)
> > return flags | IOMMU_CACHE;
> > }
> >
> > -static int vhost_vdpa_map(struct vhost_vdpa *v,
> > - u64 iova, u64 size, u64 pa, u32 perm)
> > +static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
> > + u64 size, u64 pa, u32 perm, void *opaque)
> > {
> > struct vhost_dev *dev = &v->vdev;
> > struct vdpa_device *vdpa = v->vdpa;
> > const struct vdpa_config_ops *ops = vdpa->config;
> > int r = 0;
> >
> > - r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
> > - pa, perm);
> > + r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1,
> > + pa, perm, opaque);
> > if (r)
> > return r;
> >
> > if (ops->dma_map) {
> > - r = ops->dma_map(vdpa, iova, size, pa, perm, NULL);
> > + r = ops->dma_map(vdpa, iova, size, pa, perm, opaque);
> > } else if (ops->set_map) {
> > if (!v->in_batch)
> > r = ops->set_map(vdpa, dev->iotlb);
> > @@ -552,13 +562,15 @@ static int vhost_vdpa_map(struct vhost_vdpa *v,
> > r = iommu_map(v->domain, iova, pa, size,
> > perm_to_iommu_flags(perm));
> > }
> > -
> > - if (r)
> > + if (r) {
> > vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
> > - else
> > + return r;
> > + }
> > +
> > + if (!vdpa->use_va)
> > atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
> >
> > - return r;
> > + return 0;
> > }
> >
> > static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
> > @@ -579,10 +591,60 @@ static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
> > }
> > }
> >
> > +static int vhost_vdpa_va_map(struct vhost_vdpa *v,
> > + u64 iova, u64 size, u64 uaddr, u32 perm)
> > +{
> > + struct vhost_dev *dev = &v->vdev;
> > + u64 offset, map_size, map_iova = iova;
> > + struct vdpa_map_file *map_file;
> > + struct vm_area_struct *vma;
> > + int ret;
> > +
> > + mmap_read_lock(dev->mm);
> > +
> > + while (size) {
> > + vma = find_vma(dev->mm, uaddr);
> > + if (!vma) {
> > + ret = -EINVAL;
> > + goto err;
> > + }
> > + map_size = min(size, vma->vm_end - uaddr);
> > + offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
> > + map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
> > + if (!map_file) {
> > + ret = -ENOMEM;
> > + goto err;
> > + }
> > + if (vma->vm_file && (vma->vm_flags & VM_SHARED) &&
> > + !(vma->vm_flags & (VM_IO | VM_PFNMAP))) {
> > + map_file->file = get_file(vma->vm_file);
> > + map_file->offset = offset;
> > + }
>
>
> I think it's better to do the flag check right after find_vma(); this
> can avoid things like kfree, etc. (e.g., the code will still call
> vhost_vdpa_map() even if the flag is not expected now).
>
Make sense to me.
>
> > + ret = vhost_vdpa_map(v, map_iova, map_size, uaddr,
> > + perm, map_file);
> > + if (ret) {
> > + if (map_file->file)
> > + fput(map_file->file);
> > + kfree(map_file);
> > + goto err;
> > + }
> > + size -= map_size;
> > + uaddr += map_size;
> > + map_iova += map_size;
> > + }
> > + mmap_read_unlock(dev->mm);
> > +
> > + return 0;
> > +err:
> > + vhost_vdpa_unmap(v, iova, map_iova - iova);
> > + return ret;
> > +}
> > +
> > static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
> > struct vhost_iotlb_msg *msg)
> > {
> > struct vhost_dev *dev = &v->vdev;
> > + struct vdpa_device *vdpa = v->vdpa;
> > struct vhost_iotlb *iotlb = dev->iotlb;
> > struct page **page_list;
> > unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
> > @@ -601,6 +663,10 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
> > msg->iova + msg->size - 1))
> > return -EEXIST;
> >
> > + if (vdpa->use_va)
> > + return vhost_vdpa_va_map(v, msg->iova, msg->size,
> > + msg->uaddr, msg->perm);
>
>
> If possible, I would like to factor out the pa map below into a
> something like vhost_vdpa_pa_map() first with a separated patch. Then
> introduce vhost_vdpa_va_map().
>
Fine, will do it.
Thanks,
Yongji
Powered by blists - more mailing lists