[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20221216081303.p4pcveclfa5n4slw@sgarzare-redhat>
Date: Fri, 16 Dec 2022 09:13:03 +0100
From: Stefano Garzarella <sgarzare@...hat.com>
To: Jason Wang <jasowang@...hat.com>
Cc: virtualization@...ts.linux-foundation.org,
Andrey Zhadchenko <andrey.zhadchenko@...tuozzo.com>,
linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
"Michael S. Tsirkin" <mst@...hat.com>, eperezma@...hat.com,
stefanha@...hat.com, netdev@...r.kernel.org
Subject: Re: [RFC PATCH 6/6] vdpa_sim: add support for user VA
On Fri, Dec 16, 2022 at 03:26:46PM +0800, Jason Wang wrote:
>On Thu, Dec 15, 2022 at 12:31 AM Stefano Garzarella <sgarzare@...hat.com> wrote:
>>
>> The new "use_va" module parameter (default: false) is used in
>> vdpa_alloc_device() to inform the vDPA framework that the device
>> supports VA.
>>
>> vringh is initialized to use VA only when "use_va" is true and the
>> user's mm has been bound. So, only when the bus supports user VA
>> (e.g. vhost-vdpa).
>>
>> vdpasim_mm_work_fn work is used to attach the kthread to the user
>> address space when the .bind_mm callback is invoked, and to detach
>> it when the device is reset.
>
>One thing in my mind is that the current datapath is running under
>spinlock which prevents us from using iov_iter (which may have page
>faults).
>
>We need to get rid of the spinlock first.
Right! I already have a patch for that, since I used it for the vdpa-blk
software device in-kernel PoC, where I had the same issue.
I'll add it to the series!
>
>>
>> Signed-off-by: Stefano Garzarella <sgarzare@...hat.com>
>> ---
>> drivers/vdpa/vdpa_sim/vdpa_sim.h | 1 +
>> drivers/vdpa/vdpa_sim/vdpa_sim.c | 104 ++++++++++++++++++++++++++++++-
>> 2 files changed, 103 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h
>> index 07ef53ea375e..1b010e5c0445 100644
>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.h
>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h
>> @@ -55,6 +55,7 @@ struct vdpasim {
>> struct vdpasim_virtqueue *vqs;
>> struct kthread_worker *worker;
>> struct kthread_work work;
>> + struct mm_struct *mm_bound;
>> struct vdpasim_dev_attr dev_attr;
>> /* spinlock to synchronize virtqueue state */
>> spinlock_t lock;
>> diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> index 36a1d2e0a6ba..6e07cedef30c 100644
>> --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
>> @@ -36,10 +36,90 @@ module_param(max_iotlb_entries, int, 0444);
>> MODULE_PARM_DESC(max_iotlb_entries,
>> "Maximum number of iotlb entries for each address space. 0 means unlimited. (default: 2048)");
>>
>> +static bool use_va;
>> +module_param(use_va, bool, 0444);
>> +MODULE_PARM_DESC(use_va, "Enable the device's ability to use VA");
>> +
>> #define VDPASIM_QUEUE_ALIGN PAGE_SIZE
>> #define VDPASIM_QUEUE_MAX 256
>> #define VDPASIM_VENDOR_ID 0
>>
>> +struct vdpasim_mm_work {
>> + struct kthread_work work;
>> + struct task_struct *owner;
>> + struct mm_struct *mm;
>> + bool bind;
>> + int ret;
>> +};
>> +
>> +static void vdpasim_mm_work_fn(struct kthread_work *work)
>> +{
>> + struct vdpasim_mm_work *mm_work =
>> + container_of(work, struct vdpasim_mm_work, work);
>> +
>> + mm_work->ret = 0;
>> +
>> + if (mm_work->bind) {
>> + kthread_use_mm(mm_work->mm);
>> +#if 0
>> + if (mm_work->owner)
>> + mm_work->ret = cgroup_attach_task_all(mm_work->owner,
>> + current);
>> +#endif
>> + } else {
>> +#if 0
>> + //TODO: check it
>> + cgroup_release(current);
>> +#endif
>> + kthread_unuse_mm(mm_work->mm);
>> + }
>> +}
>> +
>> +static void vdpasim_worker_queue_mm(struct vdpasim *vdpasim,
>> + struct vdpasim_mm_work *mm_work)
>> +{
>> + struct kthread_work *work = &mm_work->work;
>> +
>> + kthread_init_work(work, vdpasim_mm_work_fn);
>> + kthread_queue_work(vdpasim->worker, work);
>> +
>> + spin_unlock(&vdpasim->lock);
>> + kthread_flush_work(work);
>> + spin_lock(&vdpasim->lock);
>> +}
>> +
>> +static int vdpasim_worker_bind_mm(struct vdpasim *vdpasim,
>> + struct mm_struct *new_mm,
>> + struct task_struct *owner)
>> +{
>> + struct vdpasim_mm_work mm_work;
>> +
>> + mm_work.owner = owner;
>> + mm_work.mm = new_mm;
>> + mm_work.bind = true;
>> +
>> + vdpasim_worker_queue_mm(vdpasim, &mm_work);
>> +
>
>Should we wait for the work to be finished?
Yep, I'm waiting inside vdpasim_worker_queue_mm() by calling
kthread_flush_work().
If we switch to a mutex, I think we can avoid releasing the lock around
that call.
>
>> + if (!mm_work.ret)
>> + vdpasim->mm_bound = new_mm;
>> +
>> + return mm_work.ret;
>> +}
>> +
>> +static void vdpasim_worker_unbind_mm(struct vdpasim *vdpasim)
>> +{
>> + struct vdpasim_mm_work mm_work;
>> +
>> + if (!vdpasim->mm_bound)
>> + return;
>> +
>> + mm_work.mm = vdpasim->mm_bound;
>> + mm_work.bind = false;
>> +
>> + vdpasim_worker_queue_mm(vdpasim, &mm_work);
>> +
>> + vdpasim->mm_bound = NULL;
>> +}
>> static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa)
>> {
>> return container_of(vdpa, struct vdpasim, vdpa);
>> @@ -66,8 +146,10 @@ static void vdpasim_vq_notify(struct vringh *vring)
>> static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
>> {
>> struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx];
>> + bool va_enabled = use_va && vdpasim->mm_bound;
>>
>> - vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false, false,
>> + vringh_init_iotlb(&vq->vring, vdpasim->features, vq->num, false,
>> + va_enabled,
>> (struct vring_desc *)(uintptr_t)vq->desc_addr,
>> (struct vring_avail *)
>> (uintptr_t)vq->driver_addr,
>> @@ -96,6 +178,9 @@ static void vdpasim_do_reset(struct vdpasim *vdpasim)
>> {
>> int i;
>>
>> + //TODO: should we cancel the works?
>> + vdpasim_worker_unbind_mm(vdpasim);
>
>We probably don't need this since it's the virtio level reset so we
>need to keep the mm bound in this case. Otherwise we may break the
>guest. It should be the responsibility of the driver to call
>config_ops->unbind if it needs to do that.
Got it, my biggest concern was when we go from vhost-vdpa to virtio-vdpa,
but as you said, in vhost-vdpa I can call unbind before releasing the
device.
Thanks,
Stefano
Powered by blists - more mailing lists