linux-kernel - Re: [PATCH] vdpa: bypass waking up vhost

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a21bf980-c001-4728-0f08-69494f31fe98@redhat.com>
Date:   Thu, 28 May 2020 18:06:15 +0800
From:   Jason Wang <jasowang@...hat.com>
To:     Zhu Lingshan <lingshan.zhu@...el.com>, mst@...hat.com,
        kvm@...r.kernel.org, virtualization@...ts.linux-foundation.org,
        linux-kernel@...r.kernel.org, netdev@...r.kernel.org
Cc:     lulu@...hat.com, dan.daly@...el.com, cunming.liang@...el.com
Subject: Re: [PATCH] vdpa: bypass waking up vhost_woker for vdpa vq kick


On 2020/5/26 下午1:32, Zhu Lingshan wrote:
> Standard vhost devices rely on waking up a vhost_worker to kick
> a virtquque. However vdpa devices have hardware backends, so it
> does not need this waking up routin. In this commit, vdpa device
> will kick a virtqueue directly, reduce the performance overhead
> caused by waking up a vhost_woker.


Thanks for the patch. It would be helpful if you could share some 
performance numbers.

And the title should be "vhost-vdpa:" instead of "vdpa:"

This patch is important since we want to get rid of the kthread and the 
use_mm()/unuse_mm() usage, which will allow us to implement doorbell mapping.


>
> Signed-off-by: Zhu Lingshan <lingshan.zhu@...el.com>
> Suggested-by: Jason Wang <jasowang@...hat.com>
> ---
>   drivers/vhost/vdpa.c | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 100 insertions(+)
>
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index 0968361..d3a2aca 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -287,6 +287,66 @@ static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
>   
>   	return 0;
>   }
> +void vhost_vdpa_poll_stop(struct vhost_virtqueue *vq)
> +{
> +	vhost_poll_stop(&vq->poll);
> +}
> +
> +int vhost_vdpa_poll_start(struct vhost_virtqueue *vq)
> +{
> +	struct vhost_poll *poll = &vq->poll;
> +	struct file *file = vq->kick;
> +	__poll_t mask;
> +
> +
> +	if (poll->wqh)
> +		return 0;
> +
> +	mask = vfs_poll(file, &poll->table);
> +	if (mask)
> +		vq->handle_kick(&vq->poll.work);
> +	if (mask & EPOLLERR) {
> +		vhost_poll_stop(poll);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}


So this is basically a duplication of vhost_poll_start()?


> +
> +static long vhost_vdpa_set_vring_kick(struct vhost_virtqueue *vq,
> +				      void __user *argp)
> +{
> +	bool pollstart = false, pollstop = false;
> +	struct file *eventfp, *filep = NULL;
> +	struct vhost_vring_file f;
> +	long r;
> +
> +	if (copy_from_user(&f, argp, sizeof(f)))
> +		return -EFAULT;
> +
> +	eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd);
> +	if (IS_ERR(eventfp)) {
> +		r = PTR_ERR(eventfp);
> +		return r;
> +	}
> +
> +	if (eventfp != vq->kick) {
> +		pollstop = (filep = vq->kick) != NULL;
> +		pollstart = (vq->kick = eventfp) != NULL;
> +	} else
> +		filep = eventfp;
> +
> +	if (pollstop && vq->handle_kick)
> +		vhost_vdpa_poll_stop(vq);
> +
> +	if (filep)
> +		fput(filep);
> +
> +	if (pollstart && vq->handle_kick)
> +		r = vhost_vdpa_poll_start(vq);
> +
> +	return r;
> +}
>   
>   static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>   				   void __user *argp)
> @@ -316,6 +376,11 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>   		return 0;
>   	}
>   
> +	if (cmd == VHOST_SET_VRING_KICK) {
> +		r = vhost_vdpa_set_vring_kick(vq, argp);
> +		return r;
> +	}
> +
>   	if (cmd == VHOST_GET_VRING_BASE)
>   		vq->last_avail_idx = ops->get_vq_state(v->vdpa, idx);
>   
> @@ -667,6 +732,39 @@ static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
>   	v->domain = NULL;
>   }
>   
> +static int vhost_vdpa_poll_worker(wait_queue_entry_t *wait, unsigned int mode,
> +				  int sync, void *key)
> +{
> +	struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
> +	struct vhost_virtqueue *vq = container_of(poll, struct vhost_virtqueue,
> +						  poll);
> +
> +	if (!(key_to_poll(key) & poll->mask))
> +		return 0;
> +
> +	vq->handle_kick(&vq->poll.work);
> +
> +	return 0;
> +}
> +
> +void vhost_vdpa_poll_init(struct vhost_dev *dev)
> +{
> +	struct vhost_virtqueue *vq;
> +	struct vhost_poll *poll;
> +	int i;
> +
> +	for (i = 0; i < dev->nvqs; i++) {
> +		vq = dev->vqs[i];
> +		poll = &vq->poll;
> +		if (vq->handle_kick) {
> +			init_waitqueue_func_entry(&poll->wait,
> +						  vhost_vdpa_poll_worker);
> +			poll->work.fn = vq->handle_kick;


Why is this needed?


> +		}
> +
> +	}
> +}
> +
>   static int vhost_vdpa_open(struct inode *inode, struct file *filep)
>   {
>   	struct vhost_vdpa *v;
> @@ -697,6 +795,8 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
>   	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0,
>   		       vhost_vdpa_process_iotlb_msg);
>   
> +	vhost_vdpa_poll_init(dev);
> +
>   	dev->iotlb = vhost_iotlb_alloc(0, 0);
>   	if (!dev->iotlb) {
>   		r = -ENOMEM;


So my feeling here is that you want to reuse the infrastructure in 
vhost.c as much as possible.

If this is true, let's just avoid duplicating the code. How about 
adding something like this in vhost_poll_wakeup():


     struct vhost_poll *poll = container_of(wait, struct vhost_poll, wait);
     struct vhost_work *work = &poll->work;

     if (!(key_to_poll(key) & poll->mask))
         return 0;

     if (!poll->dev->use_worker)
         work->fn(work);
     else
         vhost_poll_queue(poll);


Then modify vhost_dev_init() to set use_worker (true for all devices except vdpa)?


Thanks