linux-kernel - Re: [PATCH RFC] virtio_blk: Use blk-iopoll for host->guest notify

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <201005141730.57158.iggy@theiggy.com>
Date:	Fri, 14 May 2010 17:30:56 -0500
From:	Brian Jackson <iggy@...iggy.com>
To:	Stefan Hajnoczi <stefanha@...ux.vnet.ibm.com>
Cc:	virtualization@...ts.linux-foundation.org,
	Rusty Russell <rusty@...tcorp.com.au>,
	Jens Axboe <jens.axboe@...cle.com>,
	linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
	qemu-devel@...gnu.org
Subject: Re: [PATCH RFC] virtio_blk: Use blk-iopoll for host->guest notify

On Friday, May 14, 2010 03:47:37 pm Stefan Hajnoczi wrote:
> This patch adds blk-iopoll interrupt mitigation to virtio-blk.  Instead
> of processing completed requests inside the virtqueue interrupt handler,
> a softirq is scheduled to process up to a maximum number of completed
> requests in one go.
> 
> If the number of complete requests exceeds the maximum number, then another
> softirq is scheduled to continue polling.  Otherwise the virtqueue
> interrupt is enabled again and we return to interrupt-driven mode.
> 
> The patch sets the maximum number of completed requests (aka budget, aka
> weight) to 4.  This is a low number but reflects the expensive context
> switch between guest and host virtio-blk emulation.
> 
> The blk-iopoll infrastructure is enabled system-wide by default:
> 
> kernel.blk_iopoll = 1
> 
> It can be disabled to always use interrupt-driven mode (useful for
> comparison):
> 
> kernel.blk_iopoll = 0


Any preliminary numbers? latency, throughput, cpu use? What about comparing 
different "weights"?


> 
> Signed-off-by: Stefan Hajnoczi <stefanha@...ux.vnet.ibm.com>
> ---
> No performance figures yet.
> 
>  drivers/block/virtio_blk.c |   71
> ++++++++++++++++++++++++++++++++++++++----- 1 files changed, 62
> insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 2138a7a..1523895 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -6,6 +6,7 @@
>  #include <linux/virtio.h>
>  #include <linux/virtio_blk.h>
>  #include <linux/scatterlist.h>
> +#include <linux/blk-iopoll.h>
> 
>  #define PART_BITS 4
> 
> @@ -26,6 +27,9 @@ struct virtio_blk
> 
>  	mempool_t *pool;
> 
> +	/* Host->guest notify mitigation */
> +	struct blk_iopoll iopoll;
> +
>  	/* What host tells us, plus 2 for header & tailer. */
>  	unsigned int sg_elems;
> 
> @@ -42,16 +46,18 @@ struct virtblk_req
>  	u8 status;
>  };
> 
> -static void blk_done(struct virtqueue *vq)
> +/* Assumes vblk->lock held */
> +static int __virtblk_end_requests(struct virtio_blk *vblk, int weight)
>  {
> -	struct virtio_blk *vblk = vq->vdev->priv;
>  	struct virtblk_req *vbr;
>  	unsigned int len;
> -	unsigned long flags;
> +	int error;
> +	int work = 0;
> 
> -	spin_lock_irqsave(&vblk->lock, flags);
> -	while ((vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len)) != NULL) {
> -		int error;
> +	while (!weight || work < weight) {
> +		vbr = vblk->vq->vq_ops->get_buf(vblk->vq, &len);
> +		if (!vbr)
> +			break;
> 
>  		switch (vbr->status) {
>  		case VIRTIO_BLK_S_OK:
> @@ -74,10 +80,53 @@ static void blk_done(struct virtqueue *vq)
>  		__blk_end_request_all(vbr->req, error);
>  		list_del(&vbr->list);
>  		mempool_free(vbr, vblk->pool);
> +		work++;
>  	}
> +
>  	/* In case queue is stopped waiting for more buffers. */
>  	blk_start_queue(vblk->disk->queue);
> +	return work;
> +}
> +
> +static int virtblk_iopoll(struct blk_iopoll *iopoll, int weight)
> +{
> +	struct virtio_blk *vblk =
> +		container_of(iopoll, struct virtio_blk, iopoll);
> +	unsigned long flags;
> +	int work;
> +
> +	spin_lock_irqsave(&vblk->lock, flags);
> +
> +	work = __virtblk_end_requests(vblk, weight);
> +	if (work < weight) {
> +		/* Keep polling if there are pending requests. */
> +		if (vblk->vq->vq_ops->enable_cb(vblk->vq))
> +			__blk_iopoll_complete(&vblk->iopoll);
> +		else
> +			vblk->vq->vq_ops->disable_cb(vblk->vq);
> +	}
> +
>  	spin_unlock_irqrestore(&vblk->lock, flags);
> +	return work;
> +}
> +
> +static void blk_done(struct virtqueue *vq)
> +{
> +	struct virtio_blk *vblk = vq->vdev->priv;
> +	unsigned long flags;
> +
> +	if (blk_iopoll_enabled) {
> +		if (!blk_iopoll_sched_prep(&vblk->iopoll)) {
> +			spin_lock_irqsave(&vblk->lock, flags);
> +			vblk->vq->vq_ops->disable_cb(vblk->vq);
> +			spin_unlock_irqrestore(&vblk->lock, flags);
> +			blk_iopoll_sched(&vblk->iopoll);
> +		}
> +	} else {
> +		spin_lock_irqsave(&vblk->lock, flags);
> +		__virtblk_end_requests(vblk, 0);
> +		spin_unlock_irqrestore(&vblk->lock, flags);
> +	}
>  }
> 
>  static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
> @@ -289,11 +338,14 @@ static int __devinit virtblk_probe(struct
> virtio_device *vdev) goto out_free_vq;
>  	}
> 
> +	blk_iopoll_init(&vblk->iopoll, 4 /* budget */, virtblk_iopoll);
> +	blk_iopoll_enable(&vblk->iopoll);
> +
>  	/* FIXME: How many partitions?  How long is a piece of string? */
>  	vblk->disk = alloc_disk(1 << PART_BITS);
>  	if (!vblk->disk) {
>  		err = -ENOMEM;
> -		goto out_mempool;
> +		goto out_iopoll;
>  	}
> 
>  	q = vblk->disk->queue = blk_init_queue(do_virtblk_request, &vblk->lock);
> @@ -401,13 +453,13 @@ static int __devinit virtblk_probe(struct
> virtio_device *vdev) if (!err && opt_io_size)
>  		blk_queue_io_opt(q, blk_size * opt_io_size);
> 
> -
>  	add_disk(vblk->disk);
>  	return 0;
> 
>  out_put_disk:
>  	put_disk(vblk->disk);
> -out_mempool:
> +out_iopoll:
> +	blk_iopoll_disable(&vblk->iopoll);
>  	mempool_destroy(vblk->pool);
>  out_free_vq:
>  	vdev->config->del_vqs(vdev);
> @@ -430,6 +482,7 @@ static void __devexit virtblk_remove(struct
> virtio_device *vdev) del_gendisk(vblk->disk);
>  	blk_cleanup_queue(vblk->disk->queue);
>  	put_disk(vblk->disk);
> +	blk_iopoll_disable(&vblk->iopoll);
>  	mempool_destroy(vblk->pool);
>  	vdev->config->del_vqs(vdev);
>  	kfree(vblk);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/