lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20131025160613.GA14191@hj.localdomain>
Date:	Sat, 26 Oct 2013 00:06:13 +0800
From:	Asias He <asias@...hat.com>
To:	Christoph Hellwig <hch@...radead.org>
Cc:	Jens Axboe <axboe@...nel.dk>,
	Rusty Russell <rusty@...tcorp.com.au>,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2] virtio_blk: blk-mq support

On Fri, Oct 25, 2013 at 06:05:01AM -0700, Christoph Hellwig wrote:
> Switch virtio-blk from the dual support for old-style requests and bios
> to use the block-multiqueue.  For now pretend to have 4 issue queues
> as Jens pulled that out of his this hair and it worked.
> 
> Signed-off-by: Jens Axboe <axboe@...nel.dk>
> Signed-off-by: Christoph Hellwig <hch@....de>
> ---
>  drivers/block/virtio_blk.c |  312 +++++++++-----------------------------------
>  1 file changed, 65 insertions(+), 247 deletions(-)
> 
> diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> index 5cdf88b..87f099d 100644
> --- a/drivers/block/virtio_blk.c
> +++ b/drivers/block/virtio_blk.c
> @@ -11,12 +11,11 @@
>  #include <linux/string_helpers.h>
>  #include <scsi/scsi_cmnd.h>
>  #include <linux/idr.h>
> +#include <linux/blk-mq.h>
> +#include <linux/numa.h>
>  
>  #define PART_BITS 4
>  
> -static bool use_bio;
> -module_param(use_bio, bool, S_IRUGO);
> -
>  static int major;
>  static DEFINE_IDA(vd_index_ida);
>  
> @@ -26,13 +25,11 @@ struct virtio_blk
>  {
>  	struct virtio_device *vdev;
>  	struct virtqueue *vq;
> -	wait_queue_head_t queue_wait;
> +	spinlock_t vq_lock;
>  
>  	/* The disk structure for the kernel. */
>  	struct gendisk *disk;
>  
> -	mempool_t *pool;
> -
>  	/* Process context for config space updates */
>  	struct work_struct config_work;
>  
> @@ -47,15 +44,11 @@ struct virtio_blk
>  
>  	/* Ida index - used to track minor number allocations. */
>  	int index;
> -
> -	/* Scatterlist: can be too big for stack. */
> -	struct scatterlist sg[/*sg_elems*/];
>  };
>  
>  struct virtblk_req
>  {
>  	struct request *req;
> -	struct bio *bio;
>  	struct virtio_blk_outhdr out_hdr;
>  	struct virtio_scsi_inhdr in_hdr;
>  	struct work_struct work;

We can drop more things:

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 87f099d..c5ca368 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -51,20 +51,10 @@ struct virtblk_req
 	struct request *req;
 	struct virtio_blk_outhdr out_hdr;
 	struct virtio_scsi_inhdr in_hdr;
-	struct work_struct work;
-	struct virtio_blk *vblk;
-	int flags;
 	u8 status;
 	struct scatterlist sg[];
 };
 
-enum {
-	VBLK_IS_FLUSH		= 1,
-	VBLK_REQ_FLUSH		= 2,
-	VBLK_REQ_DATA		= 4,
-	VBLK_REQ_FUA		= 8,
-};
-
 static inline int virtblk_result(struct virtblk_req *vbr)
 {
 	switch (vbr->status) {

> @@ -84,22 +77,6 @@ static inline int virtblk_result(struct virtblk_req *vbr)
>  	}
>  }
>  
> -static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
> -						    gfp_t gfp_mask)
> -{
> -	struct virtblk_req *vbr;
> -
> -	vbr = mempool_alloc(vblk->pool, gfp_mask);
> -	if (!vbr)
> -		return NULL;
> -
> -	vbr->vblk = vblk;
> -	if (use_bio)
> -		sg_init_table(vbr->sg, vblk->sg_elems);
> -
> -	return vbr;
> -}
> -
>  static int __virtblk_add_req(struct virtqueue *vq,
>  			     struct virtblk_req *vbr,
>  			     struct scatterlist *data_sg,
> @@ -143,83 +120,8 @@ static int __virtblk_add_req(struct virtqueue *vq,
>  	return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
>  }
>  
> -static void virtblk_add_req(struct virtblk_req *vbr, bool have_data)
> -{
> -	struct virtio_blk *vblk = vbr->vblk;
> -	DEFINE_WAIT(wait);
> -	int ret;
> -
> -	spin_lock_irq(vblk->disk->queue->queue_lock);
> -	while (unlikely((ret = __virtblk_add_req(vblk->vq, vbr, vbr->sg,
> -						 have_data)) < 0)) {
> -		prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
> -					  TASK_UNINTERRUPTIBLE);
> -
> -		spin_unlock_irq(vblk->disk->queue->queue_lock);
> -		io_schedule();
> -		spin_lock_irq(vblk->disk->queue->queue_lock);
> -
> -		finish_wait(&vblk->queue_wait, &wait);
> -	}
> -
> -	virtqueue_kick(vblk->vq);
> -	spin_unlock_irq(vblk->disk->queue->queue_lock);
> -}
> -
> -static void virtblk_bio_send_flush(struct virtblk_req *vbr)
> -{
> -	vbr->flags |= VBLK_IS_FLUSH;
> -	vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
> -	vbr->out_hdr.sector = 0;
> -	vbr->out_hdr.ioprio = 0;
> -
> -	virtblk_add_req(vbr, false);
> -}
> -
> -static void virtblk_bio_send_data(struct virtblk_req *vbr)
> -{
> -	struct virtio_blk *vblk = vbr->vblk;
> -	struct bio *bio = vbr->bio;
> -	bool have_data;
> -
> -	vbr->flags &= ~VBLK_IS_FLUSH;
> -	vbr->out_hdr.type = 0;
> -	vbr->out_hdr.sector = bio->bi_sector;
> -	vbr->out_hdr.ioprio = bio_prio(bio);
> -
> -	if (blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg)) {
> -		have_data = true;
> -		if (bio->bi_rw & REQ_WRITE)
> -			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
> -		else
> -			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
> -	} else
> -		have_data = false;
> -
> -	virtblk_add_req(vbr, have_data);
> -}
> -
> -static void virtblk_bio_send_data_work(struct work_struct *work)
> -{
> -	struct virtblk_req *vbr;
> -
> -	vbr = container_of(work, struct virtblk_req, work);
> -
> -	virtblk_bio_send_data(vbr);
> -}
> -
> -static void virtblk_bio_send_flush_work(struct work_struct *work)
> -{
> -	struct virtblk_req *vbr;
> -
> -	vbr = container_of(work, struct virtblk_req, work);
> -
> -	virtblk_bio_send_flush(vbr);
> -}
> -
>  static inline void virtblk_request_done(struct virtblk_req *vbr)
>  {
> -	struct virtio_blk *vblk = vbr->vblk;
>  	struct request *req = vbr->req;
>  	int error = virtblk_result(vbr);
>  
> @@ -231,90 +133,43 @@ static inline void virtblk_request_done(struct virtblk_req *vbr)
>  		req->errors = (error != 0);
>  	}
>  
> -	__blk_end_request_all(req, error);
> -	mempool_free(vbr, vblk->pool);
> -}
> -
> -static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
> -{
> -	struct virtio_blk *vblk = vbr->vblk;
> -
> -	if (vbr->flags & VBLK_REQ_DATA) {
> -		/* Send out the actual write data */
> -		INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
> -		queue_work(virtblk_wq, &vbr->work);
> -	} else {
> -		bio_endio(vbr->bio, virtblk_result(vbr));
> -		mempool_free(vbr, vblk->pool);
> -	}
> -}
> -
> -static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
> -{
> -	struct virtio_blk *vblk = vbr->vblk;
> -
> -	if (unlikely(vbr->flags & VBLK_REQ_FUA)) {
> -		/* Send out a flush before end the bio */
> -		vbr->flags &= ~VBLK_REQ_DATA;
> -		INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
> -		queue_work(virtblk_wq, &vbr->work);
> -	} else {
> -		bio_endio(vbr->bio, virtblk_result(vbr));
> -		mempool_free(vbr, vblk->pool);
> -	}
> -}
> -
> -static inline void virtblk_bio_done(struct virtblk_req *vbr)
> -{
> -	if (unlikely(vbr->flags & VBLK_IS_FLUSH))
> -		virtblk_bio_flush_done(vbr);
> -	else
> -		virtblk_bio_data_done(vbr);
> +	blk_mq_end_io(req, error);
>  }
>  
>  static void virtblk_done(struct virtqueue *vq)
>  {
>  	struct virtio_blk *vblk = vq->vdev->priv;
> -	bool bio_done = false, req_done = false;
> +	bool req_done = false;
>  	struct virtblk_req *vbr;
>  	unsigned long flags;
>  	unsigned int len;
>  
> -	spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
> +	spin_lock_irqsave(&vblk->vq_lock, flags);
>  	do {
>  		virtqueue_disable_cb(vq);
>  		while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
> -			if (vbr->bio) {
> -				virtblk_bio_done(vbr);
> -				bio_done = true;
> -			} else {
> -				virtblk_request_done(vbr);
> -				req_done = true;
> -			}
> +			virtblk_request_done(vbr);
> +			req_done = true;
>  		}
>  	} while (!virtqueue_enable_cb(vq));
> +	spin_unlock_irqrestore(&vblk->vq_lock, flags);
> +
>  	/* In case queue is stopped waiting for more buffers. */
>  	if (req_done)
> -		blk_start_queue(vblk->disk->queue);
> -	spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
> -
> -	if (bio_done)
> -		wake_up(&vblk->queue_wait);
> +		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
>  }
>  
> -static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
> -		   struct request *req)
> +static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
>  {
> +	struct virtio_blk *vblk = hctx->queue->queuedata;
> +	struct virtblk_req *vbr = req->special;
> +	unsigned long flags;
>  	unsigned int num;
> -	struct virtblk_req *vbr;
> +	const bool last = (req->cmd_flags & REQ_END) != 0;
>  
> -	vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
> -	if (!vbr)
> -		/* When another request finishes we'll try again. */
> -		return false;
> +	BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
>  
>  	vbr->req = req;
> -	vbr->bio = NULL;
>  	if (req->cmd_flags & REQ_FLUSH) {
>  		vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
>  		vbr->out_hdr.sector = 0;
> @@ -342,7 +197,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
>  		}
>  	}
>  
> -	num = blk_rq_map_sg(q, vbr->req, vblk->sg);
> +	num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
>  	if (num) {
>  		if (rq_data_dir(vbr->req) == WRITE)
>  			vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
> @@ -350,63 +205,18 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
>  			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
>  	}
>  
> -	if (__virtblk_add_req(vblk->vq, vbr, vblk->sg, num) < 0) {
> -		mempool_free(vbr, vblk->pool);
> -		return false;
> -	}
> -
> -	return true;
> -}
> -
> -static void virtblk_request(struct request_queue *q)
> -{
> -	struct virtio_blk *vblk = q->queuedata;
> -	struct request *req;
> -	unsigned int issued = 0;
> -
> -	while ((req = blk_peek_request(q)) != NULL) {
> -		BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> -
> -		/* If this request fails, stop queue and wait for something to
> -		   finish to restart it. */
> -		if (!do_req(q, vblk, req)) {
> -			blk_stop_queue(q);
> -			break;
> -		}
> -		blk_start_request(req);
> -		issued++;
> -	}
> -
> -	if (issued)
> +	spin_lock_irqsave(&vblk->vq_lock, flags);
> +	if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
> +		spin_unlock_irqrestore(&vblk->vq_lock, flags);
> +		blk_mq_stop_hw_queue(hctx);
>  		virtqueue_kick(vblk->vq);
> -}
> -
> -static void virtblk_make_request(struct request_queue *q, struct bio *bio)
> -{
> -	struct virtio_blk *vblk = q->queuedata;
> -	struct virtblk_req *vbr;
> -
> -	BUG_ON(bio->bi_phys_segments + 2 > vblk->sg_elems);
> -
> -	vbr = virtblk_alloc_req(vblk, GFP_NOIO);
> -	if (!vbr) {
> -		bio_endio(bio, -ENOMEM);
> -		return;
> +		return BLK_MQ_RQ_QUEUE_BUSY;
>  	}
> +	spin_unlock_irqrestore(&vblk->vq_lock, flags);
>  
> -	vbr->bio = bio;
> -	vbr->flags = 0;
> -	if (bio->bi_rw & REQ_FLUSH)
> -		vbr->flags |= VBLK_REQ_FLUSH;
> -	if (bio->bi_rw & REQ_FUA)
> -		vbr->flags |= VBLK_REQ_FUA;
> -	if (bio->bi_size)
> -		vbr->flags |= VBLK_REQ_DATA;
> -
> -	if (unlikely(vbr->flags & VBLK_REQ_FLUSH))
> -		virtblk_bio_send_flush(vbr);
> -	else
> -		virtblk_bio_send_data(vbr);
> +	if (last)
> +		virtqueue_kick(vblk->vq);
> +	return BLK_MQ_RQ_QUEUE_OK;
>  }
>  
>  /* return id (s/n) string for *disk to *id_str
> @@ -680,12 +490,35 @@ static const struct device_attribute dev_attr_cache_type_rw =
>  	__ATTR(cache_type, S_IRUGO|S_IWUSR,
>  	       virtblk_cache_type_show, virtblk_cache_type_store);
>  
> +static struct blk_mq_ops virtio_mq_ops = {
> +	.queue_rq	= virtio_queue_rq,
> +	.map_queue	= blk_mq_map_queue,
> +	.alloc_hctx	= blk_mq_alloc_single_hw_queue,
> +	.free_hctx	= blk_mq_free_single_hw_queue,
> +};
> +
> +static struct blk_mq_reg virtio_mq_reg = {
> +	.ops		= &virtio_mq_ops,
> +	.nr_hw_queues	= 4,
> +	.queue_depth	= 64,
> +	.numa_node	= NUMA_NO_NODE,
> +	.flags		= BLK_MQ_F_SHOULD_MERGE,
> +};
> +
> +static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
> +			     struct request *rq, unsigned int nr)
> +{
> +	struct virtio_blk *vblk = data;
> +	struct virtblk_req *vbr = rq->special;
> +
> +	sg_init_table(vbr->sg, vblk->sg_elems);
> +}
> +
>  static int virtblk_probe(struct virtio_device *vdev)
>  {
>  	struct virtio_blk *vblk;
>  	struct request_queue *q;
>  	int err, index;
> -	int pool_size;
>  
>  	u64 cap;
>  	u32 v, blk_size, sg_elems, opt_io_size;
> @@ -709,17 +542,14 @@ static int virtblk_probe(struct virtio_device *vdev)
>  
>  	/* We need an extra sg elements at head and tail. */
>  	sg_elems += 2;
> -	vdev->priv = vblk = kmalloc(sizeof(*vblk) +
> -				    sizeof(vblk->sg[0]) * sg_elems, GFP_KERNEL);
> +	vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
>  	if (!vblk) {
>  		err = -ENOMEM;
>  		goto out_free_index;
>  	}
>  
> -	init_waitqueue_head(&vblk->queue_wait);
>  	vblk->vdev = vdev;
>  	vblk->sg_elems = sg_elems;
> -	sg_init_table(vblk->sg, vblk->sg_elems);
>  	mutex_init(&vblk->config_lock);
>  
>  	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
> @@ -728,31 +558,27 @@ static int virtblk_probe(struct virtio_device *vdev)
>  	err = init_vq(vblk);
>  	if (err)
>  		goto out_free_vblk;
> -
> -	pool_size = sizeof(struct virtblk_req);
> -	if (use_bio)
> -		pool_size += sizeof(struct scatterlist) * sg_elems;
> -	vblk->pool = mempool_create_kmalloc_pool(1, pool_size);
> -	if (!vblk->pool) {
> -		err = -ENOMEM;
> -		goto out_free_vq;
> -	}
> +	spin_lock_init(&vblk->vq_lock);
>  
>  	/* FIXME: How many partitions?  How long is a piece of string? */
>  	vblk->disk = alloc_disk(1 << PART_BITS);
>  	if (!vblk->disk) {
>  		err = -ENOMEM;
> -		goto out_mempool;
> +		goto out_free_vq;
>  	}
>  
> -	q = vblk->disk->queue = blk_init_queue(virtblk_request, NULL);
> +	virtio_mq_reg.cmd_size =
> +		sizeof(struct virtblk_req) +
> +		sizeof(struct scatterlist) * sg_elems;
> +
> +	q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
>  	if (!q) {
>  		err = -ENOMEM;
>  		goto out_put_disk;
>  	}
>  
> -	if (use_bio)
> -		blk_queue_make_request(q, virtblk_make_request);
> +	blk_mq_init_commands(q, virtblk_init_vbr, vblk);
> +
>  	q->queuedata = vblk;
>  
>  	virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
> @@ -857,8 +683,6 @@ out_del_disk:
>  	blk_cleanup_queue(vblk->disk->queue);
>  out_put_disk:
>  	put_disk(vblk->disk);
> -out_mempool:
> -	mempool_destroy(vblk->pool);
>  out_free_vq:
>  	vdev->config->del_vqs(vdev);
>  out_free_vblk:
> @@ -890,7 +714,6 @@ static void virtblk_remove(struct virtio_device *vdev)
>  
>  	refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
>  	put_disk(vblk->disk);
> -	mempool_destroy(vblk->pool);
>  	vdev->config->del_vqs(vdev);
>  	kfree(vblk);
>  
> @@ -914,10 +737,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
>  
>  	flush_work(&vblk->config_work);
>  
> -	spin_lock_irq(vblk->disk->queue->queue_lock);
> -	blk_stop_queue(vblk->disk->queue);
> -	spin_unlock_irq(vblk->disk->queue->queue_lock);
> -	blk_sync_queue(vblk->disk->queue);
> +	blk_mq_stop_hw_queues(vblk->disk->queue);
>  
>  	vdev->config->del_vqs(vdev);
>  	return 0;
> @@ -930,11 +750,9 @@ static int virtblk_restore(struct virtio_device *vdev)
>  
>  	vblk->config_enable = true;
>  	ret = init_vq(vdev->priv);
> -	if (!ret) {
> -		spin_lock_irq(vblk->disk->queue->queue_lock);
> -		blk_start_queue(vblk->disk->queue);
> -		spin_unlock_irq(vblk->disk->queue->queue_lock);
> -	}
> +	if (!ret)
> +		blk_mq_start_stopped_hw_queues(vblk->disk->queue);
> +
>  	return ret;
>  }
>  #endif
> -- 
> 1.7.10.4
> 
> 

-- 
Asias
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ