[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150923203121.GA30295@l.oracle.com>
Date: Wed, 23 Sep 2015 16:31:21 -0400
From: Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>
To: Bob Liu <bob.liu@...cle.com>, david.vrabel@...rix.com
Cc: xen-devel@...ts.xen.org, david.vrabel@...rix.com,
linux-kernel@...r.kernel.org, roger.pau@...rix.com,
felipe.franciosi@...rix.com, axboe@...com, hch@...radead.org,
avanzini.arianna@...il.com, rafal.mielniczuk@...rix.com,
boris.ostrovsky@...cle.com, jonathan.davies@...rix.com
Subject: Re: [PATCH v3 1/9] xen-blkfront: convert to blk-mq APIs
On Sat, Sep 05, 2015 at 08:39:34PM +0800, Bob Liu wrote:
> Note: This patch is based on original work of Arianna's internship for
> GNOME's Outreach Program for Women.
>
> Only one hardware queue is used now, so there is no significant
> performance change.
>
> The legacy non-mq code is deleted completely which is the same as other
> drivers like virtio, mtip, and nvme.
>
> Also drop the unnecessary acquisition of info->io_lock around the call to
> blk_mq_stop_hw_queues().
>
> Signed-off-by: Arianna Avanzini <avanzini.arianna@...il.com>
> Signed-off-by: Bob Liu <bob.liu@...cle.com>
> Reviewed-by: Christoph Hellwig <hch@....de>
> Acked-by: Jens Axboe <axboe@...com>
> Signed-off-by: David Vrabel <david.vrabel@...rix.com>
Odd.
This should have gone into Linux 4.3, but apparently it did not? I thought
I remembered seeing it there.
Anyhow, I will put this in my queue for 4.4.
> ---
> drivers/block/xen-blkfront.c | 146 +++++++++++++++++-------------------------
> 1 file changed, 60 insertions(+), 86 deletions(-)
>
> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
> index 7a8a73f..5dd591d 100644
> --- a/drivers/block/xen-blkfront.c
> +++ b/drivers/block/xen-blkfront.c
> @@ -37,6 +37,7 @@
>
> #include <linux/interrupt.h>
> #include <linux/blkdev.h>
> +#include <linux/blk-mq.h>
> #include <linux/hdreg.h>
> #include <linux/cdrom.h>
> #include <linux/module.h>
> @@ -148,6 +149,7 @@ struct blkfront_info
> unsigned int feature_persistent:1;
> unsigned int max_indirect_segments;
> int is_ready;
> + struct blk_mq_tag_set tag_set;
> };
>
> static unsigned int nr_minors;
> @@ -617,54 +619,41 @@ static inline bool blkif_request_flush_invalid(struct request *req,
> !(info->feature_flush & REQ_FUA)));
> }
>
> -/*
> - * do_blkif_request
> - * read a block; request is in a request queue
> - */
> -static void do_blkif_request(struct request_queue *rq)
> +static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
> + const struct blk_mq_queue_data *qd)
> {
> - struct blkfront_info *info = NULL;
> - struct request *req;
> - int queued;
> -
> - pr_debug("Entered do_blkif_request\n");
> -
> - queued = 0;
> + struct blkfront_info *info = qd->rq->rq_disk->private_data;
>
> - while ((req = blk_peek_request(rq)) != NULL) {
> - info = req->rq_disk->private_data;
> -
> - if (RING_FULL(&info->ring))
> - goto wait;
> + blk_mq_start_request(qd->rq);
> + spin_lock_irq(&info->io_lock);
> + if (RING_FULL(&info->ring))
> + goto out_busy;
>
> - blk_start_request(req);
> + if (blkif_request_flush_invalid(qd->rq, info))
> + goto out_err;
>
> - if (blkif_request_flush_invalid(req, info)) {
> - __blk_end_request_all(req, -EOPNOTSUPP);
> - continue;
> - }
> + if (blkif_queue_request(qd->rq))
> + goto out_busy;
>
> - pr_debug("do_blk_req %p: cmd %p, sec %lx, "
> - "(%u/%u) [%s]\n",
> - req, req->cmd, (unsigned long)blk_rq_pos(req),
> - blk_rq_cur_sectors(req), blk_rq_sectors(req),
> - rq_data_dir(req) ? "write" : "read");
> -
> - if (blkif_queue_request(req)) {
> - blk_requeue_request(rq, req);
> -wait:
> - /* Avoid pointless unplugs. */
> - blk_stop_queue(rq);
> - break;
> - }
> + flush_requests(info);
> + spin_unlock_irq(&info->io_lock);
> + return BLK_MQ_RQ_QUEUE_OK;
>
> - queued++;
> - }
> +out_err:
> + spin_unlock_irq(&info->io_lock);
> + return BLK_MQ_RQ_QUEUE_ERROR;
>
> - if (queued != 0)
> - flush_requests(info);
> +out_busy:
> + spin_unlock_irq(&info->io_lock);
> + blk_mq_stop_hw_queue(hctx);
> + return BLK_MQ_RQ_QUEUE_BUSY;
> }
>
> +static struct blk_mq_ops blkfront_mq_ops = {
> + .queue_rq = blkif_queue_rq,
> + .map_queue = blk_mq_map_queue,
> +};
> +
> static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
> unsigned int physical_sector_size,
> unsigned int segments)
> @@ -672,9 +661,22 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
> struct request_queue *rq;
> struct blkfront_info *info = gd->private_data;
>
> - rq = blk_init_queue(do_blkif_request, &info->io_lock);
> - if (rq == NULL)
> + memset(&info->tag_set, 0, sizeof(info->tag_set));
> + info->tag_set.ops = &blkfront_mq_ops;
> + info->tag_set.nr_hw_queues = 1;
> + info->tag_set.queue_depth = BLK_RING_SIZE(info);
> + info->tag_set.numa_node = NUMA_NO_NODE;
> + info->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
> + info->tag_set.cmd_size = 0;
> + info->tag_set.driver_data = info;
> +
> + if (blk_mq_alloc_tag_set(&info->tag_set))
> return -1;
> + rq = blk_mq_init_queue(&info->tag_set);
> + if (IS_ERR(rq)) {
> + blk_mq_free_tag_set(&info->tag_set);
> + return -1;
> + }
>
> queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
>
> @@ -902,19 +904,15 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
> static void xlvbd_release_gendisk(struct blkfront_info *info)
> {
> unsigned int minor, nr_minors;
> - unsigned long flags;
>
> if (info->rq == NULL)
> return;
>
> - spin_lock_irqsave(&info->io_lock, flags);
> -
> /* No more blkif_request(). */
> - blk_stop_queue(info->rq);
> + blk_mq_stop_hw_queues(info->rq);
>
> /* No more gnttab callback work. */
> gnttab_cancel_free_callback(&info->callback);
> - spin_unlock_irqrestore(&info->io_lock, flags);
>
> /* Flush gnttab callback work. Must be done with no locks held. */
> flush_work(&info->work);
> @@ -926,20 +924,18 @@ static void xlvbd_release_gendisk(struct blkfront_info *info)
> xlbd_release_minors(minor, nr_minors);
>
> blk_cleanup_queue(info->rq);
> + blk_mq_free_tag_set(&info->tag_set);
> info->rq = NULL;
>
> put_disk(info->gd);
> info->gd = NULL;
> }
>
> +/* Must be called with io_lock held */
> static void kick_pending_request_queues(struct blkfront_info *info)
> {
> - if (!RING_FULL(&info->ring)) {
> - /* Re-enable calldowns. */
> - blk_start_queue(info->rq);
> - /* Kick things off immediately. */
> - do_blkif_request(info->rq);
> - }
> + if (!RING_FULL(&info->ring))
> + blk_mq_start_stopped_hw_queues(info->rq, true);
> }
>
> static void blkif_restart_queue(struct work_struct *work)
> @@ -964,7 +960,7 @@ static void blkif_free(struct blkfront_info *info, int suspend)
> BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
> /* No more blkif_request(). */
> if (info->rq)
> - blk_stop_queue(info->rq);
> + blk_mq_stop_hw_queues(info->rq);
>
> /* Remove all persistent grants */
> if (!list_empty(&info->grants)) {
> @@ -1147,7 +1143,6 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
> RING_IDX i, rp;
> unsigned long flags;
> struct blkfront_info *info = (struct blkfront_info *)dev_id;
> - int error;
>
> spin_lock_irqsave(&info->io_lock, flags);
>
> @@ -1188,37 +1183,37 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
> continue;
> }
>
> - error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
> + req->errors = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
> switch (bret->operation) {
> case BLKIF_OP_DISCARD:
> if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
> struct request_queue *rq = info->rq;
> printk(KERN_WARNING "blkfront: %s: %s op failed\n",
> info->gd->disk_name, op_name(bret->operation));
> - error = -EOPNOTSUPP;
> + req->errors = -EOPNOTSUPP;
> info->feature_discard = 0;
> info->feature_secdiscard = 0;
> queue_flag_clear(QUEUE_FLAG_DISCARD, rq);
> queue_flag_clear(QUEUE_FLAG_SECDISCARD, rq);
> }
> - __blk_end_request_all(req, error);
> + blk_mq_complete_request(req);
> break;
> case BLKIF_OP_FLUSH_DISKCACHE:
> case BLKIF_OP_WRITE_BARRIER:
> if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
> printk(KERN_WARNING "blkfront: %s: %s op failed\n",
> info->gd->disk_name, op_name(bret->operation));
> - error = -EOPNOTSUPP;
> + req->errors = -EOPNOTSUPP;
> }
> if (unlikely(bret->status == BLKIF_RSP_ERROR &&
> info->shadow[id].req.u.rw.nr_segments == 0)) {
> printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
> info->gd->disk_name, op_name(bret->operation));
> - error = -EOPNOTSUPP;
> + req->errors = -EOPNOTSUPP;
> }
> - if (unlikely(error)) {
> - if (error == -EOPNOTSUPP)
> - error = 0;
> + if (unlikely(req->errors)) {
> + if (req->errors == -EOPNOTSUPP)
> + req->errors = 0;
> info->feature_flush = 0;
> xlvbd_flush(info);
> }
> @@ -1229,7 +1224,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
> dev_dbg(&info->xbdev->dev, "Bad return from blkdev data "
> "request: %x\n", bret->status);
>
> - __blk_end_request_all(req, error);
> + blk_mq_complete_request(req);
> break;
> default:
> BUG();
> @@ -1558,28 +1553,6 @@ static int blkif_recover(struct blkfront_info *info)
>
> kfree(copy);
>
> - /*
> - * Empty the queue, this is important because we might have
> - * requests in the queue with more segments than what we
> - * can handle now.
> - */
> - spin_lock_irq(&info->io_lock);
> - while ((req = blk_fetch_request(info->rq)) != NULL) {
> - if (req->cmd_flags &
> - (REQ_FLUSH | REQ_FUA | REQ_DISCARD | REQ_SECURE)) {
> - list_add(&req->queuelist, &requests);
> - continue;
> - }
> - merge_bio.head = req->bio;
> - merge_bio.tail = req->biotail;
> - bio_list_merge(&bio_list, &merge_bio);
> - req->bio = NULL;
> - if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
> - pr_alert("diskcache flush request found!\n");
> - __blk_end_request_all(req, 0);
> - }
> - spin_unlock_irq(&info->io_lock);
> -
> xenbus_switch_state(info->xbdev, XenbusStateConnected);
>
> spin_lock_irq(&info->io_lock);
> @@ -1594,9 +1567,10 @@ static int blkif_recover(struct blkfront_info *info)
> /* Requeue pending requests (flush or discard) */
> list_del_init(&req->queuelist);
> BUG_ON(req->nr_phys_segments > segs);
> - blk_requeue_request(info->rq, req);
> + blk_mq_requeue_request(req);
> }
> spin_unlock_irq(&info->io_lock);
> + blk_mq_kick_requeue_list(info->rq);
>
> while ((bio = bio_list_pop(&bio_list)) != NULL) {
> /* Traverse the list of pending bios and re-queue them */
> --
> 1.7.10.4
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists