lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Fri, 13 Mar 2015 12:39:42 -0700
From:	Shaohua Li <shli@...com>
To:	<linux-kernel@...r.kernel.org>
CC:	<Kernel-team@...com>, Jens Axboe <axboe@...com>,
	Christoph Hellwig <hch@....de>
Subject: [PATCH] blk-mq: rationalize plug

The previous posting of this patch was lost, so I am reposting it. The patch
is helpful, for example, when using scsi-mq with a SATA drive.

Plugging is still helpful for workloads with IO merging, but it can be harmful
otherwise, especially with multiple hardware queues, as there is (supposedly)
no lock contention in this case and plugging can introduce latency.

For a single queue, we always plug. Reducing lock contention is still a win.
For multiple queues, we do limited plugging, e.g. plug only if there is a
merge. If a request cannot be merged with the following request, the request
will be dispatched immediately.

An example workload here is fsync writes to a block device. Without plug
merging, sequential writes (fsync makes them sync IO) are dispatched as 4k IOs.

Cc: Jens Axboe <axboe@...com>
Cc: Christoph Hellwig <hch@....de>
Signed-off-by: Shaohua Li <shli@...com>
---
 block/blk-mq.c | 98 ++++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 71 insertions(+), 27 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4f4bea2..1791bfb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1221,6 +1221,45 @@ static struct request *blk_mq_map_request(struct request_queue *q,
 	return rq;
 }
 
+static int blk_mq_direct_issue_request(struct request *rq)
+{
+	int ret;
+	struct request_queue *q = rq->q;
+	struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q,
+			rq->mq_ctx->cpu);
+	struct blk_mq_queue_data bd = {
+		.rq = rq,
+		.list = NULL,
+		.last = 1
+	};
+
+	/*
+	 * If the driver supports defer issued based on 'last', then
+	 * queue it up like normal since we can potentially save some
+	 * CPU this way.
+	 */
+	if (hctx->flags & BLK_MQ_F_DEFER_ISSUE)
+		return -1;
+	/*
+	 * For OK queue, we are done. For error, kill it. Any other
+	 * error (busy), just add it to our list as we previously
+	 * would have done
+	 */
+	ret = q->mq_ops->queue_rq(hctx, &bd);
+	if (ret == BLK_MQ_RQ_QUEUE_OK)
+		return 0;
+	else {
+		__blk_mq_requeue_request(rq);
+
+		if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
+			rq->errors = -EIO;
+			blk_mq_end_request(rq, rq->errors);
+			return 0;
+		}
+		return -1;
+	}
+}
+
 /*
  * Multiple hardware queue variant. This will not use per-process plugs,
  * but will attempt to bypass the hctx queueing if we can go straight to
@@ -1230,8 +1269,12 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = rw_is_sync(bio->bi_rw);
 	const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
+	unsigned int use_plug, request_count = 0;
 	struct blk_map_ctx data;
 	struct request *rq;
+	struct blk_plug *plug;
+
+	use_plug = !is_flush_fua;
 
 	blk_queue_bounce(q, &bio);
 
@@ -1240,6 +1283,10 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		return;
 	}
 
+	if (use_plug && !blk_queue_nomerges(q) &&
+	    blk_attempt_plug_merge(q, bio, &request_count))
+		return;
+
 	rq = blk_mq_map_request(q, bio, &data);
 	if (unlikely(!rq))
 		return;
@@ -1251,37 +1298,34 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	}
 
 	/*
-	 * If the driver supports defer issued based on 'last', then
-	 * queue it up like normal since we can potentially save some
-	 * CPU this way.
+	 * We do limited plugging. If bio can be merged, do merge. Otherwise the
+	 * existing request in the plug list will be issued. So the plug list
+	 * will have one request at most
 	 */
-	if (is_sync && !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
-		struct blk_mq_queue_data bd = {
-			.rq = rq,
-			.list = NULL,
-			.last = 1
-		};
-		int ret;
+	plug = current->plug;
+	if (use_plug && plug) {
+		struct request *old_rq = NULL;
 
 		blk_mq_bio_to_request(rq, bio);
+		if (!list_empty(&plug->mq_list)) {
+			old_rq = list_first_entry(&plug->mq_list,
+				struct request, queuelist);
+			list_del_init(&old_rq->queuelist);
+		}
+		list_add_tail(&rq->queuelist, &plug->mq_list);
+		blk_mq_put_ctx(data.ctx);
+		if (!old_rq)
+			return;
+		if (!blk_mq_direct_issue_request(old_rq))
+			return;
+		blk_mq_insert_request(old_rq, false, true, true);
+		return;
+	}
 
-		/*
-		 * For OK queue, we are done. For error, kill it. Any other
-		 * error (busy), just add it to our list as we previously
-		 * would have done
-		 */
-		ret = q->mq_ops->queue_rq(data.hctx, &bd);
-		if (ret == BLK_MQ_RQ_QUEUE_OK)
+	if (is_sync && !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
+		blk_mq_bio_to_request(rq, bio);
+		if (!blk_mq_direct_issue_request(rq))
 			goto done;
-		else {
-			__blk_mq_requeue_request(rq);
-
-			if (ret == BLK_MQ_RQ_QUEUE_ERROR) {
-				rq->errors = -EIO;
-				blk_mq_end_request(rq, rq->errors);
-				goto done;
-			}
-		}
 	}
 
 	if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) {
@@ -1314,7 +1358,7 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
 	 * If we have multiple hardware queues, just go directly to
 	 * one of those for sync IO.
 	 */
-	use_plug = !is_flush_fua && !is_sync;
+	use_plug = !is_flush_fua;
 
 	blk_queue_bounce(q, &bio);
 
-- 
1.8.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ