Message-Id: <25f7593a0350e07997fc31d1317218ffeec4f6bf.1556609582.git.asml.silence@gmail.com>
Date:   Tue, 30 Apr 2019 10:34:17 +0300
From:   "Pavel Begunkov (Silence)" <asml.silence@...il.com>
To:     Jens Axboe <axboe@...nel.dk>, linux-block@...r.kernel.org,
        linux-kernel@...r.kernel.org
Cc:     Pavel Begunkov <asml.silence@...il.com>
Subject: [PATCH 5/7] blk-mq: Precalculate hybrid polling time

From: Pavel Begunkov <asml.silence@...il.com>

Calculating the sleep time for adaptive hybrid polling on a per-request
basis could become time consuming in the future.
Precalculate it once per statistics gathering round instead.

Signed-off-by: Pavel Begunkov <asml.silence@...il.com>
---
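For illustration only, here is a minimal userspace sketch of the pattern this
patch applies: derive the sleep value once when statistics are gathered, and
make the per-request path a plain table read. The identifiers below
(sample_stat, poll_cache, NR_BUCKETS, update_bucket, poll_sleep_ns) are
invented for the sketch and are not kernel symbols.

	/* Standalone sketch of "precalculate at stats time, look up on the
	 * hot path"; mirrors the (mean + 1) / 2 heuristic used by hybrid
	 * polling but is not kernel code. */
	#include <stdint.h>
	#include <stdio.h>

	#define NR_BUCKETS 16

	struct sample_stat {
		uint64_t nr_samples;
		uint64_t mean;		/* mean completion time, ns */
	};

	struct poll_cache {
		struct sample_stat stat;
		uint64_t sleep_ns;	/* precalculated: half the mean */
	};

	static struct poll_cache cache[NR_BUCKETS];

	/* Called once per statistics gathering round. */
	static void update_bucket(int bucket, const struct sample_stat *stat)
	{
		cache[bucket].stat = *stat;
		cache[bucket].sleep_ns =
			stat->nr_samples ? (stat->mean + 1) / 2 : 0;
	}

	/* Per-request path: no arithmetic, just a lookup. */
	static uint64_t poll_sleep_ns(int bucket)
	{
		return cache[bucket].sleep_ns;
	}

	int main(void)
	{
		struct sample_stat s = { .nr_samples = 128, .mean = 9000 };

		update_bucket(3, &s);
		printf("sleep %llu ns\n",
		       (unsigned long long)poll_sleep_ns(3));
		return 0;
	}
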
 block/blk-core.c       |  5 ++++-
 block/blk-mq-debugfs.c |  4 ++--
 block/blk-mq.c         | 39 ++++++++++++++++++++++-----------------
 include/linux/blkdev.h |  8 +++++++-
 4 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index a55389ba8779..daadce545e43 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -474,7 +474,7 @@ static void blk_timeout_work(struct work_struct *work)
 struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 {
 	struct request_queue *q;
-	int ret;
+	int ret, bucket;
 
 	q = kmem_cache_alloc_node(blk_requestq_cachep,
 				gfp_mask | __GFP_ZERO, node_id);
@@ -536,6 +536,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (blkcg_init_queue(q))
 		goto fail_ref;
 
+	for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++)
+		q->poll_info[bucket].sleep_ns = 0;
+
 	return q;
 
 fail_ref:
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index b62bd4468db3..ab55446cb570 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -44,11 +44,11 @@ static int queue_poll_stat_show(void *data, struct seq_file *m)
 
 	for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS/2; bucket++) {
 		seq_printf(m, "read  (%d Bytes): ", 1 << (9+bucket));
-		print_stat(m, &q->poll_stat[2*bucket]);
+		print_stat(m, &q->poll_info[2*bucket].stat);
 		seq_puts(m, "\n");
 
 		seq_printf(m, "write (%d Bytes): ",  1 << (9+bucket));
-		print_stat(m, &q->poll_stat[2*bucket+1]);
+		print_stat(m, &q->poll_info[2*bucket+1].stat);
 		seq_puts(m, "\n");
 	}
 	return 0;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index cc3f73e4e01c..4e54a004e345 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3312,14 +3312,32 @@ static void blk_mq_poll_stats_start(struct request_queue *q)
 	blk_stat_activate_msecs(q->poll_cb, 100);
 }
 
+static void blk_mq_update_poll_info(struct poll_info *pi,
+				    struct blk_rq_stat *stat)
+{
+	u64 sleep_ns;
+
+	if (!stat->nr_samples)
+		sleep_ns = 0;
+	else
+		sleep_ns = (stat->mean + 1) / 2;
+
+	pi->stat = *stat;
+	pi->sleep_ns = sleep_ns;
+}
+
 static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb)
 {
 	struct request_queue *q = cb->data;
 	int bucket;
 
 	for (bucket = 0; bucket < BLK_MQ_POLL_STATS_BKTS; bucket++) {
-		if (cb->stat[bucket].nr_samples)
-			q->poll_stat[bucket] = cb->stat[bucket];
+		if (cb->stat[bucket].nr_samples) {
+			struct poll_info *pi = &q->poll_info[bucket];
+			struct blk_rq_stat *stat = &cb->stat[bucket];
+
+			blk_mq_update_poll_info(pi, stat);
+		}
 	}
 }
 
@@ -3327,7 +3345,6 @@ static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
 				       struct blk_mq_hw_ctx *hctx,
 				       struct request *rq)
 {
-	unsigned long ret = 0;
 	int bucket;
 
 	/*
@@ -3337,23 +3354,11 @@ static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
 	if (!blk_poll_stats_enable(q))
 		return 0;
 
-	/*
-	 * As an optimistic guess, use half of the mean service time
-	 * for this type of request. We can (and should) make this smarter.
-	 * For instance, if the completion latencies are tight, we can
-	 * get closer than just half the mean. This is especially
-	 * important on devices where the completion latencies are longer
-	 * than ~10 usec. We do use the stats for the relevant IO size
-	 * if available which does lead to better estimates.
-	 */
 	bucket = blk_mq_poll_stats_bkt(rq);
 	if (bucket < 0)
-		return ret;
-
-	if (q->poll_stat[bucket].nr_samples)
-		ret = (q->poll_stat[bucket].mean + 1) / 2;
+		return 0;
 
-	return ret;
+	return q->poll_info[bucket].sleep_ns;
 }
 
 static bool blk_mq_poll_hybrid_sleep(struct request_queue *q,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 317ab30d2904..40c77935fd61 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -385,6 +385,12 @@ static inline int blkdev_reset_zones_ioctl(struct block_device *bdev,
 
 #endif /* CONFIG_BLK_DEV_ZONED */
 
+struct poll_info
+{
+	struct blk_rq_stat	stat;
+	u64			sleep_ns;
+};
+
 struct request_queue {
 	/*
 	 * Together with queue_head for cacheline sharing
@@ -477,7 +483,7 @@ struct request_queue {
 	int			poll_nsec;
 
 	struct blk_stat_callback	*poll_cb;
-	struct blk_rq_stat	poll_stat[BLK_MQ_POLL_STATS_BKTS];
+	struct poll_info	poll_info[BLK_MQ_POLL_STATS_BKTS];
 
 	struct timer_list	timeout;
 	struct work_struct	timeout_work;
-- 
2.21.0
