[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250814033522.770575-14-yukuai1@huaweicloud.com>
Date: Thu, 14 Aug 2025 11:35:19 +0800
From: Yu Kuai <yukuai1@...weicloud.com>
To: axboe@...nel.dk,
yukuai3@...wei.com,
bvanassche@....org,
nilay@...ux.ibm.com,
hare@...e.de,
ming.lei@...hat.com
Cc: linux-block@...r.kernel.org,
linux-kernel@...r.kernel.org,
yukuai1@...weicloud.com,
yi.zhang@...wei.com,
yangerkun@...wei.com,
johnny.chenyi@...wei.com
Subject: [PATCH 13/16] mq-deadline: covert to use request_queue->async_depth
From: Yu Kuai <yukuai3@...wei.com>
In downstream kernel, we test with mq-deadline with many fio workloads, and
we found a performance regression after commit 39823b47bbd4
("block/mq-deadline: Fix the tag reservation code") with following test:
[global]
rw=randread
direct=1
ramp_time=1
ioengine=libaio
iodepth=1024
numjobs=24
bs=1024k
group_reporting=1
runtime=60
[job1]
filename=/dev/sda
Root cause is that mq-deadline now support configuring async_depth,
although the default value is nr_request, however the minimal value is
1, hence min_shallow_depth is set to 1, causing wake_batch to be 1. For
consequence, sbitmap_queue will be waken up after each IO instead of
8 IO.
In this test case, sda is HDD and max_sectors is 128k, hence each
submitted 1M io will be splited into 8 sequential 128k requests, however
due to there are 24 jobs and total tags are exhausted, the 8 requests are
unlikely to be dispatched sequentially, and changing wake_batch to 1
will make this much worse, accounting blktrace D stage, the percentage
of sequential io is decreased from 8% to 0.8%.
Fix this problem by converting to request_queue->async_depth, where
min_shallow_depth is set each time async_depth is updated.
Fixes: 39823b47bbd4 ("block/mq-deadline: Fix the tag reservation code")
Signed-off-by: Yu Kuai <yukuai3@...wei.com>
---
block/mq-deadline.c | 42 +++---------------------------------------
1 file changed, 3 insertions(+), 39 deletions(-)
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 1825173d82a6..970e1541b7b7 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -98,7 +98,6 @@ struct deadline_data {
int fifo_batch;
int writes_starved;
int front_merges;
- u32 async_depth;
int prio_aging_expire;
spinlock_t lock;
@@ -487,32 +486,10 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
return rq;
}
-/*
- * Called by __blk_mq_alloc_request(). The shallow_depth value set by this
- * function is used by __blk_mq_get_tag().
- */
-static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
-{
- struct deadline_data *dd = data->q->elevator->elevator_data;
-
- /* Do not throttle synchronous reads. */
- if (blk_mq_sched_sync_request(opf))
- return;
-
- /*
- * Throttle asynchronous requests and writes such that these requests
- * do not block the allocation of synchronous requests.
- */
- data->shallow_depth = dd->async_depth;
-}
-
-/* Called by blk_mq_update_nr_requests(). */
+/* Called by blk_mq_init_sched() and blk_mq_update_nr_requests(). */
static void dd_depth_updated(struct request_queue *q)
{
- struct deadline_data *dd = q->elevator->elevator_data;
-
- dd->async_depth = q->nr_requests;
- blk_mq_set_min_shallow_depth(q, 1);
+ blk_mq_set_min_shallow_depth(q, q->async_depth);
}
static void dd_exit_sched(struct elevator_queue *e)
@@ -577,6 +554,7 @@ static int dd_init_sched(struct request_queue *q, struct elevator_queue *eq)
blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
q->elevator = eq;
+ q->async_depth = q->nr_requests;
return 0;
}
@@ -761,7 +739,6 @@ SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]);
SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire);
SHOW_INT(deadline_writes_starved_show, dd->writes_starved);
SHOW_INT(deadline_front_merges_show, dd->front_merges);
-SHOW_INT(deadline_async_depth_show, dd->async_depth);
SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch);
#undef SHOW_INT
#undef SHOW_JIFFIES
@@ -791,7 +768,6 @@ STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MA
STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX);
STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX);
STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1);
-STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX);
STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX);
#undef STORE_FUNCTION
#undef STORE_INT
@@ -805,7 +781,6 @@ static const struct elv_fs_entry deadline_attrs[] = {
DD_ATTR(write_expire),
DD_ATTR(writes_starved),
DD_ATTR(front_merges),
- DD_ATTR(async_depth),
DD_ATTR(fifo_batch),
DD_ATTR(prio_aging_expire),
__ATTR_NULL
@@ -892,15 +867,6 @@ static int deadline_starved_show(void *data, struct seq_file *m)
return 0;
}
-static int dd_async_depth_show(void *data, struct seq_file *m)
-{
- struct request_queue *q = data;
- struct deadline_data *dd = q->elevator->elevator_data;
-
- seq_printf(m, "%u\n", dd->async_depth);
- return 0;
-}
-
static int dd_queued_show(void *data, struct seq_file *m)
{
struct request_queue *q = data;
@@ -1010,7 +976,6 @@ static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
DEADLINE_NEXT_RQ_ATTR(write2),
{"batching", 0400, deadline_batching_show},
{"starved", 0400, deadline_starved_show},
- {"async_depth", 0400, dd_async_depth_show},
{"dispatch0", 0400, .seq_ops = &deadline_dispatch0_seq_ops},
{"dispatch1", 0400, .seq_ops = &deadline_dispatch1_seq_ops},
{"dispatch2", 0400, .seq_ops = &deadline_dispatch2_seq_ops},
@@ -1024,7 +989,6 @@ static const struct blk_mq_debugfs_attr deadline_queue_debugfs_attrs[] = {
static struct elevator_type mq_deadline = {
.ops = {
.depth_updated = dd_depth_updated,
- .limit_depth = dd_limit_depth,
.insert_requests = dd_insert_requests,
.dispatch_request = dd_dispatch_request,
.prepare_request = dd_prepare_request,
--
2.39.2
Powered by blists - more mailing lists