[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250722072431.610354-7-yukuai1@huaweicloud.com>
Date: Tue, 22 Jul 2025 15:24:31 +0800
From: Yu Kuai <yukuai1@...weicloud.com>
To: dlemoal@...nel.org,
hare@...e.de,
tj@...nel.org,
josef@...icpanda.com,
axboe@...nel.dk,
yukuai3@...wei.com
Cc: cgroups@...r.kernel.org,
linux-block@...r.kernel.org,
linux-kernel@...r.kernel.org,
yukuai1@...weicloud.com,
yi.zhang@...wei.com,
yangerkun@...wei.com,
johnny.chenyi@...wei.com
Subject: [PATCH 6/6] blk-mq-sched: support request batch dispatching for sq elevator
From: Yu Kuai <yukuai3@...wei.com>
The dispatch_request method currently dispatches one request at a time.
With multiple dispatching contexts, this behavior on the one hand
introduces intense lock contention:
t1:                     t2:                     t3:
lock                    lock                    lock
// grab lock
ops.dispatch_request
unlock
                        // grab lock
                        ops.dispatch_request
                        unlock
                                                // grab lock
                                                ops.dispatch_request
                                                unlock
and on the other hand messes up the request dispatching order:
t1:
lock
rq1 = ops.dispatch_request
unlock
                        t2:
                        lock
                        rq2 = ops.dispatch_request
                        unlock
lock
rq3 = ops.dispatch_request
unlock
                        lock
                        rq4 = ops.dispatch_request
                        unlock
//rq1,rq3 issue to disk
                        // rq2, rq4 issue to disk
In this case, the elevator dispatch order is rq 1-2-3-4; however, the
order seen by the disk is rq 1-3-2-4, i.e. the order of rq2 and rq3 is
inverted.
Fix those problems by introducing elevator_dispatch_requests(). This
helper grabs the lock once and dispatches a batch of requests while
holding it.
Signed-off-by: Yu Kuai <yukuai3@...wei.com>
---
block/blk-mq-sched.c | 60 +++++++++++++++++++++++++++++++++++++++++---
block/blk-mq.h | 21 ++++++++++++++++
2 files changed, 77 insertions(+), 4 deletions(-)
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index f18aecf710ad..c4450b73ab25 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -101,6 +101,54 @@ static bool elevator_can_dispatch(struct sched_dispatch_ctx *ctx)
return true;
}
+static void elevator_dispatch_requests(struct sched_dispatch_ctx *ctx)
+{ /* Pull requests from the elevator in batches onto ctx->rq_list, holding ctx->e->lock across each batch. */
+ struct request *rq;
+ bool has_get_budget = ctx->q->mq_ops->get_budget != NULL; /* true when the driver provides a get_budget callback */
+ int budget_token[BUDGET_TOKEN_BATCH];
+ int count = ctx->q->nr_requests; /* batch size when no budgets are used; overwritten per pass otherwise */
+ int i;
+
+ while (true) {
+ if (!elevator_can_dispatch(ctx))
+ return;
+
+ if (has_get_budget) {
+ count = blk_mq_get_dispatch_budgets(ctx->q, budget_token); /* tokens are obtained before the lock is taken */
+ if (count <= 0)
+ return;
+ }
+
+ spin_lock_irq(&ctx->e->lock);
+ for (i = 0; i < count; ++i) {
+ rq = ctx->e->type->ops.dispatch_request(ctx->hctx);
+ if (!rq) {
+ ctx->run_queue = true; /* elevator returned no request for this slot */
+ goto err_free_budgets; /* budget_token[i..count-1] are still unused */
+ }
+
+ if (has_get_budget)
+ blk_mq_set_rq_budget_token(rq, budget_token[i]);
+ list_add_tail(&rq->queuelist, &ctx->rq_list);
+ ctx->count++;
+ if (rq->mq_hctx != ctx->hctx)
+ ctx->multi_hctxs = true;
+
+ if (!blk_mq_get_driver_tag(rq)) {
+ i++; /* budget_token[i] was handed to rq above, so skip it in the cleanup loop */
+ goto err_free_budgets;
+ }
+ }
+ spin_unlock_irq(&ctx->e->lock);
+ }
+
+err_free_budgets: /* reached only via goto, with ctx->e->lock still held */
+ spin_unlock_irq(&ctx->e->lock);
+ if (has_get_budget)
+ for (; i < count; ++i) /* give back every token not attached to a request */
+ blk_mq_put_dispatch_budget(ctx->q, budget_token[i]);
+}
+
static bool elevator_dispatch_one_request(struct sched_dispatch_ctx *ctx)
{
bool sq_sched = blk_queue_sq_sched(ctx->q);
@@ -213,10 +261,14 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
else
max_dispatch = hctx->queue->nr_requests;
- do {
- if (!elevator_dispatch_one_request(&ctx))
- break;
- } while (ctx.count < max_dispatch);
+ if (!hctx->dispatch_busy && blk_queue_sq_sched(ctx.q))
+ elevator_dispatch_requests(&ctx);
+ else {
+ do {
+ if (!elevator_dispatch_one_request(&ctx))
+ break;
+ } while (ctx.count < max_dispatch);
+ }
return elevator_finish_dispatch(&ctx);
}
diff --git a/block/blk-mq.h b/block/blk-mq.h
index affb2e14b56e..450c16a07841 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -37,6 +37,7 @@ enum {
};
#define BLK_MQ_CPU_WORK_BATCH (8)
+#define BUDGET_TOKEN_BATCH (8)
typedef unsigned int __bitwise blk_insert_t;
#define BLK_MQ_INSERT_AT_HEAD ((__force blk_insert_t)0x01)
@@ -262,6 +263,26 @@ static inline int blk_mq_get_dispatch_budget(struct request_queue *q)
return 0;
}
+/*
+ * Collect up to BUDGET_TOKEN_BATCH dispatch budget tokens for @q.
+ *
+ * Each token comes from q->mq_ops->get_budget() when that callback is set;
+ * otherwise 0 is stored for the slot. Collection stops at the first negative
+ * token, which is not stored.
+ *
+ * @budget_token must have room for BUDGET_TOKEN_BATCH entries.
+ *
+ * Returns the number of tokens written to @budget_token.
+ */
+static inline int blk_mq_get_dispatch_budgets(struct request_queue *q,
+					      int *budget_token)
+{
+	int nr;
+
+	for (nr = 0; nr < BUDGET_TOKEN_BATCH; nr++) {
+		int tok = q->mq_ops->get_budget ? q->mq_ops->get_budget(q) : 0;
+
+		/* A negative token means no more budget is available. */
+		if (tok < 0)
+			break;
+
+		budget_token[nr] = tok;
+	}
+
+	return nr;
+}
+
static inline void blk_mq_set_rq_budget_token(struct request *rq, int token)
{
if (token < 0)
--
2.39.2
Powered by blists - more mailing lists