[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260203080602.726505-8-yukuai@fnnas.com>
Date: Tue, 3 Feb 2026 16:06:02 +0800
From: Yu Kuai <yukuai@...as.com>
To: tj@...nel.org,
josef@...icpanda.com,
axboe@...nel.dk
Cc: cgroups@...r.kernel.org,
linux-block@...r.kernel.org,
linux-kernel@...r.kernel.org,
yukuai@...as.com,
zhengqixing@...wei.com,
mkoutny@...e.com,
hch@...radead.org,
ming.lei@...hat.com,
nilay@...ux.ibm.com
Subject: [PATCH v2 7/7] blk-rq-qos: move rq_qos_mutex acquisition inside rq_qos_add/del
The current rq_qos_mutex handling has an awkward pattern where callers
must acquire the mutex before calling rq_qos_add()/rq_qos_del(), and
blkg_conf_open_bdev_frozen() has to release and re-acquire the mutex
around queue freezing to maintain the proper locking order (freeze queue
before mutex).
On the other hand, with rq_qos_mutex held after blkg_conf_prep(), there
are many possible deadlock scenarios, because sleeping allocations can
be performed while holding the mutex:
- allocating memory with GFP_KERNEL, as in blk_throtl_init();
- allocating percpu memory, as in pd_alloc_fn() for iocost/iolatency.
This patch refactors the locking by:
1. Moving queue freeze and rq_qos_mutex acquisition inside
rq_qos_add()/rq_qos_del(), with the correct order: freeze first,
then acquire mutex.
2. Removing external mutex handling from wbt_init() since rq_qos_add()
now handles it internally.
3. Removing rq_qos_mutex handling from blkg_conf_open_bdev() entirely,
making it only responsible for parsing MAJ:MIN and opening the bdev.
4. Removing blkg_conf_open_bdev_frozen() and blkg_conf_exit_frozen()
functions which are no longer needed.
5. Updating ioc_qos_write() to use the simpler blkg_conf_open_bdev()
and blkg_conf_exit() functions.
This eliminates the release-and-reacquire pattern and makes
rq_qos_add()/rq_qos_del() self-contained, which is cleaner and reduces
complexity. Each function now properly manages its own locking with
the correct order: queue freeze → mutex acquire → modify → mutex
release → queue unfreeze.
Signed-off-by: Yu Kuai <yukuai@...as.com>
---
block/blk-cgroup.c | 50 ----------------------------------------------
block/blk-cgroup.h | 2 --
block/blk-iocost.c | 11 ++++------
block/blk-rq-qos.c | 31 ++++++++++++++++------------
block/blk-wbt.c | 2 --
5 files changed, 22 insertions(+), 74 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 7fcb216917d0..d17d2b44df43 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -802,10 +802,8 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
return -ENODEV;
}
- mutex_lock(&bdev->bd_queue->rq_qos_mutex);
if (!disk_live(bdev->bd_disk)) {
blkdev_put_no_open(bdev);
- mutex_unlock(&bdev->bd_queue->rq_qos_mutex);
return -ENODEV;
}
@@ -813,38 +811,6 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
ctx->bdev = bdev;
return 0;
}
-/*
- * Similar to blkg_conf_open_bdev, but additionally freezes the queue,
- * ensures the correct locking order between freeze queue and q->rq_qos_mutex.
- *
- * This function returns negative error on failure. On success it returns
- * memflags which must be saved and later passed to blkg_conf_exit_frozen
- * for restoring the memalloc scope.
- */
-unsigned long __must_check blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx)
-{
- int ret;
- unsigned long memflags;
-
- if (ctx->bdev)
- return -EINVAL;
-
- ret = blkg_conf_open_bdev(ctx);
- if (ret < 0)
- return ret;
- /*
- * At this point, we haven’t started protecting anything related to QoS,
- * so we release q->rq_qos_mutex here, which was first acquired in blkg_
- * conf_open_bdev. Later, we re-acquire q->rq_qos_mutex after freezing
- * the queue to maintain the correct locking order.
- */
- mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex);
-
- memflags = blk_mq_freeze_queue(ctx->bdev->bd_queue);
- mutex_lock(&ctx->bdev->bd_queue->rq_qos_mutex);
-
- return memflags;
-}
/**
* blkg_conf_prep - parse and prepare for per-blkg config update
@@ -978,7 +944,6 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
*/
void blkg_conf_exit(struct blkg_conf_ctx *ctx)
__releases(&ctx->bdev->bd_queue->queue_lock)
- __releases(&ctx->bdev->bd_queue->rq_qos_mutex)
{
if (ctx->blkg) {
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
@@ -986,7 +951,6 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx)
}
if (ctx->bdev) {
- mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex);
blkdev_put_no_open(ctx->bdev);
ctx->body = NULL;
ctx->bdev = NULL;
@@ -994,20 +958,6 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx)
}
EXPORT_SYMBOL_GPL(blkg_conf_exit);
-/*
- * Similar to blkg_conf_exit, but also unfreezes the queue. Should be used
- * when blkg_conf_open_bdev_frozen is used to open the bdev.
- */
-void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags)
-{
- if (ctx->bdev) {
- struct request_queue *q = ctx->bdev->bd_queue;
-
- blkg_conf_exit(ctx);
- blk_mq_unfreeze_queue(q, memflags);
- }
-}
-
static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
{
int i;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 1cce3294634d..d4e7f78ba545 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -219,11 +219,9 @@ struct blkg_conf_ctx {
void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input);
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx);
-unsigned long blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx);
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
struct blkg_conf_ctx *ctx);
void blkg_conf_exit(struct blkg_conf_ctx *ctx);
-void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags);
/**
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index ef543d163d46..104a9a9f563f 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3220,16 +3220,13 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
u32 qos[NR_QOS_PARAMS];
bool enable, user;
char *body, *p;
- unsigned long memflags;
int ret;
blkg_conf_init(&ctx, input);
- memflags = blkg_conf_open_bdev_frozen(&ctx);
- if (IS_ERR_VALUE(memflags)) {
- ret = memflags;
+ ret = blkg_conf_open_bdev(&ctx);
+ if (ret)
goto err;
- }
body = ctx.body;
disk = ctx.bdev->bd_disk;
@@ -3346,14 +3343,14 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
blk_mq_unquiesce_queue(disk->queue);
- blkg_conf_exit_frozen(&ctx, memflags);
+ blkg_conf_exit(&ctx);
return nbytes;
einval:
spin_unlock_irq(&ioc->lock);
blk_mq_unquiesce_queue(disk->queue);
ret = -EINVAL;
err:
- blkg_conf_exit_frozen(&ctx, memflags);
+ blkg_conf_exit(&ctx);
return ret;
}
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 85cf74402a09..fe96183bcc75 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -327,8 +327,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
{
struct request_queue *q = disk->queue;
unsigned int memflags;
-
- lockdep_assert_held(&q->rq_qos_mutex);
+ int ret = 0;
rqos->disk = disk;
rqos->id = id;
@@ -337,20 +336,24 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
/*
* No IO can be in-flight when adding rqos, so freeze queue, which
* is fine since we only support rq_qos for blk-mq queue.
+ *
+ * Acquire rq_qos_mutex after freezing the queue to ensure proper
+ * locking order.
*/
memflags = blk_mq_freeze_queue(q);
+ mutex_lock(&q->rq_qos_mutex);
- if (rq_qos_id(q, rqos->id))
- goto ebusy;
- rqos->next = q->rq_qos;
- q->rq_qos = rqos;
- blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q);
+ if (rq_qos_id(q, rqos->id)) {
+ ret = -EBUSY;
+ } else {
+ rqos->next = q->rq_qos;
+ q->rq_qos = rqos;
+ blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q);
+ }
+ mutex_unlock(&q->rq_qos_mutex);
blk_mq_unfreeze_queue(q, memflags);
- return 0;
-ebusy:
- blk_mq_unfreeze_queue(q, memflags);
- return -EBUSY;
+ return ret;
}
void rq_qos_del(struct rq_qos *rqos)
@@ -359,9 +362,9 @@ void rq_qos_del(struct rq_qos *rqos)
struct rq_qos **cur;
unsigned int memflags;
- lockdep_assert_held(&q->rq_qos_mutex);
-
memflags = blk_mq_freeze_queue(q);
+ mutex_lock(&q->rq_qos_mutex);
+
for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
if (*cur == rqos) {
*cur = rqos->next;
@@ -370,5 +373,7 @@ void rq_qos_del(struct rq_qos *rqos)
}
if (!q->rq_qos)
blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q);
+
+ mutex_unlock(&q->rq_qos_mutex);
blk_mq_unfreeze_queue(q, memflags);
}
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 1415f2bf8611..a636dea27270 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -960,9 +960,7 @@ static int wbt_init(struct gendisk *disk, struct rq_wb *rwb)
/*
* Assign rwb and add the stats callback.
*/
- mutex_lock(&q->rq_qos_mutex);
ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
- mutex_unlock(&q->rq_qos_mutex);
if (ret)
return ret;
--
2.51.0
Powered by blists - more mailing lists