Message-ID: <20260203080602.726505-8-yukuai@fnnas.com>
Date: Tue,  3 Feb 2026 16:06:02 +0800
From: Yu Kuai <yukuai@...as.com>
To: tj@...nel.org,
	josef@...icpanda.com,
	axboe@...nel.dk
Cc: cgroups@...r.kernel.org,
	linux-block@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	yukuai@...as.com,
	zhengqixing@...wei.com,
	mkoutny@...e.com,
	hch@...radead.org,
	ming.lei@...hat.com,
	nilay@...ux.ibm.com
Subject: [PATCH v2 7/7] blk-rq-qos: move rq_qos_mutex acquisition inside rq_qos_add/del

The current rq_qos_mutex handling follows an awkward pattern: callers
must acquire the mutex before calling rq_qos_add()/rq_qos_del(), and
blkg_conf_open_bdev_frozen() has to release and re-acquire the mutex
around queue freezing to maintain the proper locking order (freeze the
queue before taking the mutex).
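
For reference, the release-and-reacquire dance in the (now removed)
blkg_conf_open_bdev_frozen() boiled down to the following sequence,
with q = ctx->bdev->bd_queue (simplified from the blk-cgroup.c hunk
below):

	blkg_conf_open_bdev(ctx);		/* takes q->rq_qos_mutex */
	mutex_unlock(&q->rq_qos_mutex);		/* drop it again ...     */
	memflags = blk_mq_freeze_queue(q);	/* ... to freeze first   */
	mutex_lock(&q->rq_qos_mutex);		/* then re-acquire       */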

Worse, with rq_qos_mutex held across blkg_conf_prep(), a number of
deadlocks become possible, for example:

- allocating memory with GFP_KERNEL, as in blk_throtl_init();
- allocating percpu memory, as in pd_alloc_fn() for iocost/iolatency.
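
Sketched, the problematic region looked like this (a hypothetical
simplification: the mutex was taken early in blkg_conf_open_bdev(), so
any sleeping allocation later in the blkg_conf_prep() path ran under
it):

	mutex_lock(&q->rq_qos_mutex);		/* in blkg_conf_open_bdev() */
	...
	pd_alloc_fn(disk, blkcg, GFP_KERNEL);	/* may sleep or enter
						   reclaim under the mutex */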

This patch refactors the locking by:

1. Moving queue freeze and rq_qos_mutex acquisition inside
   rq_qos_add()/rq_qos_del(), with the correct order: freeze first,
   then acquire mutex.

2. Removing external mutex handling from wbt_init() since rq_qos_add()
   now handles it internally (see the caller-side sketch after this
   list).

3. Removing rq_qos_mutex handling from blkg_conf_open_bdev() entirely,
   making it only responsible for parsing MAJ:MIN and opening the bdev.

4. Removing the blkg_conf_open_bdev_frozen() and blkg_conf_exit_frozen()
   helpers, which are no longer needed.

5. Updating ioc_qos_write() to use the simpler blkg_conf_open_bdev()
   and blkg_conf_exit() functions.
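
From a caller's perspective the API collapses to a single self-locking
call; after this patch wbt_init() reduces to (per the blk-wbt.c hunk
below):

	ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
	if (ret)
		return ret;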

This eliminates the release-and-reacquire pattern and makes
rq_qos_add()/rq_qos_del() self-contained, reducing locking complexity
for callers. Each function now manages its own locking in the correct
order: queue freeze → mutex acquire → modify → mutex release → queue
unfreeze.
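
Condensed from the blk-rq-qos.c hunks below, the self-contained
skeleton now shared by rq_qos_add() and rq_qos_del() is:

	memflags = blk_mq_freeze_queue(q);
	mutex_lock(&q->rq_qos_mutex);
	/* ... add to or remove from the q->rq_qos list ... */
	mutex_unlock(&q->rq_qos_mutex);
	blk_mq_unfreeze_queue(q, memflags);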

Signed-off-by: Yu Kuai <yukuai@...as.com>
---
 block/blk-cgroup.c | 50 ----------------------------------------------
 block/blk-cgroup.h |  2 --
 block/blk-iocost.c | 11 ++++------
 block/blk-rq-qos.c | 31 ++++++++++++++++------------
 block/blk-wbt.c    |  2 --
 5 files changed, 22 insertions(+), 74 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 7fcb216917d0..d17d2b44df43 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -802,10 +802,8 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
 		return -ENODEV;
 	}
 
-	mutex_lock(&bdev->bd_queue->rq_qos_mutex);
 	if (!disk_live(bdev->bd_disk)) {
 		blkdev_put_no_open(bdev);
-		mutex_unlock(&bdev->bd_queue->rq_qos_mutex);
 		return -ENODEV;
 	}
 
@@ -813,38 +811,6 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
 	ctx->bdev = bdev;
 	return 0;
 }
-/*
- * Similar to blkg_conf_open_bdev, but additionally freezes the queue,
- * ensures the correct locking order between freeze queue and q->rq_qos_mutex.
- *
- * This function returns negative error on failure. On success it returns
- * memflags which must be saved and later passed to blkg_conf_exit_frozen
- * for restoring the memalloc scope.
- */
-unsigned long __must_check blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx)
-{
-	int ret;
-	unsigned long memflags;
-
-	if (ctx->bdev)
-		return -EINVAL;
-
-	ret = blkg_conf_open_bdev(ctx);
-	if (ret < 0)
-		return ret;
-	/*
-	 * At this point, we haven't started protecting anything related to QoS,
-	 * so we release q->rq_qos_mutex here, which was first acquired in blkg_
-	 * conf_open_bdev. Later, we re-acquire q->rq_qos_mutex after freezing
-	 * the queue to maintain the correct locking order.
-	 */
-	mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex);
-
-	memflags = blk_mq_freeze_queue(ctx->bdev->bd_queue);
-	mutex_lock(&ctx->bdev->bd_queue->rq_qos_mutex);
-
-	return memflags;
-}
 
 /**
  * blkg_conf_prep - parse and prepare for per-blkg config update
@@ -978,7 +944,6 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
  */
 void blkg_conf_exit(struct blkg_conf_ctx *ctx)
 	__releases(&ctx->bdev->bd_queue->queue_lock)
-	__releases(&ctx->bdev->bd_queue->rq_qos_mutex)
 {
 	if (ctx->blkg) {
 		spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
@@ -986,7 +951,6 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx)
 	}
 
 	if (ctx->bdev) {
-		mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex);
 		blkdev_put_no_open(ctx->bdev);
 		ctx->body = NULL;
 		ctx->bdev = NULL;
@@ -994,20 +958,6 @@ void blkg_conf_exit(struct blkg_conf_ctx *ctx)
 }
 EXPORT_SYMBOL_GPL(blkg_conf_exit);
 
-/*
- * Similar to blkg_conf_exit, but also unfreezes the queue. Should be used
- * when blkg_conf_open_bdev_frozen is used to open the bdev.
- */
-void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags)
-{
-	if (ctx->bdev) {
-		struct request_queue *q = ctx->bdev->bd_queue;
-
-		blkg_conf_exit(ctx);
-		blk_mq_unfreeze_queue(q, memflags);
-	}
-}
-
 static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
 {
 	int i;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 1cce3294634d..d4e7f78ba545 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -219,11 +219,9 @@ struct blkg_conf_ctx {
 
 void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input);
 int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx);
-unsigned long blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx);
 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
 		   struct blkg_conf_ctx *ctx);
 void blkg_conf_exit(struct blkg_conf_ctx *ctx);
-void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags);
 
 /**
  * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index ef543d163d46..104a9a9f563f 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3220,16 +3220,13 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 	u32 qos[NR_QOS_PARAMS];
 	bool enable, user;
 	char *body, *p;
-	unsigned long memflags;
 	int ret;
 
 	blkg_conf_init(&ctx, input);
 
-	memflags = blkg_conf_open_bdev_frozen(&ctx);
-	if (IS_ERR_VALUE(memflags)) {
-		ret = memflags;
+	ret = blkg_conf_open_bdev(&ctx);
+	if (ret)
 		goto err;
-	}
 
 	body = ctx.body;
 	disk = ctx.bdev->bd_disk;
@@ -3346,14 +3343,14 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
 
 	blk_mq_unquiesce_queue(disk->queue);
 
-	blkg_conf_exit_frozen(&ctx, memflags);
+	blkg_conf_exit(&ctx);
 	return nbytes;
 einval:
 	spin_unlock_irq(&ioc->lock);
 	blk_mq_unquiesce_queue(disk->queue);
 	ret = -EINVAL;
 err:
-	blkg_conf_exit_frozen(&ctx, memflags);
+	blkg_conf_exit(&ctx);
 	return ret;
 }
 
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 85cf74402a09..fe96183bcc75 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -327,8 +327,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
 {
 	struct request_queue *q = disk->queue;
 	unsigned int memflags;
-
-	lockdep_assert_held(&q->rq_qos_mutex);
+	int ret = 0;
 
 	rqos->disk = disk;
 	rqos->id = id;
@@ -337,20 +336,24 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
 	/*
 	 * No IO can be in-flight when adding rqos, so freeze queue, which
 	 * is fine since we only support rq_qos for blk-mq queue.
+	 *
+	 * Acquire rq_qos_mutex after freezing the queue to ensure proper
+	 * locking order.
 	 */
 	memflags = blk_mq_freeze_queue(q);
+	mutex_lock(&q->rq_qos_mutex);
 
-	if (rq_qos_id(q, rqos->id))
-		goto ebusy;
-	rqos->next = q->rq_qos;
-	q->rq_qos = rqos;
-	blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q);
+	if (rq_qos_id(q, rqos->id)) {
+		ret = -EBUSY;
+	} else {
+		rqos->next = q->rq_qos;
+		q->rq_qos = rqos;
+		blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q);
+	}
 
+	mutex_unlock(&q->rq_qos_mutex);
 	blk_mq_unfreeze_queue(q, memflags);
-	return 0;
-ebusy:
-	blk_mq_unfreeze_queue(q, memflags);
-	return -EBUSY;
+	return ret;
 }
 
 void rq_qos_del(struct rq_qos *rqos)
@@ -359,9 +362,9 @@ void rq_qos_del(struct rq_qos *rqos)
 	struct rq_qos **cur;
 	unsigned int memflags;
 
-	lockdep_assert_held(&q->rq_qos_mutex);
-
 	memflags = blk_mq_freeze_queue(q);
+	mutex_lock(&q->rq_qos_mutex);
+
 	for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
 		if (*cur == rqos) {
 			*cur = rqos->next;
@@ -370,5 +373,7 @@ void rq_qos_del(struct rq_qos *rqos)
 	}
 	if (!q->rq_qos)
 		blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q);
+
+	mutex_unlock(&q->rq_qos_mutex);
 	blk_mq_unfreeze_queue(q, memflags);
 }
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 1415f2bf8611..a636dea27270 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -960,9 +960,7 @@ static int wbt_init(struct gendisk *disk, struct rq_wb *rwb)
 	/*
 	 * Assign rwb and add the stats callback.
 	 */
-	mutex_lock(&q->rq_qos_mutex);
 	ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
-	mutex_unlock(&q->rq_qos_mutex);
 	if (ret)
 		return ret;
 
-- 
2.51.0

