lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu,  2 Jul 2015 23:29:52 +0900
From:	Akinobu Mita <akinobu.mita@...il.com>
To:	linux-kernel@...r.kernel.org
Cc:	Akinobu Mita <akinobu.mita@...il.com>,
	Jens Axboe <axboe@...nel.dk>, Ming Lei <tom.leiming@...il.com>
Subject: [PATCH v2 1/6] blk-mq: fix sysfs registration/unregistration race

There is a race between cpu hotplug handling and adding/deleting
gendisk for blk-mq, where both are trying to register and unregister
the same sysfs entries.

CPU hotplug handling for blk-mq (blk_mq_queue_reinit) does unregister
and register sysfs entries of hctx for all request queues in
all_q_list.  On the other hand, these entries for a request queue may
have already been unregistered or in the middle of registration when
CPU hotplug event occurs.  Because those sysfs entries are registered
by blk_mq_register_disk() after the request queue is added to
all_q_list, and similarily the request queue is deleted from
all_q_list after those are unregistered by blk_mq_unregister_disk().

So we need proper mutual exclusion for those registration and
unregistration.

Signed-off-by: Akinobu Mita <akinobu.mita@...il.com>
Cc: Jens Axboe <axboe@...nel.dk>
Cc: Ming Lei <tom.leiming@...il.com>
---
 block/blk-core.c       |  1 +
 block/blk-mq-sysfs.c   | 44 ++++++++++++++++++++++++++++++++++----------
 include/linux/blkdev.h |  3 +++
 3 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 82819e6..bbf67cd 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -687,6 +687,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
 
 	init_waitqueue_head(&q->mq_freeze_wq);
+	mutex_init(&q->mq_sysfs_lock);
 
 	if (blkcg_init_queue(q))
 		goto fail_bdi;
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index b79685e..79a3e8d 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -332,13 +332,15 @@ static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx)
 	struct blk_mq_ctx *ctx;
 	int i;
 
-	if (!hctx->nr_ctx || !(hctx->flags & BLK_MQ_F_SYSFS_UP))
+	if (!(hctx->flags & BLK_MQ_F_SYSFS_UP))
 		return;
 
 	hctx_for_each_ctx(hctx, ctx, i)
 		kobject_del(&ctx->kobj);
 
 	kobject_del(&hctx->kobj);
+
+	hctx->flags &= ~BLK_MQ_F_SYSFS_UP;
 }
 
 static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
@@ -347,7 +349,7 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
 	struct blk_mq_ctx *ctx;
 	int i, ret;
 
-	if (!hctx->nr_ctx || !(hctx->flags & BLK_MQ_F_SYSFS_UP))
+	if (!hctx->nr_ctx)
 		return 0;
 
 	ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", hctx->queue_num);
@@ -357,10 +359,12 @@ static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx)
 	hctx_for_each_ctx(hctx, ctx, i) {
 		ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu);
 		if (ret)
-			break;
+			return ret;
 	}
 
-	return ret;
+	hctx->flags |= BLK_MQ_F_SYSFS_UP;
+
+	return 0;
 }
 
 void blk_mq_unregister_disk(struct gendisk *disk)
@@ -370,6 +374,8 @@ void blk_mq_unregister_disk(struct gendisk *disk)
 	struct blk_mq_ctx *ctx;
 	int i, j;
 
+	mutex_lock(&q->mq_sysfs_lock);
+
 	queue_for_each_hw_ctx(q, hctx, i) {
 		blk_mq_unregister_hctx(hctx);
 
@@ -384,6 +390,9 @@ void blk_mq_unregister_disk(struct gendisk *disk)
 	kobject_put(&q->mq_kobj);
 
 	kobject_put(&disk_to_dev(disk)->kobj);
+
+	q->mq_sysfs_init_done = false;
+	mutex_unlock(&q->mq_sysfs_lock);
 }
 
 static void blk_mq_sysfs_init(struct request_queue *q)
@@ -414,27 +423,30 @@ int blk_mq_register_disk(struct gendisk *disk)
 	struct blk_mq_hw_ctx *hctx;
 	int ret, i;
 
+	mutex_lock(&q->mq_sysfs_lock);
+
 	blk_mq_sysfs_init(q);
 
 	ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq");
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	kobject_uevent(&q->mq_kobj, KOBJ_ADD);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		hctx->flags |= BLK_MQ_F_SYSFS_UP;
 		ret = blk_mq_register_hctx(hctx);
 		if (ret)
 			break;
 	}
 
-	if (ret) {
+	if (ret)
 		blk_mq_unregister_disk(disk);
-		return ret;
-	}
+	else
+		q->mq_sysfs_init_done = true;
+out:
+	mutex_unlock(&q->mq_sysfs_lock);
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(blk_mq_register_disk);
 
@@ -443,8 +455,14 @@ void blk_mq_sysfs_unregister(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
+	mutex_lock(&q->mq_sysfs_lock);
+	if (!q->mq_sysfs_init_done)
+		goto out;
+
 	queue_for_each_hw_ctx(q, hctx, i)
 		blk_mq_unregister_hctx(hctx);
+out:
+	mutex_unlock(&q->mq_sysfs_lock);
 }
 
 int blk_mq_sysfs_register(struct request_queue *q)
@@ -452,11 +470,17 @@ int blk_mq_sysfs_register(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	int i, ret = 0;
 
+	mutex_lock(&q->mq_sysfs_lock);
+	if (!q->mq_sysfs_init_done)
+		goto out;
+
 	queue_for_each_hw_ctx(q, hctx, i) {
 		ret = blk_mq_register_hctx(hctx);
 		if (ret)
 			break;
 	}
+out:
+	mutex_unlock(&q->mq_sysfs_lock);
 
 	return ret;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d4068c1..c56f5a6 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -462,6 +462,9 @@ struct request_queue {
 
 	struct blk_mq_tag_set	*tag_set;
 	struct list_head	tag_set_list;
+
+	struct mutex		mq_sysfs_lock;
+	bool			mq_sysfs_init_done;
 };
 
 #define QUEUE_FLAG_QUEUED	1	/* uses generic tag queueing */
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists