linux-kernel - Re: [PATCH 3/3] blk-mq: Fix the queue freezing mechanism

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 24 Sep 2015 11:22:51 +0800
From:	Ming Lei <tom.leiming@...il.com>
To:	Bart Van Assche <bart.vanassche@...disk.com>
Cc:	Jens Axboe <axboe@...com>, Christoph Hellwig <hch@....de>,
	Tejun Heo <tj@...nel.org>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	tom.leiming@...il.com, Akinobu Mita <akinobu.mita@...il.com>
Subject: Re: [PATCH 3/3] blk-mq: Fix the queue freezing mechanism

On Wed, 23 Sep 2015 15:14:10 -0700
Bart Van Assche <bart.vanassche@...disk.com> wrote:

> Ensure that blk_mq_queue_enter() waits if mq_freeze_depth is not
> zero. Ensure that the update of mq_freeze_depth by blk_mq_freeze_queue()
> is visible by all CPU cores before that function waits on
> mq_usage_counter.
> 
> It is unfortunate that this patch introduces an smp_mb() in the
> hot path (blk_mq_queue_enter()) but I have not yet found a way to
> avoid this.
> 
> I came across this code while analyzing a lockup triggered by
> deleting a SCSI host created by the SRP initiator immediately
> followed by a relogin.
> 
> Signed-off-by: Bart Van Assche <bart.vanassche@...disk.com>
> Cc: Christoph Hellwig <hch@....de>
> Cc: Tejun Heo <tj@...nel.org>
> Cc: <stable@...r.kernel.org>
> ---
>  block/blk-mq.c | 14 ++++++++++++--
>  1 file changed, 12 insertions(+), 2 deletions(-)
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 2077f0d..e3ad411 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -83,8 +83,13 @@ static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
>  	while (true) {
>  		int ret;
>  
> -		if (percpu_ref_tryget_live(&q->mq_usage_counter))
> -			return 0;
> +		if (percpu_ref_tryget_live(&q->mq_usage_counter)) {
> +			/* Order mq_usage_counter and mq_freeze_depth accesses */
> +			smp_mb();
> +			if (!atomic_read(&q->mq_freeze_depth))
> +				return 0;
> +			percpu_ref_put(&q->mq_usage_counter);
> +		}

IMO, mq_freeze_depth should only be accessed in the slow path, and it
looks like the race only happens during the small window between
increasing 'mq_freeze_depth' and killing the percpu counter.

One solution I thought of is the following patch, which depends on
Akinobu's patch (blk-mq: fix freeze queue race
http://marc.info/?l=linux-kernel&m=143723697010781&w=2).

---
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f774f67..1c71c04 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -77,6 +77,17 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
 	clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word);
 }
 
+static inline int blk_mq_read_freeze_depth(struct request_queue *q)
+{
+	int  depth;
+
+	mutex_lock(&q->mq_freeze_lock);
+	depth = q->mq_freeze_depth;
+	mutex_unlock(&q->mq_freeze_lock);
+
+	return depth;
+}
+
 static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
 {
 	while (true) {
@@ -89,7 +100,7 @@ static int blk_mq_queue_enter(struct request_queue *q, gfp_t gfp)
 			return -EBUSY;
 
 		ret = wait_event_interruptible(q->mq_freeze_wq,
-				!atomic_read(&q->mq_freeze_depth) ||
+				!blk_mq_read_freeze_depth(q) ||
 				blk_queue_dying(q));
 		if (blk_queue_dying(q))
 			return -ENODEV;
@@ -113,12 +124,9 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref)
 
 void blk_mq_freeze_queue_start(struct request_queue *q)
 {
-	int freeze_depth;
-
 	mutex_lock(&q->mq_freeze_lock);
 
-	freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
-	if (freeze_depth == 1) {
+	if (!q->mq_freeze_depth++) {
 		percpu_ref_kill(&q->mq_usage_counter);
 		blk_mq_run_hw_queues(q, false);
 	}
@@ -149,7 +157,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
 
 	mutex_lock(&q->mq_freeze_lock);
 
-	freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
+	freeze_depth = --q->mq_freeze_depth;
 	WARN_ON_ONCE(freeze_depth < 0);
 	if (!freeze_depth) {
 		percpu_ref_reinit(&q->mq_usage_counter);
@@ -2084,7 +2092,7 @@ void blk_mq_free_queue(struct request_queue *q)
 /* Basically redo blk_mq_init_queue with queue frozen */
 static void blk_mq_queue_reinit(struct request_queue *q)
 {
-	WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
+	WARN_ON_ONCE(!ACCESS_ONCE(q->mq_freeze_depth));
 
 	blk_mq_sysfs_unregister(q);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 6cdf2b7..86fedcc 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -436,7 +436,7 @@ struct request_queue {
 	struct mutex		sysfs_lock;
 
 	int			bypass_depth;
-	atomic_t		mq_freeze_depth;
+	int			mq_freeze_depth;
 
 #if defined(CONFIG_BLK_DEV_BSG)
 	bsg_job_fn		*bsg_job_fn;



>  
>  		if (!(gfp & __GFP_WAIT))
>  			return -EBUSY;
> @@ -136,6 +141,11 @@ static void blk_mq_freeze_queue_wait(struct request_queue *q)
>  void blk_mq_freeze_queue(struct request_queue *q)
>  {
>  	blk_mq_freeze_queue_start(q);
> +	/*
> +	 * Ensure that the mq_freeze_depth update is visible before
> +	 * mq_usage_counter is read.
> +	 */
> +	smp_mb();
>  	blk_mq_freeze_queue_wait(q);
>  }
>  EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/