lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <YeAEM5k+KjqTj0iN@smile.fi.intel.com>
Date:   Thu, 13 Jan 2022 12:51:31 +0200
From:   Andy Shevchenko <andriy.shevchenko@...ux.intel.com>
To:     Laibin Qiu <qiulaibin@...wei.com>
Cc:     axboe@...nel.dk, ming.lei@...hat.com, john.garry@...wei.com,
        martin.petersen@...cle.com, hare@...e.de,
        akpm@...ux-foundation.org, bvanassche@....org,
        linux-block@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH -next V5] blk-mq: fix tag_get wait task can't be awakened

On Thu, Jan 13, 2022 at 10:55:36AM +0800, Laibin Qiu wrote:
> In case of shared tags, there might be more than one hctx which
> allocates from the same tags, and each hctx is limited to allocate at
> most:
>         hctx_max_depth = max((bt->sb.depth + users - 1) / users, 4U);
> 
> tag idle detection is lazy, and may be delayed for 30sec, so there
> could be just one real active hctx(queue) but all others are actually
> idle and still accounted as active because of the lazy idle detection.
> Then if wake_batch is > hctx_max_depth, driver tag allocation may wait
> forever on this real active hctx.
> 
> Fix this by recalculating wake_batch when inc or dec active_queues.

FWIW,
Reviewed-by: Andy Shevchenko <andriy.shevchenko@...ux.intel.com>

Thanks!

> Fixes: 0d2602ca30e41 ("blk-mq: improve support for shared tags maps")
> Suggested-by: Ming Lei <ming.lei@...hat.com>
> Suggested-by: John Garry <john.garry@...wei.com>
> Signed-off-by: Laibin Qiu <qiulaibin@...wei.com>
> ---
>  block/blk-mq-tag.c      | 40 +++++++++++++++++++++++++++++++++-------
>  include/linux/sbitmap.h | 11 +++++++++++
>  lib/sbitmap.c           | 25 ++++++++++++++++++++++---
>  3 files changed, 66 insertions(+), 10 deletions(-)
> 
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index e55a6834c9a6..845f74e8dd7b 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -16,6 +16,21 @@
>  #include "blk-mq-sched.h"
>  #include "blk-mq-tag.h"
>  
> +/*
> + * Recalculate wakeup batch when tag is shared by hctx.
> + */
> +static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
> +		unsigned int users)
> +{
> +	if (!users)
> +		return;
> +
> +	sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
> +			users);
> +	sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
> +			users);
> +}
> +
>  /*
>   * If a previously inactive queue goes active, bump the active user count.
>   * We need to do this before try to allocate driver tag, then even if fail
> @@ -24,18 +39,26 @@
>   */
>  bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
>  {
> +	unsigned int users;
> +
>  	if (blk_mq_is_shared_tags(hctx->flags)) {
>  		struct request_queue *q = hctx->queue;
>  
> -		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
> -		    !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
> -			atomic_inc(&hctx->tags->active_queues);
> +		if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
> +		    test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
> +			return true;
> +		}
>  	} else {
> -		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
> -		    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
> -			atomic_inc(&hctx->tags->active_queues);
> +		if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
> +		    test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
> +			return true;
> +		}
>  	}
>  
> +	users = atomic_inc_return(&hctx->tags->active_queues);
> +
> +	blk_mq_update_wake_batch(hctx->tags, users);
> +
>  	return true;
>  }
>  
> @@ -56,6 +79,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
>  void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
>  {
>  	struct blk_mq_tags *tags = hctx->tags;
> +	unsigned int users;
>  
>  	if (blk_mq_is_shared_tags(hctx->flags)) {
>  		struct request_queue *q = hctx->queue;
> @@ -68,7 +92,9 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
>  			return;
>  	}
>  
> -	atomic_dec(&tags->active_queues);
> +	users = atomic_dec_return(&tags->active_queues);
> +
> +	blk_mq_update_wake_batch(tags, users);
>  
>  	blk_mq_tag_wakeup_all(tags, false);
>  }
> diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
> index fc0357a6e19b..95df357ec009 100644
> --- a/include/linux/sbitmap.h
> +++ b/include/linux/sbitmap.h
> @@ -415,6 +415,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
>  	sbitmap_free(&sbq->sb);
>  }
>  
> +/**
> + * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
> + * @sbq: Bitmap queue to recalculate wake batch.
> + * @users: Number of shares.
> + *
> + * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch
> + * by depth. This interface is for HCTX shared tags or queue shared tags.
> + */
> +void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
> +					    unsigned int users);
> +
>  /**
>   * sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
>   * @sbq: Bitmap queue to resize.
> diff --git a/lib/sbitmap.c b/lib/sbitmap.c
> index 2709ab825499..6220fa67fb7e 100644
> --- a/lib/sbitmap.c
> +++ b/lib/sbitmap.c
> @@ -457,10 +457,9 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
>  }
>  EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
>  
> -static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
> -					    unsigned int depth)
> +static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
> +					    unsigned int wake_batch)
>  {
> -	unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
>  	int i;
>  
>  	if (sbq->wake_batch != wake_batch) {
> @@ -476,6 +475,26 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
>  	}
>  }
>  
> +static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
> +					    unsigned int depth)
> +{
> +	unsigned int wake_batch;
> +
> +	wake_batch = sbq_calc_wake_batch(sbq, depth);
> +	__sbitmap_queue_update_wake_batch(sbq, wake_batch);
> +}
> +
> +void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
> +					    unsigned int users)
> +{
> +	unsigned int wake_batch;
> +
> +	wake_batch = clamp_val((sbq->sb.depth + users - 1) /
> +			users, 4, SBQ_WAKE_BATCH);
> +	__sbitmap_queue_update_wake_batch(sbq, wake_batch);
> +}
> +EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
> +
>  void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
>  {
>  	sbitmap_queue_update_wake_batch(sbq, depth);
> -- 
> 2.22.0
> 

-- 
With Best Regards,
Andy Shevchenko


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ