Message-ID: <1430d56b-ed99-6c30-85e2-11d0a8a40a12@huawei.com>
Date:   Fri, 7 Jan 2022 12:05:33 +0000
From:   John Garry <john.garry@...wei.com>
To:     qiulaibin <qiulaibin@...wei.com>,
        "axboe@...nel.dk" <axboe@...nel.dk>,
        "ming.lei@...hat.com" <ming.lei@...hat.com>
CC:     "martin.petersen@...cle.com" <martin.petersen@...cle.com>,
        "hare@...e.de" <hare@...e.de>,
        "andriy.shevchenko@...ux.intel.com" 
        <andriy.shevchenko@...ux.intel.com>,
        "bvanassche@....org" <bvanassche@....org>,
        "linux-block@...r.kernel.org" <linux-block@...r.kernel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH -next v3] blk-mq: fix tag_get wait task can't be awakened

On 06/01/2022 13:34, qiulaibin wrote:
> In case of shared tags, there might be more than one hctx which
> allocates tag from single tags,

how about "allocates from the same tags"?

> and each hctx is limited to allocate at
> most:
>          hctx_max_depth = max((bt->sb.depth + users - 1) / users, 4U);
> 
> tag idle detection is lazy, and may be delayed for 30sec, so there
> could be just one real active hctx(queue) but all others are actually
> idle and still accounted as active because of the lazy idle detection.
> Then if wake_batch is > hctx_max_depth, driver tag allocation may wait
> forever on this real active hctx.
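
If I have the numbers right, a concrete example: with sb.depth = 64 and
users = 16, each hctx is limited to max((64 + 15) / 16, 4) = 4 tags,
while the wake batch computed from the full depth is
clamp(64 / SBQ_WAIT_QUEUES, 1, SBQ_WAKE_BATCH) = 8 (both constants are
8). The one real active hctx can never free 8 tags at once, so its
waiters are never woken.
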
> 
> Fix this by recalculating wake_batch when inc or dec active_queues.
> 
> Fixes: 0d2602ca30e41 ("blk-mq: improve support for shared tags maps")
> Suggested-by: Ming Lei <ming.lei@...hat.com>
> Signed-off-by: Laibin Qiu <qiulaibin@...wei.com>
> ---
>   block/blk-mq-tag.c      | 32 ++++++++++++++++++++++++++++++--
>   include/linux/sbitmap.h | 11 +++++++++++
>   lib/sbitmap.c           | 22 +++++++++++++++++++---
>   3 files changed, 60 insertions(+), 5 deletions(-)
> 
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index e55a6834c9a6..e59ebf89c1bf 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -16,6 +16,21 @@
>   #include "blk-mq-sched.h"
>   #include "blk-mq-tag.h"
>   
> +/*
> + * Recalculate wakeup batch when tag is shared by hctx.
> + */
> +static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
> +		unsigned int users)
> +{
> +	if (!users)
> +		return;
> +
> +	sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
> +			users);
> +	sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
> +			users);
> +}
> +
>   /*
>    * If a previously inactive queue goes active, bump the active user count.
>    * We need to do this before try to allocate driver tag, then even if fail
> @@ -24,16 +39,25 @@
>    */
>   bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
>   {
> +	unsigned int users;
>   	if (blk_mq_is_shared_tags(hctx->flags)) {
>   		struct request_queue *q = hctx->queue;
>   
>   		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
> -		    !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
> +		    !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
>   			atomic_inc(&hctx->tags->active_queues);
> +
> +			users = atomic_read(&hctx->tags->active_queues);
> +			blk_mq_update_wake_batch(hctx->tags, users);
> +		}
>   	} else {
>   		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
> -		    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
> +		    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
>   			atomic_inc(&hctx->tags->active_queues);
> +
> +			users = atomic_read(&hctx->tags->active_queues);

atomic_inc_return() can do the inc and read together
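
Something like this (untested):

	/* bump the count and get the new value in one atomic op */
	users = atomic_inc_return(&hctx->tags->active_queues);
	blk_mq_update_wake_batch(hctx->tags, users);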

> +			blk_mq_update_wake_batch(hctx->tags, users);
> +		}

there seems to be more duplicated code here now, as we do the same in 
both legs of the if-else statement.
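
Could the inc + recalc be factored out into a small helper, something
like this (untested, helper name made up)?

static void blk_mq_inc_active_queues(struct blk_mq_tags *tags)
{
	/* account the new active queue and rescale the wake batch */
	unsigned int users = atomic_inc_return(&tags->active_queues);

	blk_mq_update_wake_batch(tags, users);
}

Then both legs of the if-else reduce to a single
blk_mq_inc_active_queues(hctx->tags) call.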

>   	}
>   
>   	return true;
> @@ -56,6 +80,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
>   void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
>   {
>   	struct blk_mq_tags *tags = hctx->tags;
> +	unsigned int users;
>   
>   	if (blk_mq_is_shared_tags(hctx->flags)) {
>   		struct request_queue *q = hctx->queue;
> @@ -70,6 +95,9 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
>   
>   	atomic_dec(&tags->active_queues);
>   
> +	users = atomic_read(&hctx->tags->active_queues);

as above, atomic_dec_return()
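
i.e. (untested):

	/* dec and read the remaining active queues in one atomic op */
	users = atomic_dec_return(&tags->active_queues);
	blk_mq_update_wake_batch(tags, users);

(and the local 'tags' variable could be used here rather than
hctx->tags)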

> +	blk_mq_update_wake_batch(hctx->tags, users);
> +
>   	blk_mq_tag_wakeup_all(tags, false);
>   }
>   
> diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
> index fc0357a6e19b..e1fced98dfca 100644
> --- a/include/linux/sbitmap.h
> +++ b/include/linux/sbitmap.h
> @@ -415,6 +415,17 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq)
>   	sbitmap_free(&sbq->sb);
>   }
>   
> +/**
> + * sbitmap_queue_recalculate_wake_batch() - Recalculate wake batch
> + * @sbq: Bitmap queue to Recalculate wake batch.

s/Recalculate/recalculate/

> + * @users: Number of shares.
> + *
> + * Like sbitmap_queue_update_wake_batch(), this will calculate wake batch
> + * by depth. This interface is for sharing tags.

we have the concept of shared tags (flag BLK_MQ_F_TAG_HCTX_SHARED) and
queue shared tags (flag BLK_MQ_F_TAG_QUEUE_SHARED) - please be careful
with the terminology

> + */
> +void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
> +					    unsigned int users);
> +
>   /**
>    * sbitmap_queue_resize() - Resize a &struct sbitmap_queue.
>    * @sbq: Bitmap queue to resize.
> diff --git a/lib/sbitmap.c b/lib/sbitmap.c
> index 2709ab825499..94b3272effd8 100644
> --- a/lib/sbitmap.c
> +++ b/lib/sbitmap.c
> @@ -457,10 +457,9 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
>   }
>   EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
>   
> -static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
> -					    unsigned int depth)
> +static inline void __sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
> +					    unsigned int wake_batch)
>   {
> -	unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
>   	int i;
>   
>   	if (sbq->wake_batch != wake_batch) {
> @@ -476,6 +475,23 @@ static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
>   	}
>   }
>   
> +static void sbitmap_queue_update_wake_batch(struct sbitmap_queue *sbq,
> +					    unsigned int depth)
> +{
> +	unsigned int wake_batch = sbq_calc_wake_batch(sbq, depth);
> +
> +	__sbitmap_queue_update_wake_batch(sbq, wake_batch);
> +}
> +
> +void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq,
> +					    unsigned int users)
> +{
> +	unsigned int wake_batch = clamp_t(unsigned int,
> +			(sbq->sb.depth + users - 1) / users, 4U, SBQ_WAKE_BATCH);
> +	__sbitmap_queue_update_wake_batch(sbq, wake_batch);
> +}
> +EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch);
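
If I read the arithmetic right, for the earlier example of depth = 64
and users = 16 this gives wake_batch = clamp((64 + 15) / 16, 4, 8) = 4,
which matches the per-hctx limit of max((64 + 15) / 16, 4) = 4, so the
waiters can always be woken.
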
> +
>   void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
>   {
>   	sbitmap_queue_update_wake_batch(sbq, depth);
> 
