Message-ID: <YU/oIu2uQ420ol8F@T590>
Date:   Sun, 26 Sep 2021 11:25:22 +0800
From:   Ming Lei <ming.lei@...hat.com>
To:     John Garry <john.garry@...wei.com>
Cc:     axboe@...nel.dk, linux-block@...r.kernel.org,
        linux-kernel@...r.kernel.org, linux-scsi@...r.kernel.org,
        hare@...e.de
Subject: Re: [PATCH v4 12/13] blk-mq: Use shared tags for shared sbitmap
 support

On Fri, Sep 24, 2021 at 04:28:29PM +0800, John Garry wrote:
> Currently we use separate sbitmap pairs and active_queues atomic_t for
> shared sbitmap support.
> 
> However, a full set of static requests is allocated per HW queue, which is
> quite wasteful, considering that the total number of requests usable at
> any given time across all HW queues is limited by the shared sbitmap depth.
> 
> As such, it is considerably more memory efficient in the case of shared
> sbitmap to allocate a set of static rqs per tag set or request queue, and
> not per HW queue.
> 
> So replace the sbitmap pairs and active_queues atomic_t with a shared tags
> struct per tag set and request queue, which will hold the set of shared
> static rqs.
> 
> Since there is now no valid HW queue index to be passed to the blk_mq_ops
> .init and .exit_request callbacks, pass an invalid index token. This
> changes the semantics of the APIs, such that the callback would need to
> validate the HW queue index before using it. Currently no user of shared
> sbitmap actually uses the HW queue index (as would be expected).
> 
> Continue to use the term "shared sbitmap" for now, as its meaning is known.
> 
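Just to illustrate the new .init_request/.exit_request contract described
above, an untested, made-up driver callback (foo_init_request is not from
this patch):

	static int foo_init_request(struct blk_mq_tag_set *set, struct request *rq,
				    unsigned int hctx_idx, unsigned int numa_node)
	{
		/*
		 * With shared sbitmap the static rqs are allocated once per
		 * tag set, so hctx_idx may now be BLK_MQ_NO_HCTX_IDX and has
		 * to be validated before being used as a per-hctx index.
		 */
		if (hctx_idx != BLK_MQ_NO_HCTX_IDX) {
			/* safe to use hctx_idx for per-hctx driver data */
		}

		return 0;
	}
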
> Signed-off-by: John Garry <john.garry@...wei.com>
> ---
>  block/blk-mq-sched.c   | 82 ++++++++++++++++++-------------------
>  block/blk-mq-tag.c     | 61 ++++++++++------------------
>  block/blk-mq-tag.h     |  6 +--
>  block/blk-mq.c         | 91 +++++++++++++++++++++++-------------------
>  block/blk-mq.h         |  5 ++-
>  include/linux/blk-mq.h | 15 ++++---
>  include/linux/blkdev.h |  3 +-
>  7 files changed, 125 insertions(+), 138 deletions(-)
> 
> diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
> index d1b56bb9ac64..428da4949d80 100644
> --- a/block/blk-mq-sched.c
> +++ b/block/blk-mq-sched.c
> @@ -519,6 +519,11 @@ static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
>  					  struct blk_mq_hw_ctx *hctx,
>  					  unsigned int hctx_idx)
>  {
> +	if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
> +		hctx->sched_tags = q->shared_sbitmap_tags;
> +		return 0;
> +	}
> +
>  	hctx->sched_tags = blk_mq_alloc_map_and_rqs(q->tag_set, hctx_idx,
>  						    q->nr_requests);
>  
> @@ -527,61 +532,54 @@ static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
>  	return 0;
>  }
>  
> +static void blk_mq_exit_sched_shared_sbitmap(struct request_queue *queue)
> +{
> +	blk_mq_free_rq_map(queue->shared_sbitmap_tags);
> +	queue->shared_sbitmap_tags = NULL;
> +}
> +
>  /* called in queue's release handler, tagset has gone away */
> -static void blk_mq_sched_tags_teardown(struct request_queue *q)
> +static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags)
>  {
>  	struct blk_mq_hw_ctx *hctx;
>  	int i;
>  
>  	queue_for_each_hw_ctx(q, hctx, i) {
>  		if (hctx->sched_tags) {
> -			blk_mq_free_rq_map(hctx->sched_tags, hctx->flags);
> +			if (!blk_mq_is_sbitmap_shared(q->tag_set->flags))
> +				blk_mq_free_rq_map(hctx->sched_tags);
>  			hctx->sched_tags = NULL;
>  		}
>  	}
> +
> +	if (blk_mq_is_sbitmap_shared(flags))
> +		blk_mq_exit_sched_shared_sbitmap(q);
>  }
>  
>  static int blk_mq_init_sched_shared_sbitmap(struct request_queue *queue)
>  {
>  	struct blk_mq_tag_set *set = queue->tag_set;
> -	int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
> -	struct blk_mq_hw_ctx *hctx;
> -	int ret, i;
>  
>  	/*
>  	 * Set initial depth at max so that we don't need to reallocate for
>  	 * updating nr_requests.
>  	 */
> -	ret = blk_mq_init_bitmaps(&queue->sched_bitmap_tags,
> -				  &queue->sched_breserved_tags,
> -				  MAX_SCHED_RQ, set->reserved_tags,
> -				  set->numa_node, alloc_policy);
> -	if (ret)
> -		return ret;
> -
> -	queue_for_each_hw_ctx(queue, hctx, i) {
> -		hctx->sched_tags->bitmap_tags =
> -					&queue->sched_bitmap_tags;
> -		hctx->sched_tags->breserved_tags =
> -					&queue->sched_breserved_tags;
> -	}
> +	queue->shared_sbitmap_tags = blk_mq_alloc_map_and_rqs(set,
> +						BLK_MQ_NO_HCTX_IDX,
> +						MAX_SCHED_RQ);
> +	if (!queue->shared_sbitmap_tags)
> +		return -ENOMEM;
>  
>  	blk_mq_tag_update_sched_shared_sbitmap(queue);
>  
>  	return 0;
>  }
>  
> -static void blk_mq_exit_sched_shared_sbitmap(struct request_queue *queue)
> -{
> -	sbitmap_queue_free(&queue->sched_bitmap_tags);
> -	sbitmap_queue_free(&queue->sched_breserved_tags);
> -}
> -
>  int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
>  {
> +	unsigned int i, flags = q->tag_set->flags;
>  	struct blk_mq_hw_ctx *hctx;
>  	struct elevator_queue *eq;
> -	unsigned int i;
>  	int ret;
>  
>  	if (!e) {
> @@ -598,21 +596,21 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
>  	q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
>  				   BLKDEV_DEFAULT_RQ);
>  
> -	queue_for_each_hw_ctx(q, hctx, i) {
> -		ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
> +	if (blk_mq_is_sbitmap_shared(flags)) {
> +		ret = blk_mq_init_sched_shared_sbitmap(q);
>  		if (ret)
> -			goto err_free_map_and_rqs;
> +			return ret;
>  	}
>  
> -	if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
> -		ret = blk_mq_init_sched_shared_sbitmap(q);
> +	queue_for_each_hw_ctx(q, hctx, i) {
> +		ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
>  		if (ret)
>  			goto err_free_map_and_rqs;
>  	}
>  
>  	ret = e->ops.init_sched(q, e);
>  	if (ret)
> -		goto err_free_sbitmap;
> +		goto err_free_map_and_rqs;
>  
>  	blk_mq_debugfs_register_sched(q);
>  
> @@ -632,12 +630,10 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
>  
>  	return 0;
>  
> -err_free_sbitmap:
> -	if (blk_mq_is_sbitmap_shared(q->tag_set->flags))
> -		blk_mq_exit_sched_shared_sbitmap(q);
>  err_free_map_and_rqs:
>  	blk_mq_sched_free_rqs(q);
> -	blk_mq_sched_tags_teardown(q);
> +	blk_mq_sched_tags_teardown(q, flags);
> +
>  	q->elevator = NULL;
>  	return ret;
>  }
> @@ -651,9 +647,15 @@ void blk_mq_sched_free_rqs(struct request_queue *q)
>  	struct blk_mq_hw_ctx *hctx;
>  	int i;
>  
> -	queue_for_each_hw_ctx(q, hctx, i) {
> -		if (hctx->sched_tags)
> -			blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i);
> +	if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
> +		blk_mq_free_rqs(q->tag_set, q->shared_sbitmap_tags,
> +				BLK_MQ_NO_HCTX_IDX);
> +	} else {
> +		queue_for_each_hw_ctx(q, hctx, i) {
> +			if (hctx->sched_tags)
> +				blk_mq_free_rqs(q->tag_set,
> +						hctx->sched_tags, i);
> +		}
>  	}
>  }
>  
> @@ -674,8 +676,6 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
>  	blk_mq_debugfs_unregister_sched(q);
>  	if (e->type->ops.exit_sched)
>  		e->type->ops.exit_sched(e);
> -	blk_mq_sched_tags_teardown(q);
> -	if (blk_mq_is_sbitmap_shared(flags))
> -		blk_mq_exit_sched_shared_sbitmap(q);
> +	blk_mq_sched_tags_teardown(q, flags);
>  	q->elevator = NULL;
>  }
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index a0ecc6d88f84..4e71ce6b37ea 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -27,10 +27,11 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
>  	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
>  		struct request_queue *q = hctx->queue;
>  		struct blk_mq_tag_set *set = q->tag_set;
> +		struct blk_mq_tags *tags = set->shared_sbitmap_tags;

The local variable 'set' can be removed, and 'tags' can simply be retrieved
from hctx->tags.
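i.e. something like this (untested sketch; it relies on the patch pointing
set->tags[hctx_idx], and hence hctx->tags, at the shared tags):

	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
		struct request_queue *q = hctx->queue;
		struct blk_mq_tags *tags = hctx->tags;

		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
		    !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
			atomic_inc(&tags->active_queues);
	} else {
		...
	}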

>  
>  		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
>  		    !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
> -			atomic_inc(&set->active_queues_shared_sbitmap);
> +			atomic_inc(&tags->active_queues);
>  	} else {
>  		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
>  		    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
> @@ -61,10 +62,12 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
>  	struct blk_mq_tag_set *set = q->tag_set;
>  
>  	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
> +		struct blk_mq_tags *tags = set->shared_sbitmap_tags;
> +

Same as above.

>  		if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
>  					&q->queue_flags))
>  			return;
> -		atomic_dec(&set->active_queues_shared_sbitmap);
> +		atomic_dec(&tags->active_queues);
>  	} else {
>  		if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
>  			return;
> @@ -510,38 +513,10 @@ static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
>  	return 0;
>  }
>  
> -int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set)
> -{
> -	int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
> -	int i, ret;
> -
> -	ret = blk_mq_init_bitmaps(&set->__bitmap_tags, &set->__breserved_tags,
> -				  set->queue_depth, set->reserved_tags,
> -				  set->numa_node, alloc_policy);
> -	if (ret)
> -		return ret;
> -
> -	for (i = 0; i < set->nr_hw_queues; i++) {
> -		struct blk_mq_tags *tags = set->tags[i];
> -
> -		tags->bitmap_tags = &set->__bitmap_tags;
> -		tags->breserved_tags = &set->__breserved_tags;
> -	}
> -
> -	return 0;
> -}
> -
> -void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set)
> -{
> -	sbitmap_queue_free(&set->__bitmap_tags);
> -	sbitmap_queue_free(&set->__breserved_tags);
> -}
> -
>  struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
>  				     unsigned int reserved_tags,
> -				     int node, unsigned int flags)
> +				     int node, int alloc_policy)
>  {
> -	int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(flags);
>  	struct blk_mq_tags *tags;
>  
>  	if (total_tags > BLK_MQ_TAG_MAX) {
> @@ -557,9 +532,6 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
>  	tags->nr_reserved_tags = reserved_tags;
>  	spin_lock_init(&tags->lock);
>  
> -	if (blk_mq_is_sbitmap_shared(flags))
> -		return tags;
> -
>  	if (blk_mq_init_bitmap_tags(tags, node, alloc_policy) < 0) {
>  		kfree(tags);
>  		return NULL;
> @@ -567,12 +539,10 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
>  	return tags;
>  }
>  
> -void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags)
> +void blk_mq_free_tags(struct blk_mq_tags *tags)
>  {
> -	if (!blk_mq_is_sbitmap_shared(flags)) {
> -		sbitmap_queue_free(tags->bitmap_tags);
> -		sbitmap_queue_free(tags->breserved_tags);
> -	}
> +	sbitmap_queue_free(tags->bitmap_tags);
> +	sbitmap_queue_free(tags->breserved_tags);
>  	kfree(tags);
>  }
>  
> @@ -603,6 +573,13 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
>  		if (tdepth > MAX_SCHED_RQ)
>  			return -EINVAL;
>  
> +		/*
> +		 * Only the sbitmap needs resizing since we allocated the max
> +		 * initially.
> +		 */
> +		if (blk_mq_is_sbitmap_shared(set->flags))
> +			return 0;
> +
>  		new = blk_mq_alloc_map_and_rqs(set, hctx->queue_num, tdepth);
>  		if (!new)
>  			return -ENOMEM;
> @@ -623,12 +600,14 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
>  
>  void blk_mq_tag_resize_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int size)
>  {
> -	sbitmap_queue_resize(&set->__bitmap_tags, size - set->reserved_tags);
> +	struct blk_mq_tags *tags = set->shared_sbitmap_tags;
> +
> +	sbitmap_queue_resize(&tags->__bitmap_tags, size - set->reserved_tags);
>  }
>  
>  void blk_mq_tag_update_sched_shared_sbitmap(struct request_queue *q)
>  {
> -	sbitmap_queue_resize(&q->sched_bitmap_tags,
> +	sbitmap_queue_resize(q->shared_sbitmap_tags->bitmap_tags,
>  			     q->nr_requests - q->tag_set->reserved_tags);
>  }
>  
> diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
> index 88f3c6485543..e433e39a9cfa 100644
> --- a/block/blk-mq-tag.h
> +++ b/block/blk-mq-tag.h
> @@ -30,16 +30,14 @@ struct blk_mq_tags {
>  
>  extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
>  					unsigned int reserved_tags,
> -					int node, unsigned int flags);
> -extern void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags);
> +					int node, int alloc_policy);
> +extern void blk_mq_free_tags(struct blk_mq_tags *tags);
>  extern int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
>  			       struct sbitmap_queue *breserved_tags,
>  			       unsigned int queue_depth,
>  			       unsigned int reserved,
>  			       int node, int alloc_policy);
>  
> -extern int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set);
> -extern void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set);
>  extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
>  extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
>  			   unsigned int tag);
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 464ea20b9bcb..ece43855bcdf 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -2344,7 +2344,10 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
>  	struct blk_mq_tags *drv_tags;
>  	struct page *page;
>  
> -	drv_tags = set->tags[hctx_idx];
> +	if (blk_mq_is_sbitmap_shared(set->flags))
> +		drv_tags = set->shared_sbitmap_tags;
> +	else
> +		drv_tags = set->tags[hctx_idx];
>  
>  	if (tags->static_rqs && set->ops->exit_request) {
>  		int i;
> @@ -2373,21 +2376,20 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
>  	}
>  }
>  
> -void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags)
> +void blk_mq_free_rq_map(struct blk_mq_tags *tags)
>  {
>  	kfree(tags->rqs);
>  	tags->rqs = NULL;
>  	kfree(tags->static_rqs);
>  	tags->static_rqs = NULL;
>  
> -	blk_mq_free_tags(tags, flags);
> +	blk_mq_free_tags(tags);
>  }
>  
>  static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
>  					       unsigned int hctx_idx,
>  					       unsigned int nr_tags,
> -					       unsigned int reserved_tags,
> -					       unsigned int flags)
> +					       unsigned int reserved_tags)
>  {
>  	struct blk_mq_tags *tags;
>  	int node;
> @@ -2396,7 +2398,8 @@ static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
>  	if (node == NUMA_NO_NODE)
>  		node = set->numa_node;
>  
> -	tags = blk_mq_init_tags(nr_tags, reserved_tags, node, flags);
> +	tags = blk_mq_init_tags(nr_tags, reserved_tags, node,
> +				BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags));
>  	if (!tags)
>  		return NULL;
>  
> @@ -2404,7 +2407,7 @@ static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
>  				 GFP_NOIO | __GFP_NOWARN | __GFP_NORETRY,
>  				 node);
>  	if (!tags->rqs) {
> -		blk_mq_free_tags(tags, flags);
> +		blk_mq_free_tags(tags);
>  		return NULL;
>  	}
>  
> @@ -2413,7 +2416,7 @@ static struct blk_mq_tags *blk_mq_alloc_rq_map(struct blk_mq_tag_set *set,
>  					node);
>  	if (!tags->static_rqs) {
>  		kfree(tags->rqs);
> -		blk_mq_free_tags(tags, flags);
> +		blk_mq_free_tags(tags);
>  		return NULL;
>  	}
>  
> @@ -2855,14 +2858,13 @@ struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
>  	struct blk_mq_tags *tags;
>  	int ret;
>  
> -	tags = blk_mq_alloc_rq_map(set, hctx_idx, depth, set->reserved_tags,
> -				   set->flags);
> +	tags = blk_mq_alloc_rq_map(set, hctx_idx, depth, set->reserved_tags);
>  	if (!tags)
>  		return NULL;
>  
>  	ret = blk_mq_alloc_rqs(set, tags, hctx_idx, depth);
>  	if (ret) {
> -		blk_mq_free_rq_map(tags, set->flags);
> +		blk_mq_free_rq_map(tags);
>  		return NULL;
>  	}
>  
> @@ -2872,6 +2874,12 @@ struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
>  static bool __blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
>  				       int hctx_idx)
>  {
> +	if (blk_mq_is_sbitmap_shared(set->flags)) {
> +		set->tags[hctx_idx] = set->shared_sbitmap_tags;
> +
> +		return true;
> +	}
> +
>  	set->tags[hctx_idx] = blk_mq_alloc_map_and_rqs(set, hctx_idx,
>  						       set->queue_depth);
>  
> @@ -2882,14 +2890,22 @@ void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
>  			     struct blk_mq_tags *tags,
>  			     unsigned int hctx_idx)
>  {
> -	unsigned int flags = set->flags;
> -
>  	if (tags) {
>  		blk_mq_free_rqs(set, tags, hctx_idx);
> -		blk_mq_free_rq_map(tags, flags);
> +		blk_mq_free_rq_map(tags);
>  	}
>  }
>  
> +static void __blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
> +				      struct blk_mq_tags *tags,
> +				      unsigned int hctx_idx)
> +{
> +	if (blk_mq_is_sbitmap_shared(set->flags))
> +		return;
> +
> +	blk_mq_free_map_and_rqs(set, tags, hctx_idx);
> +}
> +
>  static void blk_mq_map_swqueue(struct request_queue *q)
>  {
>  	unsigned int i, j, hctx_idx;
> @@ -2968,7 +2984,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
>  			 * allocation
>  			 */
>  			if (i && set->tags[i]) {
> -				blk_mq_free_map_and_rqs(set, set->tags[i], i);
> +				__blk_mq_free_map_and_rqs(set, set->tags[i], i);
>  				set->tags[i] = NULL;
>  			}
>  
> @@ -3266,7 +3282,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
>  		struct blk_mq_hw_ctx *hctx = hctxs[j];
>  
>  		if (hctx) {
> -			blk_mq_free_map_and_rqs(set, set->tags[j], j);
> +			__blk_mq_free_map_and_rqs(set, set->tags[j], j);
>  			set->tags[j] = NULL;
>  			blk_mq_exit_hctx(q, set, hctx, j);
>  			hctxs[j] = NULL;
> @@ -3354,6 +3370,14 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
>  {
>  	int i;
>  
> +	if (blk_mq_is_sbitmap_shared(set->flags)) {
> +		set->shared_sbitmap_tags = blk_mq_alloc_map_and_rqs(set,
> +						BLK_MQ_NO_HCTX_IDX,
> +						set->queue_depth);
> +		if (!set->shared_sbitmap_tags)
> +			return -ENOMEM;
> +	}
> +
>  	for (i = 0; i < set->nr_hw_queues; i++) {
>  		if (!__blk_mq_alloc_map_and_rqs(set, i))
>  			goto out_unwind;
> @@ -3364,10 +3388,15 @@ static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
>  
>  out_unwind:
>  	while (--i >= 0) {
> -		blk_mq_free_map_and_rqs(set, set->tags[i], i);
> +		__blk_mq_free_map_and_rqs(set, set->tags[i], i);
>  		set->tags[i] = NULL;
>  	}
>  
> +	if (blk_mq_is_sbitmap_shared(set->flags)) {
> +		blk_mq_free_map_and_rqs(set, set->shared_sbitmap_tags,
> +					BLK_MQ_NO_HCTX_IDX);
> +	}
> +
>  	return -ENOMEM;
>  }
>  
> @@ -3546,25 +3575,11 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
>  	if (ret)
>  		goto out_free_mq_map;
>  
> -	if (blk_mq_is_sbitmap_shared(set->flags)) {
> -		atomic_set(&set->active_queues_shared_sbitmap, 0);
> -
> -		if (blk_mq_init_shared_sbitmap(set)) {
> -			ret = -ENOMEM;
> -			goto out_free_mq_rq_maps;
> -		}
> -	}
> -
>  	mutex_init(&set->tag_list_lock);
>  	INIT_LIST_HEAD(&set->tag_list);
>  
>  	return 0;
>  
> -out_free_mq_rq_maps:
> -	for (i = 0; i < set->nr_hw_queues; i++) {
> -		blk_mq_free_map_and_rqs(set, set->tags[i], i);
> -		set->tags[i] = NULL;
> -	}
>  out_free_mq_map:
>  	for (i = 0; i < set->nr_maps; i++) {
>  		kfree(set->map[i].mq_map);
> @@ -3597,12 +3612,14 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
>  	int i, j;
>  
>  	for (i = 0; i < set->nr_hw_queues; i++) {
> -		blk_mq_free_map_and_rqs(set, set->tags[i], i);
> +		__blk_mq_free_map_and_rqs(set, set->tags[i], i);
>  		set->tags[i] = NULL;
>  	}
>  
> -	if (blk_mq_is_sbitmap_shared(set->flags))
> -		blk_mq_exit_shared_sbitmap(set);
> +	if (blk_mq_is_sbitmap_shared(set->flags)) {
> +		blk_mq_free_map_and_rqs(set, set->shared_sbitmap_tags,
> +					BLK_MQ_NO_HCTX_IDX);
> +	}
>  
>  	for (j = 0; j < set->nr_maps; j++) {
>  		kfree(set->map[j].mq_map);
> @@ -3640,12 +3657,6 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
>  		if (hctx->sched_tags) {
>  			ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
>  						      nr, true);
> -			if (blk_mq_is_sbitmap_shared(set->flags)) {
> -				hctx->sched_tags->bitmap_tags =
> -					&q->sched_bitmap_tags;
> -				hctx->sched_tags->breserved_tags =
> -					&q->sched_breserved_tags;
> -			}
>  		} else {
>  			ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr,
>  						      false);
> diff --git a/block/blk-mq.h b/block/blk-mq.h
> index bcb0ca89d37a..b34385211e0a 100644
> --- a/block/blk-mq.h
> +++ b/block/blk-mq.h
> @@ -54,7 +54,7 @@ void blk_mq_put_rq_ref(struct request *rq);
>   */
>  void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
>  		     unsigned int hctx_idx);
> -void blk_mq_free_rq_map(struct blk_mq_tags *tags, unsigned int flags);
> +void blk_mq_free_rq_map(struct blk_mq_tags *tags);
>  struct blk_mq_tags *blk_mq_alloc_map_and_rqs(struct blk_mq_tag_set *set,
>  				unsigned int hctx_idx, unsigned int depth);
>  void blk_mq_free_map_and_rqs(struct blk_mq_tag_set *set,
> @@ -331,10 +331,11 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
>  	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
>  		struct request_queue *q = hctx->queue;
>  		struct blk_mq_tag_set *set = q->tag_set;
> +		struct blk_mq_tags *tags = set->shared_sbitmap_tags;

Same as above: 'tags' can be retrieved from hctx->tags directly, and both
'q' and 'set' can be removed.
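Something like this, roughly (untested):

	if (blk_mq_is_sbitmap_shared(hctx->flags)) {
		struct blk_mq_tags *tags = hctx->tags;

		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE,
			      &hctx->queue->queue_flags))
			return true;
		users = atomic_read(&tags->active_queues);
	} else {
		...
	}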

>  
>  		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
>  			return true;
> -		users = atomic_read(&set->active_queues_shared_sbitmap);
> +		users = atomic_read(&tags->active_queues);
>  	} else {
>  		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
>  			return true;
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index 13ba1861e688..808854a8ebc4 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -232,13 +232,11 @@ enum hctx_type {
>   * @flags:	   Zero or more BLK_MQ_F_* flags.
>   * @driver_data:   Pointer to data owned by the block driver that created this
>   *		   tag set.
> - * @active_queues_shared_sbitmap:
> - * 		   number of active request queues per tag set.
> - * @__bitmap_tags: A shared tags sbitmap, used over all hctx's
> - * @__breserved_tags:
> - *		   A shared reserved tags sbitmap, used over all hctx's
>   * @tags:	   Tag sets. One tag set per hardware queue. Has @nr_hw_queues
>   *		   elements.
> + * @shared_sbitmap_tags:
> + *		   Shared sbitmap set of tags. Has @nr_hw_queues elements. If
> + *		   set, shared by all @tags.
>   * @tag_list_lock: Serializes tag_list accesses.
>   * @tag_list:	   List of the request queues that use this tag set. See also
>   *		   request_queue.tag_set_list.
> @@ -255,12 +253,11 @@ struct blk_mq_tag_set {
>  	unsigned int		timeout;
>  	unsigned int		flags;
>  	void			*driver_data;
> -	atomic_t		active_queues_shared_sbitmap;
>  
> -	struct sbitmap_queue	__bitmap_tags;
> -	struct sbitmap_queue	__breserved_tags;
>  	struct blk_mq_tags	**tags;
>  
> +	struct blk_mq_tags	*shared_sbitmap_tags;
> +
>  	struct mutex		tag_list_lock;
>  	struct list_head	tag_list;
>  };
> @@ -432,6 +429,8 @@ enum {
>  	((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \
>  		<< BLK_MQ_F_ALLOC_POLICY_START_BIT)
>  
> +#define BLK_MQ_NO_HCTX_IDX	(-1U)
> +
>  struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata,
>  		struct lock_class_key *lkclass);
>  #define blk_mq_alloc_disk(set, queuedata)				\
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 4baf9435232d..17e50e5ef47b 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -459,8 +459,7 @@ struct request_queue {
>  
>  	atomic_t		nr_active_requests_shared_sbitmap;
>  
> -	struct sbitmap_queue	sched_bitmap_tags;
> -	struct sbitmap_queue	sched_breserved_tags;
> +	struct blk_mq_tags	*shared_sbitmap_tags;

Maybe better named shared_sched_sbitmap_tags or sched_sbitmap_tags?

Nice cleanup. Once the above comments are addressed, feel free to add:

Reviewed-by: Ming Lei <ming.lei@...hat.com>


Thanks,
Ming
