[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <db93e0ba-118a-a4f6-41a8-064353568ef7@huawei.com>
Date: Thu, 21 Nov 2019 10:24:16 +0000
From: John Garry <john.garry@...wei.com>
To: Ming Lei <ming.lei@...hat.com>
CC: "axboe@...nel.dk" <axboe@...nel.dk>,
"jejb@...ux.ibm.com" <jejb@...ux.ibm.com>,
"martin.petersen@...cle.com" <martin.petersen@...cle.com>,
"linux-block@...r.kernel.org" <linux-block@...r.kernel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"linux-scsi@...r.kernel.org" <linux-scsi@...r.kernel.org>,
"hare@...e.com" <hare@...e.com>,
"bvanassche@....org" <bvanassche@....org>,
"chenxiang (M)" <chenxiang66@...ilicon.com>
Subject: Re: [PATCH RFC V2 3/5] blk-mq: Facilitate a shared sbitmap per tagset
>>
>> int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
>> {
>> + struct blk_mq_tag_set *tag_set = q->tag_set;
>> struct blk_mq_hw_ctx *hctx;
>> struct elevator_queue *eq;
>> unsigned int i;
>> @@ -537,6 +538,19 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
>> blk_mq_debugfs_register_sched_hctx(q, hctx);
>> }
>>
>> + if (blk_mq_is_sbitmap_shared(tag_set)) {
>> + if (!blk_mq_init_sched_shared_sbitmap(tag_set, q->nr_requests)) {
>> + ret = -ENOMEM;
>> + goto err;
>> + }
>> + queue_for_each_hw_ctx(q, hctx, i) {
>> + struct blk_mq_tags *tags = hctx->sched_tags;
>> +
>> + tags->pbitmap_tags = &tag_set->sched_shared_bitmap_tags;
>> + tags->pbreserved_tags = &tag_set->sched_shared_breserved_tags;
>
> This kind of sharing is wrong, sched tags should be request queue wide
> instead of tagset wide, and each request queue has its own & independent
> scheduler queue.
Right, so if we get get a scheduler tag we still need to get a driver
tag, and this would be the "shared" tag.
That makes things simpler then.
>
>> + }
>> + }
>> +
>> return 0;
>>
>> err:
>> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
>> index 42792942b428..6625bebb46c3 100644
>> --- a/block/blk-mq-tag.c
>> +++ b/block/blk-mq-tag.c
>> @@ -35,9 +35,9 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
>> */
>> void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
>> {
>> - sbitmap_queue_wake_all(&tags->bitmap_tags);
>> + sbitmap_queue_wake_all(tags->pbitmap_tags);
>> if (include_reserve)
>> - sbitmap_queue_wake_all(&tags->breserved_tags);
>> + sbitmap_queue_wake_all(tags->pbreserved_tags);
>> }
>>
[...]
>> mutex_init(&set->tag_list_lock);
>> INIT_LIST_HEAD(&set->tag_list);
>>
>> @@ -3137,6 +3151,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
>> {
>> struct blk_mq_tag_set *set = q->tag_set;
>> struct blk_mq_hw_ctx *hctx;
>> + bool sched_tags = false;
>> int i, ret;
>>
>> if (!set)
>> @@ -3160,6 +3175,7 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
>> ret = blk_mq_tag_update_depth(hctx, &hctx->tags, nr,
>> false);
>> } else {
>> + sched_tags = true;
>> ret = blk_mq_tag_update_depth(hctx, &hctx->sched_tags,
>> nr, true);
>> }
>> @@ -3169,8 +3185,41 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
>> q->elevator->type->ops.depth_updated(hctx);
>> }
>>
>> - if (!ret)
>> + /*
>> + * if ret is 0, all queues should have been updated to the same depth
>> + * if not, then maybe some have been updated - yuk, need to handle this for shared sbitmap...
>> + * if some are updated, we should probably roll back the change altogether. FIXME
>> + */
>> + if (!ret) {
>> + if (blk_mq_is_sbitmap_shared(set)) {
>> + if (sched_tags) {
>> + sbitmap_queue_free(&set->sched_shared_bitmap_tags);
>> + sbitmap_queue_free(&set->sched_shared_breserved_tags);
>> + if (!blk_mq_init_sched_shared_sbitmap(set, nr))
>> + return -ENOMEM; /* fixup error handling */
>> +
>> + queue_for_each_hw_ctx(q, hctx, i) {
>> + hctx->sched_tags->pbitmap_tags = &set->sched_shared_bitmap_tags;
>> + hctx->sched_tags->pbreserved_tags = &set->sched_shared_breserved_tags;
>> + }
>> + } else {
>> + sbitmap_queue_free(&set->shared_bitmap_tags);
>> + sbitmap_queue_free(&set->shared_breserved_tags);
>> + if (!blk_mq_init_shared_sbitmap(set))
>> + return -ENOMEM; /* fixup error handling */
>
> No, we can't re-allocate driver tags here which are shared by all LUNs. > And you should see that 'can_grow' is set as false for driver tags
> in blk_mq_update_nr_requests(), which can only touch per-request-queue
> data, not tagset wide data.
Yeah, I see that. We should just resize for driver tags bitmap.
Personally I think the mainline code is a little loose here, as if we
could grow driver tags, then blk_mq_tagset.tags would be out-of-sync
with the hctx->tags. Maybe that should be made more explicit in the code.
BTW, do you have anything to say about this (modified slightly) comment:
/*
* if ret != 0, q->nr_requests would not be updated, yet the depth
* for some hctx sched tags may have changed - is that the right thing
* to do?
*/
Thanks,
John
Powered by blists - more mailing lists