linux-kernel - Re: [PATCH v2 08/13] blk-mq: simplify flush check in blk_mq_dispatch_rq

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <540fdd7d-e13b-fffb-20e2-95b59bac0009@huaweicloud.com>
Date:   Tue, 10 Jan 2023 20:35:02 +0800
From:   Kemeng Shi <shikemeng@...weicloud.com>
To:     Christoph Hellwig <hch@....de>
Cc:     axboe@...nel.dk, dwagner@...e.de, hare@...e.de,
        ming.lei@...hat.com, linux-block@...r.kernel.org,
        linux-kernel@...r.kernel.org, john.garry@...wei.com, jack@...e.cz
Subject: Re: [PATCH v2 08/13] blk-mq: simplify flush check in
 blk_mq_dispatch_rq_list



on 1/10/2023 4:09 PM, Christoph Hellwig wrote:
> On Mon, Jan 09, 2023 at 10:27:33AM +0800, Kemeng Shi wrote:
>> After we queue request[s] to one driver queue, we need to notify driver
>> that there are no more request to the queue or driver will keep waiting
>> for the last request to be queued and IO hung could happen.
> 
> Yes.
> 
>> Normaly, we will notify this by setting .last in struct blk_mq_queue_data
>> along with the normal last request .rq in struct blk_mq_queue_data. The
>> extra commit is only needed if normal last information in .last is lost.
>> (See comment in struct blk_mq_ops for commit_rqs).
>>
>> The lost could occur if error happens for sending last request with .last
>> set or error happen in middle of list and we even do not send the request
>> with .last set.
> 
> Yes. So the rule is:
> 
>  1) did not queue everything initially scheduled to queue
> 
> OR
> 
>  2) the last attempt to queue a request failed
> 
> I think we need to find a way to clearly document that and that
> make all callers match it.
> For most this becomes a
> 
> 	if (ret || !list_empty(list))
> 
> or even just
> 
> 	if (ret)
> 
> as an error is often the only way to break out of the submission
> loop.
> 
> I wonder if we need to split the queued clearing from blk_mq_commit_rqs
> and just clear it in the existing callers, so that we can use that
> helpers for all commits, nicely hiding the ->commit_rqs presence
> check, and then move that call to where it is needed directly.  Something
> like this untested patch (which needs to be split up), which also
> makes sure we trace these calls consistently:
Yes, using helper also makes queued check consistently. Currently, most code
only calls commit_rqs if any request is queued, one exception is that
blk_mq_plug_issue_direct calls commit_rqs without queued check.
Besides, we can document the the rule before blk_mq_commit_rqs. Any caller
in future can notice the rule and match it.
I will send next version based on suggested helper.
Thanks.
> ---
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index c5cf0dbca1db8d..436ca56a0b7172 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -2001,6 +2001,15 @@ static void blk_mq_release_budgets(struct request_queue *q,
>  	}
>  }
>  
> +static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int queued,
> +			      bool from_schedule)
> +{
> +	if (queued && hctx->queue->mq_ops->commit_rqs) {
> +		trace_block_unplug(hctx->queue, queued, !from_schedule);
> +		hctx->queue->mq_ops->commit_rqs(hctx);
> +	}
> +}
> +
>  /*
>   * Returns true if we did some work AND can potentially do more.
>   */
> @@ -2082,12 +2091,9 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list,
>  	if (!list_empty(&zone_list))
>  		list_splice_tail_init(&zone_list, list);
>  
> -	/* If we didn't flush the entire list, we could have told the driver
> -	 * there was more coming, but that turned out to be a lie.
> -	 */
> -	if ((!list_empty(list) || errors || needs_resource ||
> -	     ret == BLK_STS_DEV_RESOURCE) && q->mq_ops->commit_rqs && queued)
> -		q->mq_ops->commit_rqs(hctx);
> +	if (!list_empty(list) || ret)
> +		blk_mq_commit_rqs(hctx, queued, false);
> +
>  	/*
>  	 * Any items that need requeuing? Stuff them into hctx->dispatch,
>  	 * that is where we will continue on next queue run.
> @@ -2548,16 +2554,6 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
>  	spin_unlock(&ctx->lock);
>  }
>  
> -static void blk_mq_commit_rqs(struct blk_mq_hw_ctx *hctx, int *queued,
> -			      bool from_schedule)
> -{
> -	if (hctx->queue->mq_ops->commit_rqs) {
> -		trace_block_unplug(hctx->queue, *queued, !from_schedule);
> -		hctx->queue->mq_ops->commit_rqs(hctx);
> -	}
> -	*queued = 0;
> -}
> -
>  static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
>  		unsigned int nr_segs)
>  {
> @@ -2684,17 +2680,17 @@ static blk_status_t blk_mq_request_issue_directly(struct request *rq, bool last)
>  static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
>  {
>  	struct blk_mq_hw_ctx *hctx = NULL;
> +	blk_status_t ret = BLK_STS_OK;
>  	struct request *rq;
>  	int queued = 0;
> -	int errors = 0;
>  
>  	while ((rq = rq_list_pop(&plug->mq_list))) {
>  		bool last = rq_list_empty(plug->mq_list);
> -		blk_status_t ret;
>  
>  		if (hctx != rq->mq_hctx) {
>  			if (hctx)
> -				blk_mq_commit_rqs(hctx, &queued, from_schedule);
> +				blk_mq_commit_rqs(hctx, queued, from_schedule);
> +			queued = 0;
>  			hctx = rq->mq_hctx;
>  		}
>  
> @@ -2706,21 +2702,15 @@ static void blk_mq_plug_issue_direct(struct blk_plug *plug, bool from_schedule)
>  		case BLK_STS_RESOURCE:
>  		case BLK_STS_DEV_RESOURCE:
>  			blk_mq_request_bypass_insert(rq, false, true);
> -			blk_mq_commit_rqs(hctx, &queued, from_schedule);
> -			return;
> +			goto out;
>  		default:
>  			blk_mq_end_request(rq, ret);
> -			errors++;
>  			break;
>  		}
>  	}
> -
> -	/*
> -	 * If we didn't flush the entire list, we could have told the driver
> -	 * there was more coming, but that turned out to be a lie.
> -	 */
> -	if (errors)
> -		blk_mq_commit_rqs(hctx, &queued, from_schedule);
> +out:
> +	if (ret)
> +		blk_mq_commit_rqs(hctx, queued, from_schedule);
>  }
>  
>  static void __blk_mq_flush_plug_list(struct request_queue *q,
> @@ -2804,37 +2794,33 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
>  void blk_mq_try_issue_list_directly(struct blk_mq_hw_ctx *hctx,
>  		struct list_head *list)
>  {
> +	blk_status_t ret = BLK_STS_OK;
> +	struct request *rq;
>  	int queued = 0;
> -	int errors = 0;
> -
> -	while (!list_empty(list)) {
> -		blk_status_t ret;
> -		struct request *rq = list_first_entry(list, struct request,
> -				queuelist);
> +	bool last;
>  
> +	while ((rq = list_first_entry_or_null(list, struct request,
> +			queuelist))) {
>  		list_del_init(&rq->queuelist);
> -		ret = blk_mq_request_issue_directly(rq, list_empty(list));
> -		if (ret != BLK_STS_OK) {
> -			errors++;
> -			if (ret == BLK_STS_RESOURCE ||
> -					ret == BLK_STS_DEV_RESOURCE) {
> -				blk_mq_request_bypass_insert(rq, false,
> -							list_empty(list));
> -				break;
> -			}
> -			blk_mq_end_request(rq, ret);
> -		} else
> +		last = list_empty(list);
> +
> +		ret = blk_mq_request_issue_directly(rq, last);
> +		switch (ret) {
> +		case BLK_STS_OK:
>  			queued++;
> +			break;
> +		case BLK_STS_RESOURCE:
> +		case BLK_STS_DEV_RESOURCE:
> +			blk_mq_request_bypass_insert(rq, false, last);
> +			goto out;
> +		default:
> +			blk_mq_end_request(rq, ret);
> +			break;
> +		}
>  	}
> -
> -	/*
> -	 * If we didn't flush the entire list, we could have told
> -	 * the driver there was more coming, but that turned out to
> -	 * be a lie.
> -	 */
> -	if ((!list_empty(list) || errors) &&
> -	     hctx->queue->mq_ops->commit_rqs && queued)
> -		hctx->queue->mq_ops->commit_rqs(hctx);
> +out:
> +	if (ret)
> +		blk_mq_commit_rqs(hctx, queued, false);
>  }
>  
>  static bool blk_mq_attempt_bio_merge(struct request_queue *q,
> 

-- 
Best wishes
Kemeng Shi