Date:   Sun, 06 May 2018 09:33:41 +0200
From:   Oleksandr Natalenko <oleksandr@...alenko.name>
To:     Paolo Valente <paolo.valente@...aro.org>
Cc:     Jens Axboe <axboe@...nel.dk>, linux-block@...r.kernel.org,
        linux-kernel@...r.kernel.org, ulf.hansson@...aro.org,
        broonie@...nel.org, linus.walleij@...aro.org,
        bfq-iosched@...glegroups.com
Subject: Re: [PATCH BUGFIX] block, bfq: postpone rq preparation to insert or
 merge

Hi.

On 04.05.2018 19:17, Paolo Valente wrote:
> When invoked for an I/O request rq, the prepare_request hook of bfq
> increments reference counters in the destination bfq_queue for rq. In
> this respect, after this hook has been invoked, rq may still be
> transformed into a request with no icq attached, i.e., for bfq, a
> request not associated with any bfq_queue. No further hook is invoked
> to signal this transformation to bfq (in general, to the destination
> elevator for rq). This leads bfq into an inconsistent state, because
> bfq has no chance to correctly lower these counters back. This
> inconsistency may in turn cause incorrect scheduling and hangs. It
> certainly causes memory leaks, by making it impossible for bfq to free
> the involved bfq_queue.
> 
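
To make the hazard concrete, here is a minimal toy sketch (all names
below are made up for illustration, they are not the real bfq
structures): a counter raised at prepare time can never be lowered once
the request loses its icq, because no further hook runs.

/* Toy model of the leak, not the bfq API. */
struct toy_queue { int ref; };
struct toy_request { struct toy_queue *queue; void *icq; };

static void toy_prepare(struct toy_request *rq)
{
	rq->queue->ref++;	/* counter raised when rq is prepared */
}

static void toy_lose_icq(struct toy_request *rq)
{
	rq->icq = NULL;		/* rq is no longer tied to any queue ...    */
	rq->queue = NULL;	/* ... and the ref above is leaked for good */
}
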
> On the bright side, no such transformation can occur for rq once rq
> has been inserted into bfq, or merged with another, already inserted,
> request. Exploiting this fact, this commit addresses the above issue
> by delaying the preparation of an I/O request to when the request is
> inserted or merged.
> 
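
If I read the patch correctly, the resulting shape is roughly the
following (again a toy sketch with made-up names, only mirroring the
bfq_prepare_request/bfq_init_rq split below): the prepare hook only
clears private state, and the real setup runs from insert/merge, bailing
out when rq carries no icq or has already been initialized.

struct toy_rq { void *icq; void *priv; };
struct toy_bfqq { int ref; };

/* prepare hook: nothing but clearing, rq may still be transformed */
static void toy_prepare_request(struct toy_rq *rq)
{
	rq->priv = NULL;
}

/* called from insert/merge, where no transformation can follow */
static struct toy_bfqq *toy_init_rq(struct toy_rq *rq, struct toy_bfqq *bfqq)
{
	if (!rq->icq)
		return NULL;		/* rq ended up with no queue at all */
	if (rq->priv)
		return rq->priv;	/* already initialized by an earlier call */
	bfqq->ref++;			/* now safe to count the request */
	rq->priv = bfqq;
	return bfqq;
}
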
> This change also gives a performance bonus: a lock-contention point
> gets removed. To prepare a request, bfq needs to hold its scheduler
> lock. After postponing request preparation to insertion or merging, no
> lock needs to be grabbed any longer in the prepare_request hook, while
> the lock already taken to perform insertion or merging is used to
> prepare the request as well.
> 
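
The locking gain can be pictured like this (toy code; a pthread mutex
stands in for the scheduler lock, and nothing below is the real bfq
code):

#include <pthread.h>

struct toy_sched {
	pthread_mutex_t lock;
	int prepared;
};

/* old scheme: every prepare_request grabbed the scheduler lock */
static void old_prepare(struct toy_sched *sched)
{
	pthread_mutex_lock(&sched->lock);
	sched->prepared++;		/* preparation work under the lock */
	pthread_mutex_unlock(&sched->lock);
}

/* new scheme: prepare touches no lock at all ... */
static void new_prepare(struct toy_sched *sched)
{
	(void)sched;			/* only per-request pointers get cleared */
}

/* ... and insertion, which already holds the lock, does the preparation */
static void new_insert(struct toy_sched *sched)
{
	pthread_mutex_lock(&sched->lock);
	sched->prepared++;		/* preparation piggybacks on this lock */
	/* the actual insertion happens here, still under the same lock */
	pthread_mutex_unlock(&sched->lock);
}
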
> Signed-off-by: Paolo Valente <paolo.valente@...aro.org>
> ---
>  block/bfq-iosched.c | 86 +++++++++++++++++++++++++++++++++++------------------
>  1 file changed, 57 insertions(+), 29 deletions(-)
> 
> diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
> index 771ae9730ac6..ea02162df6c7 100644
> --- a/block/bfq-iosched.c
> +++ b/block/bfq-iosched.c
> @@ -1858,6 +1858,8 @@ static int bfq_request_merge(struct request_queue *q, struct request **req,
>  	return ELEVATOR_NO_MERGE;
>  }
> 
> +static struct bfq_queue *bfq_init_rq(struct request *rq);
> +
>  static void bfq_request_merged(struct request_queue *q, struct request *req,
>  			       enum elv_merge type)
>  {
> @@ -1866,7 +1868,7 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
>  	    blk_rq_pos(req) <
>  	    blk_rq_pos(container_of(rb_prev(&req->rb_node),
>  				    struct request, rb_node))) {
> -		struct bfq_queue *bfqq = RQ_BFQQ(req);
> +		struct bfq_queue *bfqq = bfq_init_rq(req);
>  		struct bfq_data *bfqd = bfqq->bfqd;
>  		struct request *prev, *next_rq;
> 
> @@ -1894,7 +1896,8 @@ static void bfq_request_merged(struct request_queue *q, struct request *req,
>  static void bfq_requests_merged(struct request_queue *q, struct request *rq,
>  				struct request *next)
>  {
> -	struct bfq_queue *bfqq = RQ_BFQQ(rq), *next_bfqq = RQ_BFQQ(next);
> +	struct bfq_queue *bfqq = bfq_init_rq(rq),
> +		*next_bfqq = bfq_init_rq(next);
> 
>  	if (!RB_EMPTY_NODE(&rq->rb_node))
>  		goto end;
> @@ -4540,14 +4543,12 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
>  					   unsigned int cmd_flags) {}
>  #endif
> 
> -static void bfq_prepare_request(struct request *rq, struct bio *bio);
> -
>  static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
>  			       bool at_head)
>  {
>  	struct request_queue *q = hctx->queue;
>  	struct bfq_data *bfqd = q->elevator->elevator_data;
> -	struct bfq_queue *bfqq = RQ_BFQQ(rq);
> +	struct bfq_queue *bfqq;
>  	bool idle_timer_disabled = false;
>  	unsigned int cmd_flags;
> 
> @@ -4562,24 +4563,13 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
>  	blk_mq_sched_request_inserted(rq);
> 
>  	spin_lock_irq(&bfqd->lock);
> +	bfqq = bfq_init_rq(rq);
>  	if (at_head || blk_rq_is_passthrough(rq)) {
>  		if (at_head)
>  			list_add(&rq->queuelist, &bfqd->dispatch);
>  		else
>  			list_add_tail(&rq->queuelist, &bfqd->dispatch);
> -	} else {
> -		if (WARN_ON_ONCE(!bfqq)) {
> -			/*
> -			 * This should never happen. Most likely rq is
> -			 * a requeued regular request, being
> -			 * re-inserted without being first
> -			 * re-prepared. Do a prepare, to avoid
> -			 * failure.
> -			 */
> -			bfq_prepare_request(rq, rq->bio);
> -			bfqq = RQ_BFQQ(rq);
> -		}
> -
> +	} else { /* bfqq is assumed to be non-NULL here */
>  		idle_timer_disabled = __bfq_insert_request(bfqd, rq);
>  		/*
>  		 * Update bfqq, because, if a queue merge has occurred
> @@ -4922,11 +4912,48 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd,
>  }
> 
>  /*
> - * Allocate bfq data structures associated with this request.
> + * Only reset private fields. The actual request preparation will be
> + * performed by bfq_init_rq, when rq is either inserted or merged. See
> + * comments on bfq_init_rq for the reason behind this delayed
> + * preparation.
>   */
>  static void bfq_prepare_request(struct request *rq, struct bio *bio)
> +{
> +	/*
> +	 * Regardless of whether we have an icq attached, we have to
> +	 * clear the scheduler pointers, as they might point to
> +	 * previously allocated bic/bfqq structs.
> +	 */
> +	rq->elv.priv[0] = rq->elv.priv[1] = NULL;
> +}
> +
> +/*
> + * If needed, init rq, allocate bfq data structures associated with
> + * rq, and increment reference counters in the destination bfq_queue
> + * for rq. Return the destination bfq_queue for rq, or NULL if rq is
> + * not associated with any bfq_queue.
> + *
> + * This function is invoked by the functions that perform rq insertion
> + * or merging. One may have expected the above preparation operations
> + * to be performed in bfq_prepare_request, and not delayed to when rq
> + * is inserted or merged. The rationale behind this delayed
> + * preparation is that, after the prepare_request hook is invoked for
> + * rq, rq may still be transformed into a request with no icq, i.e., a
> + * request not associated with any queue. No bfq hook is invoked to
> + * signal this transformation. As a consequence, should these
> + * preparation operations be performed when the prepare_request hook
> + * is invoked, and should rq be transformed one moment later, bfq
> + * would end up in an inconsistent state, because it would have
> + * incremented some queue counters for an rq destined to
> + * transformation, without any chance to correctly lower these
> + * counters back. In contrast, no transformation can occur any longer for
> + * rq after rq has been inserted or merged. So, it is safe to execute
> + * these preparation operations when rq is finally inserted or merged.
> + */
> +static struct bfq_queue *bfq_init_rq(struct request *rq)
>  {
>  	struct request_queue *q = rq->q;
> +	struct bio *bio = rq->bio;
>  	struct bfq_data *bfqd = q->elevator->elevator_data;
>  	struct bfq_io_cq *bic;
>  	const int is_sync = rq_is_sync(rq);
> @@ -4934,20 +4961,21 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio)
>  	bool new_queue = false;
>  	bool bfqq_already_existing = false, split = false;
> 
> +	if (unlikely(!rq->elv.icq))
> +		return NULL;
> +
>  	/*
> -	 * Even if we don't have an icq attached, we should still clear
> -	 * the scheduler pointers, as they might point to previously
> -	 * allocated bic/bfqq structs.
> +	 * Assuming that elv.priv[1] is set only if everything is set
> +	 * for this rq. This holds true, because this function is
> +	 * invoked only for insertion or merging, and, after such
> +	 * events, a request cannot be manipulated any longer before
> +	 * being removed from bfq.
>  	 */
> -	if (!rq->elv.icq) {
> -		rq->elv.priv[0] = rq->elv.priv[1] = NULL;
> -		return;
> -	}
> +	if (rq->elv.priv[1])
> +		return rq->elv.priv[1];
> 
>  	bic = icq_to_bic(rq->elv.icq);
> 
> -	spin_lock_irq(&bfqd->lock);
> -
>  	bfq_check_ioprio_change(bic, bio);
> 
>  	bfq_bic_update_cgroup(bic, bio);
> @@ -5006,7 +5034,7 @@ static void bfq_prepare_request(struct request *rq, struct bio *bio)
>  	if (unlikely(bfq_bfqq_just_created(bfqq)))
>  		bfq_handle_burst(bfqd, bfqq);
> 
> -	spin_unlock_irq(&bfqd->lock);
> +	return bfqq;
>  }
> 
>  static void bfq_idle_slice_timer_body(struct bfq_queue *bfqq)

No harm is observed on either the test VM (with the smartctl hammer) or my
laptop. So,

Tested-by: Oleksandr Natalenko <oleksandr@...alenko.name>

Thanks.

-- 
   Oleksandr Natalenko (post-factum)
