lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180824171439.GA12587@u40b0340c692b58f6553c.ant.amazon.com>
Date:   Fri, 24 Aug 2018 10:14:39 -0700
From:   Eduardo Valentin <eduval@...zon.com>
To:     Jens Axboe <axboe@...nel.dk>
CC:     "jianchao.wang" <jianchao.w.wang@...cle.com>,
        <linux-block@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
        Anchal Agarwal <anchalag@...zon.com>,
        "Frank van der Linden" <fllinden@...zon.com>
Subject: Re: [PATCH] blk-wbt: get back the missed wakeup from __wbt_done

Adding Frank and Anchal, as this part of the thread is not copying them.

Just a minor comment at the bottom.

On Fri, Aug 24, 2018 at 08:58:09AM -0600, Jens Axboe wrote:
> On 8/24/18 8:40 AM, Jens Axboe wrote:
> > On 8/23/18 8:06 PM, jianchao.wang wrote:
> >> Hi Jens
> >>
> >> On 08/23/2018 11:42 PM, Jens Axboe wrote:
> >>>> -
> >>>> -	__set_current_state(TASK_RUNNING);
> >>>> -	remove_wait_queue(&rqw->wait, &wait);
> >>>> +	wbt_init_wait(&wait, &data);
> >>>> +	prepare_to_wait_exclusive(&rqw->wait, &wait,
> >>>> +			TASK_UNINTERRUPTIBLE);
> >>>> +	if (lock) {
> >>>> +		spin_unlock_irq(lock);
> >>>> +		io_schedule();
> >>>> +		spin_lock_irq(lock);
> >>>> +	} else
> >>>> +		io_schedule();
> >>> Aren't we still missing a get-token attempt after adding to the
> >>> waitqueue? For the case where someone frees the token after your initial
> >>> check, but before you add yourself to the waitqueue.
> >>
> >> I used to think about this.
> >> However, there is a very tricky scenario here:
> >> We will try get the wbt budget in wbt_wake_function.
> >> After add a task into the wait queue, wbt_wake_function has been able to
> >> be invoked for this task. If we get the wbt budget after prepare_to_wait_exclusive,
> >> we may get wbt budget twice.
> > 
> > Ah yes good point. But without it, you've got another race that will
> > potentially put you to sleep forever.
> > 
> > How about something like the below? That should take care of both
> > situations. Totally untested.
> 
> Slightly better/cleaner one below. Still totally untested.
> 
> 
> diff --git a/block/blk-wbt.c b/block/blk-wbt.c
> index 84507d3e9a98..bc13544943ff 100644
> --- a/block/blk-wbt.c
> +++ b/block/blk-wbt.c
> @@ -123,16 +123,11 @@ static void rwb_wake_all(struct rq_wb *rwb)
>  	}
>  }
>  
> -static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
> +static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
> +			 enum wbt_flags wb_acct)
>  {
> -	struct rq_wb *rwb = RQWB(rqos);
> -	struct rq_wait *rqw;
>  	int inflight, limit;
>  
> -	if (!(wb_acct & WBT_TRACKED))
> -		return;
> -
> -	rqw = get_rq_wait(rwb, wb_acct);
>  	inflight = atomic_dec_return(&rqw->inflight);
>  
>  	/*
> @@ -166,8 +161,21 @@ static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
>  		int diff = limit - inflight;
>  
>  		if (!inflight || diff >= rwb->wb_background / 2)
> -			wake_up(&rqw->wait);
> +			wake_up_all(&rqw->wait);
>  	}
> +
> +}
> +
> +static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
> +{
> +	struct rq_wb *rwb = RQWB(rqos);
> +	struct rq_wait *rqw;
> +
> +	if (!(wb_acct & WBT_TRACKED))
> +		return;
> +
> +	rqw = get_rq_wait(rwb, wb_acct);
> +	wbt_rqw_done(rwb, rqw, wb_acct);
>  }
>  
>  /*
> @@ -481,6 +489,32 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
>  	return limit;
>  }
>  
> +struct wbt_wait_data {
> +	struct task_struct *curr;
> +	struct rq_wb *rwb;
> +	struct rq_wait *rqw;
> +	unsigned long rw;
> +	bool got_token;
> +};
> +
> +static int wbt_wake_function(wait_queue_entry_t *curr, unsigned int mode,
> +			     int wake_flags, void *key)
> +{
> +	struct wbt_wait_data *data = curr->private;
> +
> +	/*
> +	 * If we fail to get a budget, return -1 to interrupt the wake up
> +	 * loop in __wake_up_common.
> +	 */
> +	if (!rq_wait_inc_below(data->rqw, get_limit(data->rwb, data->rw)))
> +		return -1;
> +
> +	data->got_token = true;
> +	wake_up_process(data->curr);
> +	list_del_init(&curr->entry);
> +	return 1;
> +}
> +
>  /*
>   * Block if we will exceed our limit, or if we are currently waiting for
>   * the timer to kick off queuing again.
> @@ -491,31 +525,44 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
>  	__acquires(lock)
>  {
>  	struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
> -	DECLARE_WAITQUEUE(wait, current);
> +	struct wbt_wait_data data = {
> +		.curr = current,
> +		.rwb = rwb,
> +		.rqw = rqw,
> +		.rw = rw,
> +	};
> +	struct wait_queue_entry wait = {
> +		.func		= wbt_wake_function,
> +		.private	= &data,
> +		.entry		= LIST_HEAD_INIT(wait.entry),
> +	};
>  	bool has_sleeper;
>  
>  	has_sleeper = wq_has_sleeper(&rqw->wait);
>  	if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
>  		return;
>  
> -	add_wait_queue_exclusive(&rqw->wait, &wait);
> -	do {
> -		set_current_state(TASK_UNINTERRUPTIBLE);
> +	prepare_to_wait_exclusive(&rqw->wait, &wait, TASK_UNINTERRUPTIBLE);
>  
> -		if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
> -			break;
> +	if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw))) {
> +		finish_wait(&rqw->wait, &wait);
>  
> -		if (lock) {
> -			spin_unlock_irq(lock);
> -			io_schedule();
> -			spin_lock_irq(lock);
> -		} else
> -			io_schedule();
> -		has_sleeper = false;
> -	} while (1);
> -
> -	__set_current_state(TASK_RUNNING);
> -	remove_wait_queue(&rqw->wait, &wait);
> +		/*
> +		 * We raced with wbt_wake_function() getting a token, which
> +		 * means we now have two. Put ours and wake anyone else
> +		 * potentially waiting for one.
> +		 */
> +		if (data.got_token)
> +			wbt_rqw_done(rwb, rqw, wb_acct);
> +		return;
> +	}
> +
> +	if (lock) {
> +		spin_unlock_irq(lock);
> +		io_schedule();
> +		spin_lock_irq(lock);
> +	} else
> +		io_schedule();

Nitpick, but shouldn't this look like:

+	if (lock) {
+		spin_unlock_irq(lock);
+		io_schedule();
+		spin_lock_irq(lock);
+	} else {
+		io_schedule();
+	}


And another random thought: it would be good to have some sort of tracing of this.

>  }
>  
>  static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
> 
> -- 
> Jens Axboe
> 
> 

-- 
All the best,
Eduardo Valentin

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ