Message-ID: <20180824171439.GA12587@u40b0340c692b58f6553c.ant.amazon.com>
Date: Fri, 24 Aug 2018 10:14:39 -0700
From: Eduardo Valentin <eduval@...zon.com>
To: Jens Axboe <axboe@...nel.dk>
CC: "jianchao.wang" <jianchao.w.wang@...cle.com>,
<linux-block@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
Anchal Agarwal <anchalag@...zon.com>,
"Frank van der Linden" <fllinden@...zon.com>
Subject: Re: [PATCH] blk-wbt: get back the missed wakeup from __wbt_done
Adding Frank and Anchal, since this part of the thread does not copy them.
Just a minor comment at the bottom.
On Fri, Aug 24, 2018 at 08:58:09AM -0600, Jens Axboe wrote:
> On 8/24/18 8:40 AM, Jens Axboe wrote:
> > On 8/23/18 8:06 PM, jianchao.wang wrote:
> >> Hi Jens
> >>
> >> On 08/23/2018 11:42 PM, Jens Axboe wrote:
> >>>> -
> >>>> - __set_current_state(TASK_RUNNING);
> >>>> - remove_wait_queue(&rqw->wait, &wait);
> >>>> + wbt_init_wait(&wait, &data);
> >>>> + prepare_to_wait_exclusive(&rqw->wait, &wait,
> >>>> + TASK_UNINTERRUPTIBLE);
> >>>> + if (lock) {
> >>>> + spin_unlock_irq(lock);
> >>>> + io_schedule();
> >>>> + spin_lock_irq(lock);
> >>>> + } else
> >>>> + io_schedule();
> >>> Aren't we still missing a get-token attempt after adding to the
> >>> waitqueue? For the case where someone frees the token after your initial
> >>> check, but before you add yourself to the waitqueue.
> >>
> >> I did think about this.
> >> However, there is a very tricky scenario here:
> >> We try to get the wbt budget in wbt_wake_function.
> >> Once a task has been added to the wait queue, wbt_wake_function can
> >> already be invoked for it. If we also get the wbt budget after
> >> prepare_to_wait_exclusive, we may end up getting the wbt budget twice.
> >
> > Ah yes good point. But without it, you've got another race that will
> > potentially put you to sleep forever.
> >
> > How about something like the below? That should take care of both
> > situations. Totally untested.
>
> Slightly better/cleaner one below. Still totally untested.
>
>
> diff --git a/block/blk-wbt.c b/block/blk-wbt.c
> index 84507d3e9a98..bc13544943ff 100644
> --- a/block/blk-wbt.c
> +++ b/block/blk-wbt.c
> @@ -123,16 +123,11 @@ static void rwb_wake_all(struct rq_wb *rwb)
> }
> }
>
> -static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
> +static void wbt_rqw_done(struct rq_wb *rwb, struct rq_wait *rqw,
> + enum wbt_flags wb_acct)
> {
> - struct rq_wb *rwb = RQWB(rqos);
> - struct rq_wait *rqw;
> int inflight, limit;
>
> - if (!(wb_acct & WBT_TRACKED))
> - return;
> -
> - rqw = get_rq_wait(rwb, wb_acct);
> inflight = atomic_dec_return(&rqw->inflight);
>
> /*
> @@ -166,8 +161,21 @@ static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
> int diff = limit - inflight;
>
> if (!inflight || diff >= rwb->wb_background / 2)
> - wake_up(&rqw->wait);
> + wake_up_all(&rqw->wait);
> }
> +
> +}
> +
> +static void __wbt_done(struct rq_qos *rqos, enum wbt_flags wb_acct)
> +{
> + struct rq_wb *rwb = RQWB(rqos);
> + struct rq_wait *rqw;
> +
> + if (!(wb_acct & WBT_TRACKED))
> + return;
> +
> + rqw = get_rq_wait(rwb, wb_acct);
> + wbt_rqw_done(rwb, rqw, wb_acct);
> }
>
> /*
> @@ -481,6 +489,32 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
> return limit;
> }
>
> +struct wbt_wait_data {
> + struct task_struct *curr;
> + struct rq_wb *rwb;
> + struct rq_wait *rqw;
> + unsigned long rw;
> + bool got_token;
> +};
> +
> +static int wbt_wake_function(wait_queue_entry_t *curr, unsigned int mode,
> + int wake_flags, void *key)
> +{
> + struct wbt_wait_data *data = curr->private;
> +
> + /*
> + * If we fail to get a budget, return -1 to interrupt the wake up
> + * loop in __wake_up_common.
> + */
> + if (!rq_wait_inc_below(data->rqw, get_limit(data->rwb, data->rw)))
> + return -1;
> +
> + data->got_token = true;
> + wake_up_process(data->curr);
> + list_del_init(&curr->entry);
> + return 1;
> +}
> +
> /*
> * Block if we will exceed our limit, or if we are currently waiting for
> * the timer to kick off queuing again.
> @@ -491,31 +525,44 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct,
> __acquires(lock)
> {
> struct rq_wait *rqw = get_rq_wait(rwb, wb_acct);
> - DECLARE_WAITQUEUE(wait, current);
> + struct wbt_wait_data data = {
> + .curr = current,
> + .rwb = rwb,
> + .rqw = rqw,
> + .rw = rw,
> + };
> + struct wait_queue_entry wait = {
> + .func = wbt_wake_function,
> + .private = &data,
> + .entry = LIST_HEAD_INIT(wait.entry),
> + };
> bool has_sleeper;
>
> has_sleeper = wq_has_sleeper(&rqw->wait);
> if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
> return;
>
> - add_wait_queue_exclusive(&rqw->wait, &wait);
> - do {
> - set_current_state(TASK_UNINTERRUPTIBLE);
> + prepare_to_wait_exclusive(&rqw->wait, &wait, TASK_UNINTERRUPTIBLE);
>
> - if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw)))
> - break;
> + if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw))) {
> + finish_wait(&rqw->wait, &wait);
>
> - if (lock) {
> - spin_unlock_irq(lock);
> - io_schedule();
> - spin_lock_irq(lock);
> - } else
> - io_schedule();
> - has_sleeper = false;
> - } while (1);
> -
> - __set_current_state(TASK_RUNNING);
> - remove_wait_queue(&rqw->wait, &wait);
> + /*
> + * We raced with wbt_wake_function() getting a token, which
> + * means we now have two. Put ours and wake anyone else
> + * potentially waiting for one.
> + */
> + if (data.got_token)
> + wbt_rqw_done(rwb, rqw, wb_acct);
> + return;
> + }
> +
> + if (lock) {
> + spin_unlock_irq(lock);
> + io_schedule();
> + spin_lock_irq(lock);
> + } else
> + io_schedule();
Nitpick, but since the if branch needs braces, coding style wants them on the else branch too. Shouldn't this look like:
+ if (lock) {
+ spin_unlock_irq(lock);
+ io_schedule();
+ spin_lock_irq(lock);
+ } else {
+ io_schedule();
+ }
And another random thought: it would be good to have some sort of tracing of this.
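For instance, something along these lines (completely untested, and using
trace_printk() only as a placeholder; a real patch would presumably add proper
tracepoints next to the existing wbt ones) would already show who got handed a
budget from the wake path and when the wake loop was cut short:

static int wbt_wake_function(wait_queue_entry_t *curr, unsigned int mode,
			     int wake_flags, void *key)
{
	struct wbt_wait_data *data = curr->private;

	/*
	 * No budget available: stop the wake up loop in __wake_up_common
	 * and leave the sleeper on the waitqueue.
	 */
	if (!rq_wait_inc_below(data->rqw, get_limit(data->rwb, data->rw))) {
		trace_printk("wbt: no budget for pid %d, stopping wake loop\n",
			     task_pid_nr(data->curr));
		return -1;
	}

	/* Budget was grabbed on behalf of the sleeper, hand it over. */
	trace_printk("wbt: budget handed to pid %d\n",
		     task_pid_nr(data->curr));
	data->got_token = true;
	wake_up_process(data->curr);
	list_del_init(&curr->entry);
	return 1;
}

Something similar in the data.got_token branch of __wbt_wait() would make the
double-budget case easy to spot in a trace.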
> }
>
> static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
>
> --
> Jens Axboe
>
>
--
All the best,
Eduardo Valentin