linux-kernel - Re: [PATCH 2/2] sched/fair: Update blocked load from newly idle balance

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAKfTPtDaLCnF_fGxcyBSf2UTK8pNDQg4mPdPKbrU9sW-2Z+8Aw@mail.gmail.com>
Date:   Mon, 20 Nov 2017 10:07:33 +0100
From:   Vincent Guittot <vincent.guittot@...aro.org>
To:     Brendan Jackman <brendan.jackman@....com>
Cc:     Dietmar Eggemann <dietmar.eggemann@....com>,
        Ingo Molnar <mingo@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        linux-kernel <linux-kernel@...r.kernel.org>,
        Ingo Molnar <mingo@...hat.com>,
        Morten Rasmussen <morten.rasmussen@....com>
Subject: Re: [PATCH 2/2] sched/fair: Update blocked load from newly idle balance

On 24 October 2017 at 14:25, Brendan Jackman <brendan.jackman@....com> wrote:
> We now have a NOHZ kick to avoid the load of idle CPUs becoming stale. This is
> good, but it brings about CPU wakeups, which have an energy cost. As an
> alternative to waking CPUs up to decay blocked load, we can sometimes do it
> from newly idle balance. If the newly idle balance is on a domain that covers
> all the currently nohz-idle CPUs, we push the value of nohz.next_update into the
> future. That means that if such newly idle balances happen often enough, we
> never need to wake up a CPU just to update load.
>
> Since we're doing this new update inside a for_each_domain, we need to do
> something to avoid doing multiple updates on the same CPU in the same
> idle_balance. A tick stamp is set on the rq in update_blocked_averages as a
> simple way to do this. Using a simple jiffies-based timestamp, as opposed to the
> last_update_time of the root cfs_rq's sched_avg, means we can do this without
> taking the rq lock.
>
> Cc: Dietmar Eggemann <dietmar.eggemann@....com>
> Cc: Vincent Guittot <vincent.guittot@...aro.org>
> Cc: Ingo Molnar <mingo@...hat.com>
> Cc: Morten Rasmussen <morten.rasmussen@....com>
> Cc: Peter Zijlstra <peterz@...radead.org>
> Signed-off-by: Brendan Jackman <brendan.jackman@....com>
> ---
>  kernel/sched/core.c  |  1 +
>  kernel/sched/fair.c  | 41 +++++++++++++++++++++++++++++++++++------
>  kernel/sched/sched.h |  1 +
>  3 files changed, 37 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index d17c5da523a0..d8e71fd27806 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -5923,6 +5923,7 @@ void __init sched_init(void)
>                 rq_attach_root(rq, &def_root_domain);
>  #ifdef CONFIG_NO_HZ_COMMON
>                 rq->last_load_update_tick = jiffies;
> +               rq->last_blocked_load_update_tick = jiffies;
>                 rq->nohz_flags = 0;
>  #endif
>  #ifdef CONFIG_NO_HZ_FULL
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 9085caf49c76..45e9c8056161 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -7062,6 +7062,7 @@ static void update_blocked_averages(int cpu)
>                 if (cfs_rq_is_decayed(cfs_rq))
>                         list_del_leaf_cfs_rq(cfs_rq);
>         }
> +       rq->last_blocked_load_update_tick = jiffies;

last_blocked_load_update_tick is defined under CONFIG_NO_HZ_COMMON and
CONFIG_SMP, whereas update_blocked_averages() is not. This generates a
compilation error.

>         rq_unlock_irqrestore(rq, &rf);
>  }
>
> @@ -7121,6 +7122,7 @@ static inline void update_blocked_averages(int cpu)
>         rq_lock_irqsave(rq, &rf);
>         update_rq_clock(rq);
>         update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq);
> +       rq->last_blocked_load_update_tick = jiffies;
>         rq_unlock_irqrestore(rq, &rf);
>  }
>
> @@ -7615,6 +7617,15 @@ static inline enum fbq_type fbq_classify_rq(struct rq *rq)
>  }
>  #endif /* CONFIG_NUMA_BALANCING */
>
> +#ifdef CONFIG_NO_HZ_COMMON
> +static struct {
> +       cpumask_var_t idle_cpus_mask;
> +       atomic_t nr_cpus;
> +       unsigned long next_balance;     /* in jiffy units */
> +       unsigned long next_update;     /* in jiffy units */
> +} nohz ____cacheline_aligned;
> +#endif
> +
>  /**
>   * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
>   * @env: The load balancing environment.
> @@ -7633,6 +7644,30 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
>         if (child && child->flags & SD_PREFER_SIBLING)
>                 prefer_sibling = 1;
>
> +#ifdef CONFIG_NO_HZ_COMMON
> +       if (env->idle == CPU_NEWLY_IDLE) {
> +               int cpu;
> +
> +               /* Update the stats of NOHZ idle CPUs in the sd */
> +               for_each_cpu_and(cpu, sched_domain_span(env->sd),
> +                                nohz.idle_cpus_mask) {
> +                       struct rq *rq = cpu_rq(cpu);
> +
> +                       /* ... Unless we've already done since the last tick */
> +                       if (time_after(jiffies,
> +                                       rq->last_blocked_load_update_tick))
> +                               update_blocked_averages(cpu);
> +               }
> +       }
> +       /*
> +        * If we've just updated all of the NOHZ idle CPUs, then we can push
> +        * back the next nohz.next_update, which will prevent an unnecessary
> +        * wakeup for the nohz stats kick
> +        */
> +       if (cpumask_subset(nohz.idle_cpus_mask, sched_domain_span(env->sd)))
> +               nohz.next_update = jiffies + LOAD_AVG_PERIOD;
> +#endif
> +
>         load_idx = get_sd_load_idx(env->sd, env->idle);
>
>         do {
> @@ -8657,12 +8692,6 @@ static inline int on_null_domain(struct rq *rq)
>   *   needed, they will kick the idle load balancer, which then does idle
>   *   load balancing for all the idle CPUs.
>   */
> -static struct {
> -       cpumask_var_t idle_cpus_mask;
> -       atomic_t nr_cpus;
> -       unsigned long next_balance;     /* in jiffy units */
> -       unsigned long next_update;     /* in jiffy units */
> -} nohz ____cacheline_aligned;
>
>  static inline int find_new_ilb(void)
>  {
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 6f95ef653f73..6be8938bb977 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -681,6 +681,7 @@ struct rq {
>  #ifdef CONFIG_NO_HZ_COMMON
>  #ifdef CONFIG_SMP
>         unsigned long last_load_update_tick;
> +       unsigned long last_blocked_load_update_tick;
>  #endif /* CONFIG_SMP */
>         unsigned long nohz_flags;
>  #endif /* CONFIG_NO_HZ_COMMON */
> --
> 2.14.1
>