linux-kernel - Re: [PATCH v4 5/5] sched/fair: Take into account runnable

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <CAKfTPtB4g099_JHG3d0duoggL2fYPVH9b2mxixe88KPveD-RhA@mail.gmail.com>
Date:   Mon, 24 Feb 2020 09:32:03 +0100
From:   Vincent Guittot <vincent.guittot@...aro.org>
To:     Hillf Danton <hdanton@...a.com>
Cc:     Ingo Molnar <mingo@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Juri Lelli <juri.lelli@...hat.com>,
        Dietmar Eggemann <dietmar.eggemann@....com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
        linux-kernel <linux-kernel@...r.kernel.org>,
        Phil Auld <pauld@...hat.com>, Parth Shah <parth@...ux.ibm.com>,
        Valentin Schneider <valentin.schneider@....com>
Subject: Re: [PATCH v4 5/5] sched/fair: Take into account runnable_avg to
 classify group

On Sat, 22 Feb 2020 at 06:55, Hillf Danton <hdanton@...a.com> wrote:
>
>
> On Fri, 21 Feb 2020 14:27:15 +0100 Vincent Guittot wrote:
> >
> > Take into account the new runnable_avg signal to classify a group and to
> > mitigate the volatility of util_avg in face of intensive migration or
> > new task with random utilization.
> >
> > Signed-off-by: Vincent Guittot <vincent.guittot@...aro.org>
> > Reviewed-by: "Dietmar Eggemann <dietmar.eggemann@....com>"
> > ---
> >  kernel/sched/fair.c | 31 ++++++++++++++++++++++++++++++-
> >  1 file changed, 30 insertions(+), 1 deletion(-)
> >
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 608c26d59c46..ef96049a02c3 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -5449,6 +5449,24 @@ static unsigned long cpu_runnable(struct rq *rq)
> >       return cfs_rq_runnable_avg(&rq->cfs);
> >  }
> >
> > +static unsigned long cpu_runnable_without(struct rq *rq, struct task_struct *p)
> > +{
> > +     struct cfs_rq *cfs_rq;
> > +     unsigned int runnable;
> > +
> > +     /* Task has no contribution or is new */
> > +     if (cpu_of(rq) != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
> > +             return cpu_runnable(rq);
> > +
> > +     cfs_rq = &rq->cfs;
> > +     runnable = READ_ONCE(cfs_rq->avg.runnable_avg);
> > +
> > +     /* Discount task's runnable from CPU's runnable */
> > +     lsub_positive(&runnable, p->se.avg.runnable_avg);
> > +
> > +     return runnable;
> > +}
> > +
> >  static unsigned long capacity_of(int cpu)
> >  {
> >       return cpu_rq(cpu)->cpu_capacity;
> > @@ -7718,7 +7736,8 @@ struct sg_lb_stats {
> >       unsigned long avg_load; /*Avg load across the CPUs of the group */
> >       unsigned long group_load; /* Total load over the CPUs of the group */
> >       unsigned long group_capacity;
> > -     unsigned long group_util; /* Total utilization of the group */
> > +     unsigned long group_util; /* Total utilization over the CPUs of the group */
> > +     unsigned long group_runnable; /* Total runnable time over the CPUs of the group */
> >       unsigned int sum_nr_running; /* Nr of tasks running in the group */
> >       unsigned int sum_h_nr_running; /* Nr of CFS tasks running in the group */
> >       unsigned int idle_cpus;
> > @@ -7939,6 +7958,10 @@ group_has_capacity(unsigned int imbalance_pct, struct sg_lb_stats *sgs)
> >       if (sgs->sum_nr_running < sgs->group_weight)
> >               return true;
> >
> > +     if ((sgs->group_capacity * imbalance_pct) <
> > +                     (sgs->group_runnable * 100))
> > +             return false;
> > +
> >       if ((sgs->group_capacity * 100) >
> >                       (sgs->group_util * imbalance_pct))
> >               return true;
>
> Is it likely to compare capacity with runnable in the same way as
> with util e.g

We don't want to compare util and runnable in the same way because
util_avg is a lower bound of the utilization of the capacity, so we
consider that the group doesn't have spare capacity if util_avg is
close to but below the capacity, whereas runnable is the upper bound,
so it must be higher than the capacity before we consider that there
is no spare capacity.

>
>         if ((sgs->group_capacity * 100) >
>             (max(sgs->group_util, sgs->group_runnable) * imbalance_pct))
>                 return true;
>
> > @@ -7964,6 +7987,10 @@ group_is_overloaded(unsigned int imbalance_pct, struct sg_lb_stats *sgs)
> >                       (sgs->group_util * imbalance_pct))
> >               return true;
> >
> > +     if ((sgs->group_capacity * imbalance_pct) <
> > +                     (sgs->group_runnable * 100))
> > +             return true;
> > +
> >       return false;
> >  }
> >
> > @@ -8058,6 +8085,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
> >
> >               sgs->group_load += cpu_load(rq);
> >               sgs->group_util += cpu_util(i);
> > +             sgs->group_runnable += cpu_runnable(rq);
> >               sgs->sum_h_nr_running += rq->cfs.h_nr_running;
> >
> >               nr_running = rq->nr_running;
> > @@ -8333,6 +8361,7 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
> >
> >               sgs->group_load += cpu_load_without(rq, p);
> >               sgs->group_util += cpu_util_without(i, p);
> > +             sgs->group_runnable += cpu_runnable_without(rq, p);
> >               local = task_running_on_cpu(i, p);
> >               sgs->sum_h_nr_running += rq->cfs.h_nr_running - local;
> >
> > --
> > 2.17.1
> >
> >
>