lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAKfTPtCnEdPpZSu3=t4tTEm-nWEwkyTwQuTLngcgABhcMGWvZA@mail.gmail.com>
Date:   Fri, 2 Jun 2023 15:45:18 +0200
From:   Vincent Guittot <vincent.guittot@...aro.org>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     mingo@...nel.org, linux-kernel@...r.kernel.org,
        juri.lelli@...hat.com, dietmar.eggemann@....com,
        rostedt@...dmis.org, bsegall@...gle.com, mgorman@...e.de,
        bristot@...hat.com, corbet@....net, qyousef@...alina.io,
        chris.hyser@...cle.com, patrick.bellasi@...bug.net, pjt@...gle.com,
        pavel@....cz, qperret@...gle.com, tim.c.chen@...ux.intel.com,
        joshdon@...gle.com, timj@....org, kprateek.nayak@....com,
        yu.c.chen@...el.com, youssefesmat@...omium.org,
        joel@...lfernandes.org, efault@....de, tglx@...utronix.de
Subject: Re: [PATCH 11/15] sched/eevdf: Better handle mixed slice length

On Wed, 31 May 2023 at 14:47, Peter Zijlstra <peterz@...radead.org> wrote:
>
> In the case where (due to latency-nice) there are different request
> sizes in the tree, the smaller requests tend to be dominated by the
> larger. Also note how the EEVDF lag limits are based on r_max.
>
> Therefore; add a heuristic that for the mixed request size case, moves
> smaller requests to placement strategy #2 which ensures they're
> immidiately eligible and and due to their smaller (virtual) deadline
> will cause preemption.
>
> NOTE: this relies on update_entity_lag() to impose lag limits above
> a single slice.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> ---
>  kernel/sched/fair.c     |   30 ++++++++++++++++++++++++++++++
>  kernel/sched/features.h |    1 +
>  kernel/sched/sched.h    |    1 +
>  3 files changed, 32 insertions(+)
>
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -642,6 +642,7 @@ avg_vruntime_add(struct cfs_rq *cfs_rq,
>         s64 key = entity_key(cfs_rq, se);
>
>         cfs_rq->avg_vruntime += key * weight;
> +       cfs_rq->avg_slice += se->slice * weight;
>         cfs_rq->avg_load += weight;
>  }
>
> @@ -652,6 +653,7 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq,
>         s64 key = entity_key(cfs_rq, se);
>
>         cfs_rq->avg_vruntime -= key * weight;
> +       cfs_rq->avg_slice -= se->slice * weight;
>         cfs_rq->avg_load -= weight;
>  }
>
> @@ -4908,6 +4910,21 @@ static inline void update_misfit_status(
>
>  #endif /* CONFIG_SMP */
>
> +static inline bool
> +entity_has_slept(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> +{
> +       u64 now;
> +
> +       if (!(flags & ENQUEUE_WAKEUP))
> +               return false;
> +
> +       if (flags & ENQUEUE_MIGRATED)
> +               return true;
> +
> +       now = rq_clock_task(rq_of(cfs_rq));
> +       return (s64)(se->exec_start - now) >= se->slice;
> +}
> +
>  static void
>  place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>  {
> @@ -4930,6 +4947,19 @@ place_entity(struct cfs_rq *cfs_rq, stru
>                 lag = se->vlag;
>
>                 /*
> +                * For latency sensitive tasks; those that have a shorter than
> +                * average slice and do not fully consume the slice, transition
> +                * to EEVDF placement strategy #2.
> +                */
> +               if (sched_feat(PLACE_FUDGE) &&
> +                   (cfs_rq->avg_slice > se->slice * cfs_rq->avg_load) &&
> +                   entity_has_slept(cfs_rq, se, flags)) {
> +                       lag += vslice;
> +                       if (lag > 0)
> +                               lag = 0;

This PLACE_FUDGE looks quite not a good heuristic because it breaks
the better fair sharing of cpu bandwidth that EEVDF is supposed to
bring. Furthermore, it breaks the isolation between cpu bandwidth and
latency because playing with latency_nice will impact your cpu
bandwidth

> +               }
> +
> +               /*
>                  * If we want to place a task and preserve lag, we have to
>                  * consider the effect of the new entity on the weighted
>                  * average and compensate for this, otherwise lag can quickly
> --- a/kernel/sched/features.h
> +++ b/kernel/sched/features.h
> @@ -5,6 +5,7 @@
>   * sleep+wake cycles. EEVDF placement strategy #1, #2 if disabled.
>   */
>  SCHED_FEAT(PLACE_LAG, true)
> +SCHED_FEAT(PLACE_FUDGE, true)
>  SCHED_FEAT(PLACE_DEADLINE_INITIAL, true)
>
>  /*
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -555,6 +555,7 @@ struct cfs_rq {
>         unsigned int            idle_h_nr_running; /* SCHED_IDLE */
>
>         s64                     avg_vruntime;
> +       u64                     avg_slice;
>         u64                     avg_load;
>
>         u64                     exec_clock;
>
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ