lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAKfTPtB_Bm=xYdudD6d0PLqm_kZ1bCQ3oQH2NMZ0ZGOqsJ8k9w@mail.gmail.com>
Date:   Mon, 23 May 2022 18:23:09 +0200
From:   Vincent Guittot <vincent.guittot@...aro.org>
To:     Chengming Zhou <zhouchengming@...edance.com>
Cc:     mingo@...hat.com, peterz@...radead.org, juri.lelli@...hat.com,
        dietmar.eggemann@....com, rostedt@...dmis.org, bsegall@...gle.com,
        mgorman@...e.de, bristot@...hat.com, linux-kernel@...r.kernel.org,
        duanxiongchun@...edance.com, songmuchun@...edance.com,
        zhengqi.arch@...edance.com
Subject: Re: [PATCH] sched/fair: optimize and simplify rq leaf_cfs_rq_list

On Wed, 27 Apr 2022 at 18:07, Chengming Zhou
<zhouchengming@...edance.com> wrote:
>
> We notice the rq leaf_cfs_rq_list has two problems when do bugfix
> backports and some test profiling.
>
> 1. cfs_rqs under throttled subtree could be added to the list, and
>    make their fully decayed ancestors on the list, even though not needed.
>
> 2. #1 also make the leaf_cfs_rq_list management complex and error prone,
>    this is the list of related bugfix so far:
>
>    commit 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
>    commit fe61468b2cbc ("sched/fair: Fix enqueue_task_fair warning")
>    commit b34cb07dde7c ("sched/fair: Fix enqueue_task_fair() warning some more")
>    commit 39f23ce07b93 ("sched/fair: Fix unthrottle_cfs_rq() for leaf_cfs_rq list")
>    commit 0258bdfaff5b ("sched/fair: Fix unfairness caused by missing load decay")
>    commit a7b359fc6a37 ("sched/fair: Correctly insert cfs_rq's to list on unthrottle")
>    commit fdaba61ef8a2 ("sched/fair: Ensure that the CFS parent is added after unthrottling")
>    commit 2630cde26711 ("sched/fair: Add ancestors of unthrottled undecayed cfs_rq")
>
> commit 31bc6aeaab1d ("sched/fair: Optimize update_blocked_averages()")
> delete every cfs_rq under throttled subtree from rq->leaf_cfs_rq_list,
> and delete the throttled_hierarchy() test in update_blocked_averages(),
> which optimized update_blocked_averages().
>
> But those later bugfix add cfs_rqs under throttled subtree back to
> rq->leaf_cfs_rq_list again, with their fully decayed ancestors, for
> the integrity of rq->leaf_cfs_rq_list.
>
> This patch takes another method, skip all cfs_rqs under throttled
> hierarchy when list_add_leaf_cfs_rq(), to completely make cfs_rqs
> under throttled subtree off the leaf_cfs_rq_list.
>
> So we don't need to consider throttled related things in
> enqueue_entity(), unthrottle_cfs_rq() and enqueue_task_fair(),
> which simplify the code a lot. Also optimize update_blocked_averages()
> since cfs_rqs under throttled hierarchy and their ancestors
> won't be on the leaf_cfs_rq_list.
>
> Signed-off-by: Chengming Zhou <zhouchengming@...edance.com>
> ---
>  kernel/sched/fair.c | 72 ++++++++++-----------------------------------
>  1 file changed, 16 insertions(+), 56 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 1ad18b5cc1b8..083c3d32c899 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -309,6 +309,8 @@ const struct sched_class fair_sched_class;
>
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>
> +static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
> +
>  /* Walk up scheduling entities hierarchy */
>  #define for_each_sched_entity(se) \
>                 for (; se; se = se->parent)
> @@ -331,7 +333,7 @@ static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
>         struct rq *rq = rq_of(cfs_rq);
>         int cpu = cpu_of(rq);
>
> -       if (cfs_rq->on_list)
> +       if (cfs_rq->on_list || throttled_hierarchy(cfs_rq))

Please move throttled_hierarchy() outside list_add_leaf_cfs_rq()
because the task will not be added in this case which is quite
misleading

I will continue to check the various corner cases but I haven't seen
problem so far with your method

>                 return rq->tmp_alone_branch == &rq->leaf_cfs_rq_list;
>
>         cfs_rq->on_list = 1;
> @@ -3242,8 +3244,6 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
>  }
>  #endif /* CONFIG_SMP */
>
> -static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
> -
>  /*
>   * Recomputes the group entity based on the current state of its group
>   * runqueue.
> @@ -4356,16 +4356,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>                 __enqueue_entity(cfs_rq, se);
>         se->on_rq = 1;
>
> -       /*
> -        * When bandwidth control is enabled, cfs might have been removed
> -        * because of a parent been throttled but cfs->nr_running > 1. Try to
> -        * add it unconditionally.
> -        */
> -       if (cfs_rq->nr_running == 1 || cfs_bandwidth_used())
> +       if (cfs_rq->nr_running == 1) {
>                 list_add_leaf_cfs_rq(cfs_rq);
> -
> -       if (cfs_rq->nr_running == 1)
>                 check_enqueue_throttle(cfs_rq);
> +       }
>  }
>
>  static void __clear_buddies_last(struct sched_entity *se)
> @@ -4980,11 +4974,18 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
>         /* update hierarchical throttle state */
>         walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
>
> -       /* Nothing to run but something to decay (on_list)? Complete the branch */
>         if (!cfs_rq->load.weight) {
> -               if (cfs_rq->on_list)
> -                       goto unthrottle_throttle;
> -               return;
> +               if (!cfs_rq->on_list)
> +                       return;
> +               /*
> +                * Nothing to run but something to decay (on_list)?
> +                * Complete the branch.
> +                */
> +               for_each_sched_entity(se) {
> +                       if (list_add_leaf_cfs_rq(cfs_rq_of(se)))
> +                               break;
> +               }
> +               goto unthrottle_throttle;
>         }
>
>         task_delta = cfs_rq->h_nr_running;
> @@ -5022,31 +5023,12 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
>                 /* end evaluation on encountering a throttled cfs_rq */
>                 if (cfs_rq_throttled(qcfs_rq))
>                         goto unthrottle_throttle;
> -
> -               /*
> -                * One parent has been throttled and cfs_rq removed from the
> -                * list. Add it back to not break the leaf list.
> -                */
> -               if (throttled_hierarchy(qcfs_rq))
> -                       list_add_leaf_cfs_rq(qcfs_rq);
>         }
>
>         /* At this point se is NULL and we are at root level*/
>         add_nr_running(rq, task_delta);
>
>  unthrottle_throttle:
> -       /*
> -        * The cfs_rq_throttled() breaks in the above iteration can result in
> -        * incomplete leaf list maintenance, resulting in triggering the
> -        * assertion below.
> -        */
> -       for_each_sched_entity(se) {
> -               struct cfs_rq *qcfs_rq = cfs_rq_of(se);
> -
> -               if (list_add_leaf_cfs_rq(qcfs_rq))
> -                       break;
> -       }
> -
>         assert_list_leaf_cfs_rq(rq);
>
>         /* Determine whether we need to wake up potentially idle CPU: */
> @@ -5701,13 +5683,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>                 /* end evaluation on encountering a throttled cfs_rq */
>                 if (cfs_rq_throttled(cfs_rq))
>                         goto enqueue_throttle;
> -
> -               /*
> -                * One parent has been throttled and cfs_rq removed from the
> -                * list. Add it back to not break the leaf list.
> -                */
> -               if (throttled_hierarchy(cfs_rq))
> -                       list_add_leaf_cfs_rq(cfs_rq);
>         }
>
>         /* At this point se is NULL and we are at root level*/
> @@ -5731,21 +5706,6 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>                 update_overutilized_status(rq);
>
>  enqueue_throttle:
> -       if (cfs_bandwidth_used()) {
> -               /*
> -                * When bandwidth control is enabled; the cfs_rq_throttled()
> -                * breaks in the above iteration can result in incomplete
> -                * leaf list maintenance, resulting in triggering the assertion
> -                * below.
> -                */
> -               for_each_sched_entity(se) {
> -                       cfs_rq = cfs_rq_of(se);
> -
> -                       if (list_add_leaf_cfs_rq(cfs_rq))
> -                               break;
> -               }
> -       }
> -
>         assert_list_leaf_cfs_rq(rq);
>
>         hrtick_update(rq);
> --
> 2.35.2
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ