lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAKfTPtAAoXiFhL6SE1WvM7U5ZJodcPDuT_meL6uPbV9LYDAZbw@mail.gmail.com>
Date:   Wed, 30 Jan 2019 15:30:18 +0100
From:   Vincent Guittot <vincent.guittot@...aro.org>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     linux-kernel <linux-kernel@...r.kernel.org>,
        Ingo Molnar <mingo@...hat.com>, Tejun Heo <tj@...nel.org>,
        Sargun Dhillon <sargun@...gun.me>
Subject: Re: [PATCH v2] sched/fair: Fix insertion in rq->leaf_cfs_rq_list

On Wed, 30 Jan 2019 at 15:01, Peter Zijlstra <peterz@...radead.org> wrote:
>
> On Wed, Jan 30, 2019 at 06:22:47AM +0100, Vincent Guittot wrote:
> > Sargun reported a crash:
> >   "I picked up c40f7d74c741a907cfaeb73a7697081881c497d0 sched/fair: Fix
> >    infinite loop in update_blocked_averages() by reverting a9e7f6544b9c
> >    and put it on top of 4.19.13. In addition to this, I uninlined
> >    list_add_leaf_cfs_rq for debugging.
> >
> >    This revealed a new bug that we didn't get to because we kept getting
> >    crashes from the previous issue. When we are running with cgroups that
> >    are rapidly changing, with CFS bandwidth control, and in addition
> >    using the cpusets cgroup, we see this crash. Specifically, it seems to
> >    occur with cgroups that are throttled and we change the allowed
> >    cpuset."
> >
> > The algorithm used to order cfs_rq in rq->leaf_cfs_rq_list assumes that
> > it will walk down to root the 1st time a cfs_rq is used and we will finish
> > to add either a cfs_rq without parent or a cfs_rq with a parent that is
> > already on the list. But this is not always true in presence of throttling.
> > Because a cfs_rq can be throttled even if it has never been used but other CPUs
> > of the cgroup have already used all the bandwdith, we are not sure to go down to
> > the root and add all cfs_rq in the list.
> >
> > Ensure that all cfs_rq will be added in the list even if they are throttled.
> >
> > Reported-by: Sargun Dhillon <sargun@...gun.me>
> > Fixes: 9c2791f936ef ("Fix hierarchical order in rq->leaf_cfs_rq_list")
> > Signed-off-by: Vincent Guittot <vincent.guittot@...aro.org>
>
> Given my previous patch; how about something like so instead?

I still have to run some tests but this looks good to me

>
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -282,13 +282,15 @@ static inline struct cfs_rq *group_cfs_r
>         return grp->my_q;
>  }
>
> -static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
> +static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
>  {
>         struct rq *rq = rq_of(cfs_rq);
>         int cpu = cpu_of(rq);
>
>         if (cfs_rq->on_list)
> -               return;
> +               return rq->tmp_alone_branch == &rq->leaf_cfs_rq_list;
> +
> +       cfs_rq->on_list = 1;
>
>         /*
>          * Ensure we either appear before our parent (if already
> @@ -315,7 +317,10 @@ static inline void list_add_leaf_cfs_rq(
>                  * list.
>                  */
>                 rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
> -       } else if (!cfs_rq->tg->parent) {
> +               return true;
> +       }
> +
> +       if (!cfs_rq->tg->parent) {
>                 /*
>                  * cfs rq without parent should be put
>                  * at the tail of the list.
> @@ -327,23 +332,22 @@ static inline void list_add_leaf_cfs_rq(
>                  * tmp_alone_branch to the beginning of the list.
>                  */
>                 rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
> -       } else {
> -               /*
> -                * The parent has not already been added so we want to
> -                * make sure that it will be put after us.
> -                * tmp_alone_branch points to the begin of the branch
> -                * where we will add parent.
> -                */
> -               list_add_rcu(&cfs_rq->leaf_cfs_rq_list,
> -                       rq->tmp_alone_branch);
> -               /*
> -                * update tmp_alone_branch to points to the new begin
> -                * of the branch
> -                */
> -               rq->tmp_alone_branch = &cfs_rq->leaf_cfs_rq_list;
> +               return true;
>         }
>
> -       cfs_rq->on_list = 1;
> +       /*
> +        * The parent has not already been added so we want to
> +        * make sure that it will be put after us.
> +        * tmp_alone_branch points to the begin of the branch
> +        * where we will add parent.
> +        */
> +       list_add_rcu(&cfs_rq->leaf_cfs_rq_list, rq->tmp_alone_branch);
> +       /*
> +        * update tmp_alone_branch to points to the new begin
> +        * of the branch
> +        */
> +       rq->tmp_alone_branch = &cfs_rq->leaf_cfs_rq_list;
> +       return false;
>  }
>
>  static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
> @@ -4999,6 +5003,12 @@ static void __maybe_unused unthrottle_of
>  }
>
>  #else /* CONFIG_CFS_BANDWIDTH */
> +
> +static inline bool cfs_bandwidth_used(void)
> +{
> +       return false;
> +}
> +
>  static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
>  {
>         return rq_clock_task(rq_of(cfs_rq));
> @@ -5190,6 +5200,21 @@ enqueue_task_fair(struct rq *rq, struct
>
>         }
>
> +       if (cfs_bandwidth_used()) {
> +               /*
> +                * When bandwidth control is enabled; the cfs_rq_throttled()
> +                * breaks in the above iteration can result in incomplete
> +                * leaf list maintenance, resulting in triggering the assertion
> +                * below.
> +                */
> +               for_each_sched_entity(se) {
> +                       cfs_rq = cfs_rq_of(se);
> +
> +                       if (list_add_leaf_cfs_rq(cfs_rq))
> +                               break;
> +               }
> +       }
> +
>         assert_list_leaf_cfs_rq(rq);
>
>         hrtick_update(rq);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ