Message-ID: <AANLkTi=w8MUhf3_=ADzPX1gPycYrQkGywQSAm6cLeidE@mail.gmail.com>
Date:	Fri, 3 Sep 2010 02:52:54 +0100
From:	Paul Turner <pjt@...gle.com>
To:	Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc:	linux-kernel@...r.kernel.org, Ingo Molnar <mingo@...e.hu>,
	Srivatsa Vaddagiri <vatsa@...ibm.com>,
	Chris Friesen <cfriesen@...tel.com>,
	Vaidyanathan Srinivasan <svaidy@...ux.vnet.ibm.com>,
	Pierre Bourdon <pbourdon@...ellency.fr>
Subject: Re: [RFC][PATCH 3/3] sched: On-demand tg_shares_up()

On Sat, Aug 28, 2010 at 11:30 PM, Peter Zijlstra <a.p.zijlstra@...llo.nl> wrote:
> Make tg_shares_up() use the active cgroup list; this means we cannot
> do a strict bottom-up walk of the hierarchy, but assuming it's a very
> wide tree with a small number of active groups it should be a win.
>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
> ---
>  kernel/sched.c      |   67 ----------------------------------------------------
>  kernel/sched_fair.c |   59 +++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 59 insertions(+), 67 deletions(-)
>
> Index: linux-2.6/kernel/sched.c
> ===================================================================
> --- linux-2.6.orig/kernel/sched.c
> +++ linux-2.6/kernel/sched.c
> @@ -281,13 +281,6 @@ static DEFINE_SPINLOCK(task_group_lock);
>
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>
> -#ifdef CONFIG_SMP
> -static int root_task_group_empty(void)
> -{
> -       return list_empty(&root_task_group.children);
> -}
> -#endif
> -
>  # define INIT_TASK_GROUP_LOAD  NICE_0_LOAD
>
>  /*
> @@ -1530,48 +1523,6 @@ static unsigned long cpu_avg_load_per_ta
>
>  #ifdef CONFIG_FAIR_GROUP_SCHED
>
> -static void update_cfs_load(struct cfs_rq *cfs_rq, int lb);
> -static void update_cfs_shares(struct cfs_rq *cfs_rq);
> -
> -/*
> - * update tg->load_weight by folding this cpu's load_avg
> - */
> -static int tg_shares_up(struct task_group *tg, void *data)
> -{
> -       unsigned long load_avg;
> -       struct cfs_rq *cfs_rq;
> -       unsigned long flags;
> -       int cpu = (long)data;
> -       struct rq *rq;
> -
> -       if (!tg->se[cpu])
> -               return 0;
> -
> -       rq = cpu_rq(cpu);
> -       cfs_rq = tg->cfs_rq[cpu];
> -
> -       raw_spin_lock_irqsave(&rq->lock, flags);
> -
> -       update_rq_clock(rq);
> -       update_cfs_load(cfs_rq, 1);
> -
> -       load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
> -       load_avg -= cfs_rq->load_contribution;
> -
> -       atomic_add(load_avg, &tg->load_weight);
> -       cfs_rq->load_contribution += load_avg;
> -
> -       /*
> -        * We need to update shares after updating tg->load_weight in
> -        * order to adjust the weight of groups with long running tasks.
> -        */
> -       update_cfs_shares(cfs_rq);
> -
> -       raw_spin_unlock_irqrestore(&rq->lock, flags);
> -
> -       return 0;
> -}
> -
>  /*
>  * Compute the cpu's hierarchical load factor for each task group.
>  * This needs to be done in a top-down fashion because the load of a child
> @@ -1595,29 +1546,11 @@ static int tg_load_down(struct task_grou
>        return 0;
>  }
>
> -static void update_shares(long cpu)
> -{
> -       if (root_task_group_empty())
> -               return;
> -
> -       /*
> -        * XXX: replace with an on-demand list
> -        */
> -
> -       walk_tg_tree(tg_nop, tg_shares_up, (void *)cpu);
> -}
> -
>  static void update_h_load(long cpu)
>  {
>        walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
>  }
>
> -#else
> -
> -static inline void update_shares(int cpu)
> -{
> -}
> -
>  #endif
>
>  #ifdef CONFIG_PREEMPT
> Index: linux-2.6/kernel/sched_fair.c
> ===================================================================
> --- linux-2.6.orig/kernel/sched_fair.c
> +++ linux-2.6/kernel/sched_fair.c
> @@ -2002,6 +2002,61 @@ out:
>  }
>
>  #ifdef CONFIG_FAIR_GROUP_SCHED
> +/*
> + * update tg->load_weight by folding this cpu's load_avg
> + */
> +static int tg_shares_up(struct task_group *tg, int cpu)
> +{
> +       unsigned long load_avg;
> +       struct cfs_rq *cfs_rq;
> +       unsigned long flags;
> +       struct rq *rq;
> +
> +       if (!tg->se[cpu])
> +               return 0;
> +
> +       rq = cpu_rq(cpu);
> +       cfs_rq = tg->cfs_rq[cpu];
> +
> +       raw_spin_lock_irqsave(&rq->lock, flags);
> +
> +       update_rq_clock(rq);
> +       update_cfs_load(cfs_rq, 1);
> +
> +       load_avg = div64_u64(cfs_rq->load_avg, cfs_rq->load_period+1);
> +       load_avg -= cfs_rq->load_contribution;
> +
> +       atomic_add(load_avg, &tg->load_weight);
> +       cfs_rq->load_contribution += load_avg;
> +
> +       /*
> +        * We need to update shares after updating tg->load_weight in
> +        * order to adjust the weight of groups with long running tasks.
> +        */
> +       update_cfs_shares(cfs_rq);
> +
> +       raw_spin_unlock_irqrestore(&rq->lock, flags);
> +
> +       return 0;
> +}
> +
> +static void update_shares(int cpu)
> +{
> +       struct cfs_rq *cfs_rq;
> +       struct rq *rq = cpu_rq(cpu);
> +
> +       rcu_read_lock();
> +       for_each_leaf_cfs_rq(rq, cfs_rq) {
> +               struct task_group *tg = cfs_rq->tg;
> +
> +               do {
> +                       tg_shares_up(tg, cpu);
> +                       tg = tg->parent;
> +               } while (tg);
> +       }

This will visit task groups multiple times:

In the case of a-b-task, both {a} and {b} will be on the leaf cfs_rq
list, resulting in a being visited both as b's parent and as a leaf
entity.
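
For illustration, a minimal user-space sketch (made-up names, not the
kernel data structures) of the per-leaf parent walk above; with the
hierarchy a -> b and both groups on the leaf list, a is recomputed
twice, once as a leaf and once as b's parent:

#include <stdio.h>

struct tg { const char *name; struct tg *parent; int visits; };

int main(void)
{
	struct tg a = { "a", NULL, 0 };
	struct tg b = { "b", &a, 0 };
	struct tg *leaf_list[] = { &a, &b };	/* both groups sit on the leaf list */
	struct tg *tg;
	unsigned int i;

	for (i = 0; i < 2; i++) {
		/* mirrors the do { ... tg = tg->parent; } while (tg) walk */
		for (tg = leaf_list[i]; tg; tg = tg->parent)
			tg->visits++;		/* stands in for tg_shares_up() */
	}

	printf("a: %d visits, b: %d visits\n", a.visits, b.visits);
	return 0;
}

which prints "a: 2 visits, b: 1 visits".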

Rather than juggling to avoid this, it seems easier to maintain an
ordering on rq->leaf_cfs_rq_list.

That is:

When an entity is added:
a) If it has a parent, insert it immediately before the parent in the list.
b) Otherwise (it is attached to the root), attach it to the tail of
rq->leaf_cfs_rq_list.

This can actually be simplified to: if there is no parent, insert at
the tail; otherwise insert at the head. This works because we know the
parent will always have been processed prior to the child.

Traversing the list in order should then ensure that, for the 'next'
entity at any given time, all of its child entities have already been
processed and that its update is coherent.
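
Something like the following user-space sketch (made-up names, not a
patch) shows the simplified rule: since a parent group is always added
before its children, head-insertion for child groups and tail-insertion
for top-level groups keeps every child ahead of its parent, so an
in-order traversal is bottom-up:

#include <stdio.h>

struct group {
	const char *name;
	struct group *parent;
	struct group *next;		/* linkage on the leaf list */
};

static struct group *leaf_head, *leaf_tail;

static void leaf_list_add(struct group *g)
{
	if (!g->parent) {
		/* top-level group: append at the tail */
		g->next = NULL;
		if (leaf_tail)
			leaf_tail->next = g;
		else
			leaf_head = g;
		leaf_tail = g;
	} else {
		/* has a parent: insert at the head */
		g->next = leaf_head;
		leaf_head = g;
		if (!leaf_tail)
			leaf_tail = g;
	}
}

int main(void)
{
	struct group a = { "a", NULL, NULL };
	struct group b = { "b", &a, NULL };
	struct group c = { "c", &b, NULL };
	struct group *g;

	/* groups are created parents-first: a, then b, then c */
	leaf_list_add(&a);
	leaf_list_add(&b);
	leaf_list_add(&c);

	for (g = leaf_head; g; g = g->next)
		printf("%s ", g->name);	/* prints "c b a " */
	printf("\n");
	return 0;
}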

> +       rcu_read_unlock();
> +}
> +
>  static unsigned long
>  load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
>                  unsigned long max_load_move,
> @@ -2049,6 +2104,10 @@ load_balance_fair(struct rq *this_rq, in
>        return max_load_move - rem_load_move;
>  }
>  #else
> +static inline void update_shares(int cpu)
> +{
> +}
> +
>  static unsigned long
>  load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
>                  unsigned long max_load_move,
>
>
>