Message-ID: <87libwnpa7.fsf@sejong.aot.lge.com>
Date: Mon, 14 Jan 2013 17:26:40 +0900
From: Namhyung Kim <namhyung@...nel.org>
To: Michael Wang <wangyun@...ux.vnet.ibm.com>
Cc: LKML <linux-kernel@...r.kernel.org>,
Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <a.p.zijlstra@...llo.nl>,
Paul Turner <pjt@...gle.com>, Tejun Heo <tj@...nel.org>,
Mike Galbraith <efault@....de>,
Andrew Morton <akpm@...ux-foundation.org>
Subject: Re: [RFC PATCH 1/2] sched: schedule balance map foundation
Hi Michael,
On Fri, 11 Jan 2013 16:17:43 +0800, Michael Wang wrote:
> In order to get rid of the complex code in select_task_rq_fair(), an
> approach to directly get the sd on each level with the proper flag is
> required.
>
> The schedule balance map is the solution; it records the sd according
> to its flag and level.
>
> For example, cpu_sbm->sd[wake][l] will locate the sd of the cpu which
> supports wake up on level l.
>
> In order to quickly locate the lower sd while changing the base cpu,
> the levels with an empty sd in the map will be filled with the lower sd.
>
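Just to check my understanding of how this is meant to be used: a
caller like select_task_rq_fair() would then index the map directly
instead of walking sd->parent, along the lines of this (hypothetical)
sketch?

	/* Hypothetical caller side, not part of this patch. */
	struct sched_balance_map *sbm = cpu_rq(cpu)->sbm;
	struct sched_domain *sd = sbm->sd[SBM_WAKE_TYPE][l];

	if (sd) {
		/* ... balance the wakeup within sched_domain_span(sd) ... */
	}
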
> Signed-off-by: Michael Wang <wangyun@...ux.vnet.ibm.com>
> ---
> kernel/sched/core.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++
> kernel/sched/sched.h | 28 +++++++++++++++++++++++
> 2 files changed, 89 insertions(+), 0 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 2d8927f..80810a3 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -5497,6 +5497,55 @@ static void update_top_cache_domain(int cpu)
> per_cpu(sd_llc_id, cpu) = id;
> }
>
> +DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_balance_map, sbm_array);
> +
> +static void build_sched_balance_map(int cpu)
> +{
> + struct sched_balance_map *sbm = &per_cpu(sbm_array, cpu);
> + struct sched_domain *sd = cpu_rq(cpu)->sd;
> + struct sched_domain *top_sd = NULL;
> + int i, type, level = 0;
> +
> + while (sd) {
> + if (sd->flags & SD_LOAD_BALANCE) {
> + if (sd->flags & SD_BALANCE_EXEC) {
> + sbm->top_level[SBM_EXEC_TYPE] = sd->level;
> + sbm->sd[SBM_EXEC_TYPE][sd->level] = sd;
> + }
> +
> + if (sd->flags & SD_BALANCE_FORK) {
> + sbm->top_level[SBM_FORK_TYPE] = sd->level;
> + sbm->sd[SBM_FORK_TYPE][sd->level] = sd;
> + }
> +
> + if (sd->flags & SD_BALANCE_WAKE) {
> + sbm->top_level[SBM_WAKE_TYPE] = sd->level;
> + sbm->sd[SBM_WAKE_TYPE][sd->level] = sd;
> + }
> +
> + if (sd->flags & SD_WAKE_AFFINE) {
> + for_each_cpu(i, sched_domain_span(sd)) {
> + if (!sbm->affine_map[i])
> + sbm->affine_map[i] = sd;
> + }
> + }
> + }
> + sd = sd->parent;
> + }
It seems that it can be done like this:

	for_each_domain(cpu, sd) {
		if (!(sd->flags & SD_LOAD_BALANCE))
			continue;

		if (sd->flags & SD_BALANCE_EXEC)
			...
	}
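
For completeness, the whole loop rewritten that way would be roughly
the following (untested sketch, just reusing the fields from this patch):

	for_each_domain(cpu, sd) {
		if (!(sd->flags & SD_LOAD_BALANCE))
			continue;

		if (sd->flags & SD_BALANCE_EXEC) {
			sbm->top_level[SBM_EXEC_TYPE] = sd->level;
			sbm->sd[SBM_EXEC_TYPE][sd->level] = sd;
		}

		if (sd->flags & SD_BALANCE_FORK) {
			sbm->top_level[SBM_FORK_TYPE] = sd->level;
			sbm->sd[SBM_FORK_TYPE][sd->level] = sd;
		}

		if (sd->flags & SD_BALANCE_WAKE) {
			sbm->top_level[SBM_WAKE_TYPE] = sd->level;
			sbm->sd[SBM_WAKE_TYPE][sd->level] = sd;
		}

		if (sd->flags & SD_WAKE_AFFINE) {
			for_each_cpu(i, sched_domain_span(sd)) {
				if (!sbm->affine_map[i])
					sbm->affine_map[i] = sd;
			}
		}
	}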
> +
> + /*
> + * fill the hole to get lower level sd easily.
> + */
> + for (type = 0; type < SBM_MAX_TYPE; type++) {
> + level = sbm->top_level[type];
> + top_sd = sbm->sd[type][level];
> + if ((++level != SBM_MAX_LEVEL) && top_sd) {
> + for (; level < SBM_MAX_LEVEL; level++)
> + sbm->sd[type][level] = top_sd;
> + }
> + }
> +}
[snip]
> +#ifdef CONFIG_SCHED_SMT
> +#define SBM_MAX_LEVEL 4
> +#else
> +#ifdef CONFIG_SCHED_MC
> +#define SBM_MAX_LEVEL 3
> +#else
> +#ifdef CONFIG_SCHED_BOOK
> +#define SBM_MAX_LEVEL 2
> +#else
> +#define SBM_MAX_LEVEL 1
> +#endif
> +#endif
> +#endif
Looks like these fixed level constants do not take NUMA domains into
account.  Doesn't accessing sbm->sd[type][sd->level] in the above while
loop cause an out-of-bounds access on big NUMA machines?
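
If so, maybe at least a guard along these lines is needed inside that
loop until the NUMA levels are handled (hypothetical sketch only):

	/*
	 * On a big NUMA machine sd->level can reach SBM_MAX_LEVEL, so
	 * the stores into sbm->sd[type][sd->level] would write past
	 * the array.
	 */
	if (WARN_ON_ONCE(sd->level >= SBM_MAX_LEVEL))
		break;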
Thanks,
Namhyung
> +
> +enum {
> + SBM_EXEC_TYPE,
> + SBM_FORK_TYPE,
> + SBM_WAKE_TYPE,
> + SBM_MAX_TYPE
> +};
> +
> +struct sched_balance_map {
> + struct sched_domain *sd[SBM_MAX_TYPE][SBM_MAX_LEVEL];
> + int top_level[SBM_MAX_TYPE];
> + struct sched_domain *affine_map[NR_CPUS];
> +};
> +
> #endif /* CONFIG_SMP */
>
> /*
> @@ -403,6 +430,7 @@ struct rq {
> #ifdef CONFIG_SMP
> struct root_domain *rd;
> struct sched_domain *sd;
> + struct sched_balance_map *sbm;
>
> unsigned long cpu_power;