cpu_power is supposed to be a representation of the process capacity of the cpu, not a value to randomly tweak in order to affect placement. Remove the placement hacks. Signed-off-by: Peter Zijlstra --- include/linux/sched.h | 1 + include/linux/topology.h | 1 + kernel/sched.c | 34 ++++++++++++++++++---------------- 3 files changed, 20 insertions(+), 16 deletions(-) Index: linux-2.6/kernel/sched.c =================================================================== --- linux-2.6.orig/kernel/sched.c +++ linux-2.6/kernel/sched.c @@ -8468,15 +8468,13 @@ static void free_sched_groups(const stru * there are asymmetries in the topology. If there are asymmetries, group * having more cpu_power will pickup more load compared to the group having * less cpu_power. - * - * cpu_power will be a multiple of SCHED_LOAD_SCALE. This multiple represents - * the maximum number of tasks a group can handle in the presence of other idle - * or lightly loaded groups in the same sched domain. */ static void init_sched_groups_power(int cpu, struct sched_domain *sd) { struct sched_domain *child; struct sched_group *group; + long power; + int weight; WARN_ON(!sd || !sd->groups); @@ -8487,22 +8485,20 @@ static void init_sched_groups_power(int sd->groups->__cpu_power = 0; - /* - * For perf policy, if the groups in child domain share resources - * (for example cores sharing some portions of the cache hierarchy - * or SMT), then set this domain groups cpu_power such that each group - * can handle only one task, when there are other idle groups in the - * same sched domain. - */ - if (!child || (!(sd->flags & SD_POWERSAVINGS_BALANCE) && - (child->flags & - (SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES)))) { - sg_inc_cpu_power(sd->groups, SCHED_LOAD_SCALE); + if (!child) { + power = SCHED_LOAD_SCALE; + weight = cpumask_weight(sched_domain_span(sd)); + /* + * SMT siblings share the power of a single core. + */ + if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) + power /= weight; + sg_inc_cpu_power(sd->groups, power); return; } /* - * add cpu_power of each child group to this groups cpu_power + * Add cpu_power of each child group to this groups cpu_power. */ group = child->groups; do { -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/