When the capacity drops low, we want to migrate load away. Allow the load-balancer to remove all tasks when we hit rock bottom. Signed-off-by: Peter Zijlstra [ego@in.ibm.com: fix to update_sd_power_savings_stats] --- kernel/sched.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) Index: linux-2.6/kernel/sched.c =================================================================== --- linux-2.6.orig/kernel/sched.c +++ linux-2.6/kernel/sched.c @@ -3668,7 +3668,7 @@ static inline void update_sd_power_savin * capacity but still has some space to pick up some load * from other group and save more power */ - if (sgs->sum_nr_running > sgs->group_capacity - 1) + if (sgs->sum_nr_running + 1 > sgs->group_capacity) return; if (sgs->sum_nr_running > sds->leader_nr_running || @@ -3908,8 +3908,8 @@ static inline void update_sg_lb_stats(st if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task) sgs->group_imb = 1; - sgs->group_capacity = group->__cpu_power / SCHED_LOAD_SCALE; - + sgs->group_capacity = + DIV_ROUND_CLOSEST(group->__cpu_power, SCHED_LOAD_SCALE); } /** @@ -3959,7 +3959,7 @@ static inline void update_sd_lb_stats(st * and move all the excess tasks away. */ if (prefer_sibling) - sgs.group_capacity = 1; + sgs.group_capacity = min(sgs.group_capacity, 1UL); if (local_group) { sds->this_load = sgs.avg_load; @@ -4191,6 +4191,26 @@ ret: return NULL; } +static struct sched_group *group_of(int cpu) +{ + struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd); + + if (!sd) + return NULL; + + return sd->groups; +} + +static unsigned long power_of(int cpu) +{ + struct sched_group *group = group_of(cpu); + + if (!group) + return SCHED_LOAD_SCALE; + + return group->__cpu_power; +} + /* * find_busiest_queue - find the busiest runqueue among the cpus in group. */ @@ -4203,15 +4223,18 @@ find_busiest_queue(struct sched_group *g int i; for_each_cpu(i, sched_group_cpus(group)) { + unsigned long power = power_of(i); + unsigned long capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE); unsigned long wl; if (!cpumask_test_cpu(i, cpus)) continue; rq = cpu_rq(i); - wl = weighted_cpuload(i); + wl = weighted_cpuload(i) * SCHED_LOAD_SCALE; + wl /= power; - if (rq->nr_running == 1 && wl > imbalance) + if (capacity && rq->nr_running == 1 && wl > imbalance) continue; if (wl > max_load) { -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/