When the capacity drops low, we want to migrate load away. Allow the load-balancer to remove all tasks when we hit rock bottom. Signed-off-by: Peter Zijlstra LKML-Reference: --- kernel/sched.c | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) Index: linux-2.6/kernel/sched.c =================================================================== --- linux-2.6.orig/kernel/sched.c +++ linux-2.6/kernel/sched.c @@ -3951,7 +3951,7 @@ static inline void update_sd_lb_stats(st * and move all the excess tasks away. */ if (prefer_sibling) - sgs.group_capacity = 1; + sgs.group_capacity = min(sgs.group_capacity, 1); if (local_group) { sds->this_load = sgs.avg_load; @@ -4183,6 +4183,26 @@ ret: return NULL; } +static struct sched_group *group_of(int cpu) +{ + struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd); + + if (!sd) + return NULL; + + return sd->groups; +} + +static unsigned long power_of(int cpu) +{ + struct sched_group *group = group_of(cpu); + + if (!group) + return SCHED_LOAD_SCALE; + + return group->__cpu_power; +} + /* * find_busiest_queue - find the busiest runqueue among the cpus in group. */ @@ -4195,15 +4215,18 @@ find_busiest_queue(struct sched_group *g int i; for_each_cpu(i, sched_group_cpus(group)) { + unsigned long power = power_of(i); + unsigned long capacity = power >> SCHED_LOAD_SHIFT; unsigned long wl; if (!cpumask_test_cpu(i, cpus)) continue; rq = cpu_rq(i); - wl = weighted_cpuload(i); + wl = weighted_cpuload(i) * SCHED_LOAD_SCALE; + wl /= power; - if (rq->nr_running == 1 && wl > imbalance) + if (capacity && rq->nr_running == 1 && wl > imbalance) continue; if (wl > max_load) { -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/