It was observed that in __update_group_shares_cpu() the following can hold:

  rq_weight > aggregate()->rq_weight

This is caused by forks/wakeups that happen between the initial aggregate
pass and the locking of the RQs for load balancing. To avoid this situation,
partially redo the aggregation once we have the RQs locked (which prevents
new tasks from appearing).

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/sched.c |   20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

Index: linux-2.6/kernel/sched.c
===================================================================
--- linux-2.6.orig/kernel/sched.c
+++ linux-2.6/kernel/sched.c
@@ -1703,6 +1703,11 @@ aggregate_get_up(struct task_group *tg, 
 	aggregate_group_set_shares(tg, cpu, sd);
 }
 
+static void
+aggregate_get_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
+{
+}
+
 static DEFINE_PER_CPU(spinlock_t, aggregate_lock);
 
 static void __init init_aggregate(void)
@@ -1722,6 +1727,11 @@ static int get_aggregate(int cpu, struct
 	return 1;
 }
 
+static void update_aggregate(int cpu, struct sched_domain *sd)
+{
+	aggregate_walk_tree(aggregate_get_down, aggregate_get_nop, cpu, sd);
+}
+
 static void put_aggregate(int cpu, struct sched_domain *sd)
 {
 	spin_unlock(&per_cpu(aggregate_lock, cpu));
@@ -1743,6 +1753,10 @@ static inline int get_aggregate(int cpu,
 	return 0;
 }
 
+static inline void update_aggregate(int cpu, struct sched_domain *sd)
+{
+}
+
 static inline void put_aggregate(int cpu, struct sched_domain *sd)
 {
 }
@@ -2180,6 +2194,12 @@ find_idlest_group(struct sched_domain *s
 	int load_idx = sd->forkexec_idx;
 	int imbalance = 100 + (sd->imbalance_pct-100)/2;
 
+	/*
+	 * now that we have both rqs locked the rq weight won't change
+	 * anymore - so update the stats.
+	 */
+	update_aggregate(this_cpu, sd);
+
 	do {
 		unsigned long load, avg_load;
 		int local_group;

-- 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/