Currently task_h_load() computes the load of a task and uses that to either
subtract it from the total, or add to it.

However, removing or adding a task need not have any effect on the total load
at all. Imagine adding a task to a group that is local to one cpu - in that
case the total load of that cpu is unaffected.

So properly compute addition/removal:

  s_i = S * rw_i / \Sum_j rw_j
  s'_i = S * (rw_i + wl) / (\Sum_j rw_j + wg)

then s'_i - s_i gives the change in load.

Where s_i is the shares for cpu i, S the group weight, rw_i the runqueue weight
for that cpu, wl the weight we add (subtract) and wg the weight contribution to
the runqueue.

Signed-off-by: Peter Zijlstra
---
 kernel/sched_fair.c | 49 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 40 insertions(+), 9 deletions(-)

Index: linux-2.6/kernel/sched_fair.c
===================================================================
--- linux-2.6.orig/kernel/sched_fair.c
+++ linux-2.6/kernel/sched_fair.c
@@ -1071,22 +1071,53 @@ static inline int wake_idle(int cpu, str
 static const struct sched_class fair_sched_class;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static unsigned long task_h_load(struct task_struct *p)
+static unsigned long effective_load(struct task_group *tg, long wl, int cpu)
 {
-	unsigned long h_load = p->se.load.weight;
-	struct cfs_rq *cfs_rq = cfs_rq_of(&p->se);
+	struct sched_entity *se = tg->se[cpu];
+	long wg = wl;
 
-	update_h_load(task_cpu(p));
+	for_each_sched_entity(se) {
+#define D(n)	(likely(n) ? (n) : 1)
+
+		long S, Srw, rw, s, sn;
+
+		S = se->my_q->tg->shares;
+		s = se->my_q->shares;
+		rw = se->my_q->load.weight;
 
-	h_load = calc_delta_mine(h_load, cfs_rq->h_load, &cfs_rq->load);
+		Srw = S * rw / D(s);
+		sn = S * (rw + wl) / D(Srw + wg);
+
+		wl = sn - s;
+		wg = 0;
+#undef D
+	}
 
-	return h_load;
+	return wl;
 }
+
+static unsigned long task_load_sub(struct task_struct *p)
+{
+	return effective_load(task_group(p), -(long)p->se.load.weight, task_cpu(p));
+}
+
+static unsigned long task_load_add(struct task_struct *p, int cpu)
+{
+	return effective_load(task_group(p), p->se.load.weight, cpu);
+}
+
 #else
-static unsigned long task_h_load(struct task_struct *p)
+
+static unsigned long task_load_sub(struct task_struct *p)
+{
+	return -p->se.load.weight;
+}
+
+static unsigned long task_load_add(struct task_struct *p, int cpu)
 {
 	return p->se.load.weight;
 }
+
 #endif
 
 static int
@@ -1109,9 +1140,9 @@ wake_affine(struct rq *rq, struct sched_
 	 * of the current CPU:
 	 */
 	if (sync)
-		tl -= task_h_load(current);
+		tl += task_load_sub(current);
 
-	balanced = 100*(tl + task_h_load(p)) <= imbalance*load;
+	balanced = 100*(tl + task_load_add(p, this_cpu)) <= imbalance*load;
 
 	/*
 	 * If the currently running task will sleep within

-- 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/