Move the entity migrate handling from enqueue_entity_load_avg() to update_load_avg(). This has two benefits: - {en,de}queue_entity_load_avg() will become purely about managing runnable_load - we can avoid a double update_tg_load_avg() and reduce pressure on the global tg->shares cacheline The reason we do this is so that we can change update_cfs_shares() to change both weight and (future) runnable_weight. For this to work we need to have the cfs_rq averages up-to-date (which means having done the attach), but we need the cfs_rq->avg.runnable_avg to not yet include the se's contribution (since se->on_rq == 0). Signed-off-by: Peter Zijlstra (Intel) --- kernel/sched/fair.c | 70 ++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 34 deletions(-) --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3418,34 +3418,6 @@ update_cfs_rq_load_avg(u64 now, struct c return decayed || removed_load; } -/* - * Optional action to be done while updating the load average - */ -#define UPDATE_TG 0x1 -#define SKIP_AGE_LOAD 0x2 - -/* Update task and its cfs_rq load average */ -static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) -{ - u64 now = cfs_rq_clock_task(cfs_rq); - struct rq *rq = rq_of(cfs_rq); - int cpu = cpu_of(rq); - int decayed; - - /* - * Track task load average for carrying it to new CPU after migrated, and - * track group sched_entity load average for task_h_load calc in migration - */ - if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD)) - __update_load_avg_se(now, cpu, cfs_rq, se); - - decayed = update_cfs_rq_load_avg(now, cfs_rq, true); - decayed |= propagate_entity_load_avg(se); - - if (decayed && (flags & UPDATE_TG)) - update_tg_load_avg(cfs_rq, 0); -} - /** * attach_entity_load_avg - attach this entity to its cfs_rq load avg * @cfs_rq: cfs_rq to attach to @@ -3486,17 +3458,46 @@ static void detach_entity_load_avg(struc cfs_rq_util_change(cfs_rq); } +/* + * Optional action to be done while updating the load average + */ +#define UPDATE_TG 0x1 +#define SKIP_AGE_LOAD 0x2 +#define DO_ATTACH 0x4 + +/* Update task and its cfs_rq load average */ +static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) +{ + u64 now = cfs_rq_clock_task(cfs_rq); + struct rq *rq = rq_of(cfs_rq); + int cpu = cpu_of(rq); + int decayed; + + /* + * Track task load average for carrying it to new CPU after migrated, and + * track group sched_entity load average for task_h_load calc in migration + */ + if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD)) + __update_load_avg_se(now, cpu, cfs_rq, se); + + decayed = update_cfs_rq_load_avg(now, cfs_rq, true); + decayed |= propagate_entity_load_avg(se); + + if (!se->avg.last_update_time && (flags & DO_ATTACH)) { + + attach_entity_load_avg(cfs_rq, se); + update_tg_load_avg(cfs_rq, 0); + + } else if (decayed && (flags & UPDATE_TG)) + update_tg_load_avg(cfs_rq, 0); +} + /* Add the load generated by se into cfs_rq's load average */ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { cfs_rq->runnable_load_avg += se->avg.load_avg; cfs_rq->runnable_load_sum += se_weight(se) * se->avg.load_sum; - - if (!se->avg.last_update_time) { - attach_entity_load_avg(cfs_rq, se); - update_tg_load_avg(cfs_rq, 0); - } } /* Remove the runnable load generated by se from cfs_rq's runnable load average */ @@ -3586,6 +3587,7 @@ update_cfs_rq_load_avg(u64 now, struct c #define UPDATE_TG 0x0 #define SKIP_AGE_LOAD 0x0 +#define DO_ATTACH 0x0 static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int not_used1) { @@ -3740,7 +3742,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, st * its group cfs_rq * - Add its new weight to cfs_rq->load.weight */ - update_load_avg(cfs_rq, se, UPDATE_TG); + update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH); enqueue_entity_load_avg(cfs_rq, se); update_cfs_shares(se); account_entity_enqueue(cfs_rq, se);