Remove the load from the load_sum for sched_entities, basically
turning load_sum into runnable_sum.

This prepares for better reweighting of group entities.

Since we now have different rules for computing load_avg, split
___update_load_avg() into two parts, ___update_load_sum() and
___update_load_avg().

So for se:

  ___update_load_sum(.weight = 1)
  ___update_load_avg(.weight = se->load.weight)

and for cfs_rq:

  ___update_load_sum(.weight = cfs_rq->load.weight)
  ___update_load_avg(.weight = 1)

Since the primary consumable is load_avg, most things will not be
affected. Only those few sites that initialize/modify load_sum need
attention.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
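
[Note, not part of the patch: a minimal userspace sketch of step 2 and of
why the split makes reweighting cheap. Illustrative only: the struct is
trimmed down, step 1 (decay/accumulate) is elided, and only the
LOAD_AVG_MAX constant matches the kernel; the real code is in the diff
below.]

  #include <stdio.h>
  #include <stdint.h>

  #define LOAD_AVG_MAX 47742    /* maximum possible sum: \Sum 1024*y^n */

  struct sched_avg {
          uint64_t load_sum;              /* weight-free: a runnable_sum */
          uint32_t period_contrib;
          unsigned long load_avg;
  };

  /* Step 2 only; mirrors the shape of the new ___update_load_avg(). */
  static void update_load_avg(struct sched_avg *sa, unsigned long weight)
  {
          uint32_t divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;

          sa->load_avg = weight * sa->load_sum / divider;
  }

  int main(void)
  {
          /* An entity that was runnable for about half its history. */
          struct sched_avg sa = {
                  .load_sum = LOAD_AVG_MAX / 2,
                  .period_contrib = 512,
          };

          update_load_avg(&sa, 1024);     /* nice-0 weight */
          printf("weight 1024 -> load_avg %lu\n", sa.load_avg);

          /*
           * Reweighting no longer rescales the decayed history kept in
           * load_sum; redoing step 2 with the new weight suffices.
           */
          update_load_avg(&sa, 2048);
          printf("weight 2048 -> load_avg %lu\n", sa.load_avg);

          return 0;
  }
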
 kernel/sched/fair.c | 91 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 64 insertions(+), 27 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -744,7 +744,7 @@ void init_entity_runnable_average(struct
 	 */
 	if (entity_is_task(se))
 		sa->load_avg = scale_load_down(se->load.weight);
-	sa->load_sum = sa->load_avg * LOAD_AVG_MAX;
+	sa->load_sum = LOAD_AVG_MAX;
 	/*
 	 * At this point, util_avg won't be used in select_task_rq_fair anyway
 	 */
@@ -1967,7 +1967,7 @@ static u64 numa_get_avg_runtime(struct t
 		delta = runtime - p->last_sum_exec_runtime;
 		*period = now - p->last_task_numa_placement;
 	} else {
-		delta = p->se.avg.load_sum / p->se.load.weight;
+		delta = p->se.avg.load_sum;
 		*period = LOAD_AVG_MAX;
 	}
 
@@ -2872,8 +2872,8 @@ accumulate_sum(u64 delta, int cpu, struc
  *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
  */
 static __always_inline int
-___update_load_avg(u64 now, int cpu, struct sched_avg *sa,
-		   unsigned long weight, int running, struct cfs_rq *cfs_rq)
+___update_load_sum(u64 now, int cpu, struct sched_avg *sa,
+		   unsigned long weight, int running, struct cfs_rq *cfs_rq)
 {
 	u64 delta;
 
@@ -2907,39 +2907,80 @@ ___update_load_avg(u64 now, int cpu, str
 	if (!accumulate_sum(delta, cpu, sa, weight, running, cfs_rq))
 		return 0;
 
+	return 1;
+}
+
+static __always_inline void
+___update_load_avg(struct sched_avg *sa, unsigned long weight, struct cfs_rq *cfs_rq)
+{
+	u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;
+
 	/*
 	 * Step 2: update *_avg.
 	 */
-	sa->load_avg = div_u64(sa->load_sum, LOAD_AVG_MAX - 1024 + sa->period_contrib);
+	sa->load_avg = div_u64(weight * sa->load_sum, divider);
 	if (cfs_rq) {
 		cfs_rq->runnable_load_avg =
-			div_u64(cfs_rq->runnable_load_sum, LOAD_AVG_MAX - 1024 + sa->period_contrib);
+			div_u64(cfs_rq->runnable_load_sum, divider);
 	}
-	sa->util_avg = sa->util_sum / (LOAD_AVG_MAX - 1024 + sa->period_contrib);
+	sa->util_avg = sa->util_sum / divider;
+}
 
-	return 1;
+/*
+ * XXX we want to get rid of this helper and use the full load resolution.
+ */
+static inline long se_weight(struct sched_entity *se)
+{
+	return scale_load_down(se->load.weight);
 }
 
+/*
+ * sched_entity:
+ *
+ *   load_sum := runnable_sum
+ *   load_avg = se_weight(se) * runnable_avg
+ *
+ * cfs_rq:
+ *
+ *   load_sum = \Sum se_weight(se) * se->avg.load_sum
+ *   load_avg = \Sum se->avg.load_avg
+ */
+
 static int
 __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
 {
-	return ___update_load_avg(now, cpu, &se->avg, 0, 0, NULL);
+	if (___update_load_sum(now, cpu, &se->avg, 0, 0, NULL)) {
+		___update_load_avg(&se->avg, se_weight(se), NULL);
+		return 1;
+	}
+
+	return 0;
 }
 
 static int
 __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	return ___update_load_avg(now, cpu, &se->avg,
-				  se->on_rq * scale_load_down(se->load.weight),
-				  cfs_rq->curr == se, NULL);
+	if (___update_load_sum(now, cpu, &se->avg, !!se->on_rq,
+				cfs_rq->curr == se, NULL)) {
+
+		___update_load_avg(&se->avg, se_weight(se), NULL);
+		return 1;
+	}
+
+	return 0;
 }
 
 static int
 __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
 {
-	return ___update_load_avg(now, cpu, &cfs_rq->avg,
-			scale_load_down(cfs_rq->load.weight),
-			cfs_rq->curr != NULL, cfs_rq);
+	if (___update_load_sum(now, cpu, &cfs_rq->avg,
+			scale_load_down(cfs_rq->load.weight),
+			cfs_rq->curr != NULL, cfs_rq)) {
+		___update_load_avg(&cfs_rq->avg, 1, cfs_rq);
+		return 1;
+	}
+
+	return 0;
 }
 
 /*
@@ -3110,7 +3151,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq
 
 	/* Set new sched_entity's load */
 	se->avg.load_avg = load;
-	se->avg.load_sum = se->avg.load_avg * LOAD_AVG_MAX;
+	se->avg.load_sum = LOAD_AVG_MAX;
 
 	/* Update parent cfs_rq load */
 	add_positive(&cfs_rq->avg.load_avg, delta);
@@ -3340,7 +3381,7 @@ static void attach_entity_load_avg(struc
 {
 	se->avg.last_update_time = cfs_rq->avg.last_update_time;
 	cfs_rq->avg.load_avg += se->avg.load_avg;
-	cfs_rq->avg.load_sum += se->avg.load_sum;
+	cfs_rq->avg.load_sum += se_weight(se) * se->avg.load_sum;
 	cfs_rq->avg.util_avg += se->avg.util_avg;
 	cfs_rq->avg.util_sum += se->avg.util_sum;
 	set_tg_cfs_propagate(cfs_rq);
@@ -3360,7 +3401,7 @@ static void detach_entity_load_avg(struc
 {
 
 	sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
-	sub_positive(&cfs_rq->avg.load_sum, se->avg.load_sum);
+	sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum);
 	sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
 	sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
 	set_tg_cfs_propagate(cfs_rq);
@@ -3372,12 +3413,10 @@ static void detach_entity_load_avg(struc
 static inline void
 enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	struct sched_avg *sa = &se->avg;
-
-	cfs_rq->runnable_load_avg += sa->load_avg;
-	cfs_rq->runnable_load_sum += sa->load_sum;
+	cfs_rq->runnable_load_avg += se->avg.load_avg;
+	cfs_rq->runnable_load_sum += se_weight(se) * se->avg.load_sum;
 
-	if (!sa->last_update_time) {
+	if (!se->avg.last_update_time) {
 		attach_entity_load_avg(cfs_rq, se);
 		update_tg_load_avg(cfs_rq, 0);
 	}
@@ -3387,10 +3426,8 @@ enqueue_entity_load_avg(struct cfs_rq *c
 static inline void
 dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	cfs_rq->runnable_load_avg =
-		max_t(long, cfs_rq->runnable_load_avg - se->avg.load_avg, 0);
-	cfs_rq->runnable_load_sum =
-		max_t(s64, cfs_rq->runnable_load_sum - se->avg.load_sum, 0);
+	sub_positive(&cfs_rq->runnable_load_avg, se->avg.load_avg);
+	sub_positive(&cfs_rq->runnable_load_sum, se_weight(se) * se->avg.load_sum);
 }
 
 #ifndef CONFIG_64BIT
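
[Note, not part of the patch: a quick sanity sketch of the attach/detach
bookkeeping above, as a userspace toy rather than kernel code;
sub_positive() is modelled by hand. Attach adds
se_weight(se) * se->avg.load_sum to the cfs_rq sum, detach removes the
same quantity, so the pair round-trips as long as the weight is stable
in between.]

  #include <assert.h>
  #include <stdint.h>

  struct toy_avg { uint64_t load_sum; };

  /* Models: cfs_rq->avg.load_sum += se_weight(se) * se->avg.load_sum; */
  static void attach(struct toy_avg *rq, const struct toy_avg *se, unsigned long w)
  {
          rq->load_sum += (uint64_t)w * se->load_sum;
  }

  /* Models sub_positive(): subtract, clamping at zero. */
  static void detach(struct toy_avg *rq, const struct toy_avg *se, unsigned long w)
  {
          uint64_t d = (uint64_t)w * se->load_sum;

          rq->load_sum = rq->load_sum > d ? rq->load_sum - d : 0;
  }

  int main(void)
  {
          struct toy_avg rq = { 0 }, se = { 20000 };

          attach(&rq, &se, 1024);
          detach(&rq, &se, 1024);
          assert(rq.load_sum == 0);

          return 0;
  }
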