Message-ID: <20251202102717.GB2556898@noisy.programming.kicks-ass.net>
Date: Tue, 2 Dec 2025 11:27:17 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Ingo Molnar <mingo@...nel.org>
Cc: linux-kernel@...r.kernel.org, Juri Lelli <juri.lelli@...hat.com>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Valentin Schneider <vschneid@...hat.com>,
Vincent Guittot <vincent.guittot@...aro.org>,
Shrikanth Hegde <sshegde@...ux.ibm.com>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Mel Gorman <mgorman@...e.de>, Steven Rostedt <rostedt@...dmis.org>,
Thomas Gleixner <tglx@...utronix.de>
Subject: Re: [PATCH 5/6] sched/fair: Rename cfs_rq::avg_load to
cfs_rq::sum_weight
On Mon, Dec 01, 2025 at 07:46:46AM +0100, Ingo Molnar wrote:
> The ::avg_load field is a long-standing misnomer: it says it's an
> 'average load', but in reality it's the momentary sum of the load
> of all currently runnable tasks. We'd also have to perform a
> division by nr_running (or use time-decay) to arrive at any sort
> of average value.
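To make the distinction concrete, with the stock sched_prio_to_weight
values: two runnable nice-0 tasks (weight 1024 each) plus one nice-19
task (weight 15) give

	\Sum w_i = 1024 + 1024 + 15 = 2063

which is what the field holds at that instant; a true average would be
2063 / 3 ~= 688.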
>
> This is clear from comments about the math of fair scheduling:
>
> * \Sum w_i := cfs_rq->avg_load
>
> The sum of all weights is ... the sum of all weights, not
> the average of all weights.
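For reference, the true average that eventually *is* computed from
these two sums is the weighted mean vruntime V, produced by a single
division in cfs_avg_vruntime() further down:

	V = v0 + ( \Sum (v_i - v0) * w_i ) / ( \Sum w_i )
	  = zero_vruntime + avg_vruntime / sum_weight	(modulo rounding)

so the per-enqueue/dequeue bookkeeping itself stays division-free.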
>
> To make it doubly confusing, there's also an ::avg_load
> in the load-balancing struct sg_lb_stats, which *is* a
> true average.
>
> The second part of the field's name is a minor misnomer
> as well: it says 'load', and the value does come from a
> load_weight structure, as the code is shared with the
> load balancer - but load equals weight only in the SMP
> load-balancing context; in the fair scheduling context
> the primary purpose is the weighting of the different
> nice levels.
>
> So rename the field to ::sum_weight instead, which makes
> the terminology of the EEVDF math match up with our
> implementation of it:
>
> * \Sum w_i := cfs_rq->sum_weight
>
> Signed-off-by: Ingo Molnar <mingo@...nel.org>
Bah, this is going to be a pain rebasing for me, but yes, these
variables are poorly named. 'sum_weight' is a better name.
> ---
> kernel/sched/fair.c | 16 ++++++++--------
> kernel/sched/sched.h | 2 +-
> 2 files changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 3d6d551168aa..2ffd52a2e7a0 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -608,7 +608,7 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
> *
> * v0 := cfs_rq->zero_vruntime
> * \Sum (v_i - v0) * w_i := cfs_rq->avg_vruntime
> - * \Sum w_i := cfs_rq->avg_load
> + * \Sum w_i := cfs_rq->sum_weight
> *
> * Since zero_vruntime closely tracks the per-task service, these
> * deltas: (v_i - v), will be in the order of the maximal (virtual) lag
> @@ -625,7 +625,7 @@ avg_vruntime_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
> s64 key = entity_key(cfs_rq, se);
>
> cfs_rq->avg_vruntime += key * weight;
> - cfs_rq->avg_load += weight;
> + cfs_rq->sum_weight += weight;
> }
>
> static void
> @@ -635,16 +635,16 @@ avg_vruntime_sub(struct cfs_rq *cfs_rq, struct sched_entity *se)
> s64 key = entity_key(cfs_rq, se);
>
> cfs_rq->avg_vruntime -= key * weight;
> - cfs_rq->avg_load -= weight;
> + cfs_rq->sum_weight -= weight;
> }
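The add/sub pair above just maintains the two running sums. A minimal
userspace sketch of the same bookkeeping (hypothetical toy_* names,
plain integers, no scale_load_down()):

	#include <stdio.h>

	/* Stand-ins for the cfs_rq fields touched above. */
	struct toy_rq {
		long long avg_vruntime;		/* \Sum (v_i - v0) * w_i */
		long long sum_weight;		/* \Sum w_i */
		long long zero_vruntime;	/* v0 */
	};

	static void toy_add(struct toy_rq *rq, long long v, long long w)
	{
		rq->avg_vruntime += (v - rq->zero_vruntime) * w;
		rq->sum_weight   += w;
	}

	static void toy_sub(struct toy_rq *rq, long long v, long long w)
	{
		rq->avg_vruntime -= (v - rq->zero_vruntime) * w;
		rq->sum_weight   -= w;
	}

	int main(void)
	{
		struct toy_rq rq = { 0, 0, 1000 };

		toy_add(&rq, 1100, 1024);	/* nice-0 task, vruntime 1100 */
		toy_add(&rq, 900, 15);		/* nice-19 task, vruntime 900 */

		/* Weighted mean: V = v0 + avg_vruntime / sum_weight */
		printf("V = %lld\n",
		       rq.zero_vruntime + rq.avg_vruntime / rq.sum_weight);

		toy_sub(&rq, 900, 15);		/* nice-19 task dequeues */
		printf("V = %lld\n",
		       rq.zero_vruntime + rq.avg_vruntime / rq.sum_weight);
		return 0;
	}

With the numbers above this prints V = 1097, then V = 1100 once the
nice-19 task leaves: the heavy tasks drag the weighted mean towards
their own vruntime.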
>
> static inline
> void avg_vruntime_update(struct cfs_rq *cfs_rq, s64 delta)
> {
> /*
> - * v' = v + d ==> avg_vruntime' = avg_runtime - d*avg_load
> +	 * v' = v + d ==> avg_vruntime' = avg_vruntime - d*sum_weight
> */
> - cfs_rq->avg_vruntime -= cfs_rq->avg_load * delta;
> + cfs_rq->avg_vruntime -= cfs_rq->sum_weight * delta;
> }
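The identity in that comment follows directly from the definitions
earlier in the patch: advancing the zero point v0 by d turns each key
(v_i - v0) into (v_i - v0 - d), so

	\Sum (v_i - v0 - d) * w_i = \Sum (v_i - v0) * w_i - d * \Sum w_i
	                          = avg_vruntime - d * sum_weight

which is exactly the single statement below the comment.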
>
> /*
> @@ -655,7 +655,7 @@ u64 cfs_avg_vruntime(struct cfs_rq *cfs_rq)
> {
> struct sched_entity *curr = cfs_rq->curr;
> s64 avg = cfs_rq->avg_vruntime;
> - long load = cfs_rq->avg_load;
> + long load = cfs_rq->sum_weight;
>
> if (curr && curr->on_rq) {
> unsigned long weight = scale_load_down(curr->load.weight);
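(Worth noting for readers of this hunk: the currently running entity is
dequeued from the rbtree while it runs, so its weight is not part of
the sums maintained by avg_vruntime_add()/_sub(); this function and
vruntime_eligible() therefore add it back first, roughly:

	avg  += (v_curr - v0) * w_curr
	load += w_curr

before consuming the sums.)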
> @@ -723,7 +723,7 @@ static int vruntime_eligible(struct cfs_rq *cfs_rq, u64 vruntime)
> {
> struct sched_entity *curr = cfs_rq->curr;
> s64 avg = cfs_rq->avg_vruntime;
> - long load = cfs_rq->avg_load;
> + long load = cfs_rq->sum_weight;
>
> if (curr && curr->on_rq) {
> unsigned long weight = scale_load_down(curr->load.weight);
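The payoff of keeping raw sums is visible here: eligibility never
computes V itself. Instead of testing v <= V it cross-multiplies by
the weight sum:

	v is eligible  <=>  (v - v0) * \Sum w_i  <=  \Sum (v_i - v0) * w_i

i.e., roughly key * load <= avg, with no division on this hot path.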
> @@ -5172,7 +5172,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> *
> * vl_i = (W + w_i)*vl'_i / W
> */
> - load = cfs_rq->avg_load;
> + load = cfs_rq->sum_weight;
> if (curr && curr->on_rq)
> load += scale_load_down(curr->load.weight);
>
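For context, the vl_i = (W + w_i)*vl'_i / W relation quoted in this
hunk's comment follows from how insertion moves the weighted average:
placing an entity at v_i = V - vl_i shifts the average to

	V' = (W*V + w_i*v_i) / (W + w_i)

so the lag actually observed after insertion is only

	vl'_i = V' - v_i = W * vl_i / (W + w_i)

and inverting that relation yields the compensation applied with the
'load' computed in the lines above (sum_weight plus curr's weight).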
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 47f7b6df634c..54994d93958a 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -679,7 +679,7 @@ struct cfs_rq {
> unsigned int h_nr_idle; /* SCHED_IDLE */
>
> s64 avg_vruntime;
> - u64 avg_load;
> + u64 sum_weight;
>
> u64 zero_vruntime;
> #ifdef CONFIG_SCHED_CORE
> --
> 2.51.0
>