Message-ID: <20250903060047.5598-1-hupu.gm@gmail.com>
Date: Wed, 3 Sep 2025 14:00:47 +0800
From: hupu <hupu.gm@...il.com>
To: mingo@...hat.com,
peterz@...radead.org,
juri.lelli@...hat.com,
vincent.guittot@...aro.org,
dietmar.eggemann@....com,
rostedt@...dmis.org,
bsegall@...gle.com,
mgorman@...e.de,
vschneid@...hat.com,
linux-kernel@...r.kernel.org
Cc: hupu.gm@...il.com
Subject: [RFC] sched: Introduce removed.load_sum for precise load propagation

Currently, the load_sum to be propagated is estimated as
(removed_runnable * divider) >> SCHED_CAPACITY_SHIFT, i.e. it relies on
runnable_avg as an approximation of the removed load_sum. The shift
operation introduces precision loss, and the error can become more
visible when small tasks frequently enter and leave the queue.
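To make the rounding concrete, here is a rough userspace sketch (an
illustration only, not kernel code) of the estimate above. It assumes a
single removed task that was runnable whenever it was on the rq, so
runnable_sum ~= load_sum << SCHED_CAPACITY_SHIFT, and a divider of
roughly LOAD_AVG_MAX:

#include <stdio.h>

#define SCHED_CAPACITY_SHIFT	10
#define LOAD_AVG_MAX		47742

int main(void)
{
	unsigned long divider = LOAD_AVG_MAX;
	unsigned long load_sum = 40;	/* a small, short-lived task */
	unsigned long runnable_sum = load_sum << SCHED_CAPACITY_SHIFT;
	/* runnable_avg is already rounded down, here all the way to 0 */
	unsigned long runnable_avg = runnable_sum / divider;
	unsigned long estimate = (runnable_avg * divider) >> SCHED_CAPACITY_SHIFT;

	/* prints "load_sum=40, estimated load_sum=0" */
	printf("load_sum=%lu, estimated load_sum=%lu\n", load_sum, estimate);
	return 0;
}

In this example the task's whole contribution is dropped by the
estimate, whereas propagating se->avg.load_sum directly preserves it.
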
This patch introduces removed.load_sum to accumulate se->avg.load_sum
directly when an entity's load average is removed
(remove_entity_load_avg()), and uses it during load propagation (see
the comparison after the list below). By doing so, we:

a) avoid the runnable_avg-based approximation and propagate load_sum
   with higher precision;
b) eliminate the precision loss from the shift operation, which matters
   most with frequent short-lived tasks;
c) drop one multiplication and one shift from the hot path, which
   slightly lowers overhead (the impact is minor).
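In effect, the value passed to add_tg_cfs_propagate() changes from an
approximation to the exact accumulated sum:

  before: -(long)(removed_runnable * divider) >> SCHED_CAPACITY_SHIFT
  after:  -(long)removed_load_sum  /* sum of se->avg.load_sum at removal */
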
Signed-off-by: hupu <hupu.gm@...il.com>
---
kernel/sched/fair.c | 8 +++++---
kernel/sched/sched.h | 1 +
2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b173a059315c..b8cf3687804b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4561,7 +4561,8 @@ static void migrate_se_pelt_lag(struct sched_entity *se) {}
static inline int
update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
{
- unsigned long removed_load = 0, removed_util = 0, removed_runnable = 0;
+ unsigned long removed_load_sum = 0, removed_load = 0;
+ unsigned long removed_util = 0, removed_runnable = 0;
struct sched_avg *sa = &cfs_rq->avg;
int decayed = 0;

@@ -4572,6 +4573,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
raw_spin_lock(&cfs_rq->removed.lock);
swap(cfs_rq->removed.util_avg, removed_util);
swap(cfs_rq->removed.load_avg, removed_load);
+ swap(cfs_rq->removed.load_sum, removed_load_sum);
swap(cfs_rq->removed.runnable_avg, removed_runnable);
cfs_rq->removed.nr = 0;
raw_spin_unlock(&cfs_rq->removed.lock);
@@ -4609,8 +4611,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
* removed_runnable is the unweighted version of removed_load so we
* can use it to estimate removed_load_sum.
*/
- add_tg_cfs_propagate(cfs_rq,
- -(long)(removed_runnable * divider) >> SCHED_CAPACITY_SHIFT);
+ add_tg_cfs_propagate(cfs_rq, -(long)removed_load_sum);

decayed = 1;
}
@@ -4792,6 +4793,7 @@ static void remove_entity_load_avg(struct sched_entity *se)
++cfs_rq->removed.nr;
cfs_rq->removed.util_avg += se->avg.util_avg;
cfs_rq->removed.load_avg += se->avg.load_avg;
+ cfs_rq->removed.load_sum += se->avg.load_sum;
cfs_rq->removed.runnable_avg += se->avg.runnable_avg;
raw_spin_unlock_irqrestore(&cfs_rq->removed.lock, flags);
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index be9745d104f7..659935a5c694 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -682,6 +682,7 @@ struct cfs_rq {
struct {
raw_spinlock_t lock ____cacheline_aligned;
int nr;
+ unsigned long load_sum;
unsigned long load_avg;
unsigned long util_avg;
unsigned long runnable_avg;
--
2.51.0