[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260123091402.675730-1-realwujing@gmail.com>
Date: Fri, 23 Jan 2026 04:14:01 -0500
From: Qiliang Yuan <realwujing@...il.com>
To: christian.loehle@....com
Cc: bsegall@...gle.com,
dietmar.eggemann@....com,
juri.lelli@...hat.com,
linux-kernel@...r.kernel.org,
mgorman@...e.de,
mingo@...hat.com,
peterz@...radead.org,
realwujing@...il.com,
rostedt@...dmis.org,
vincent.guittot@...aro.org,
vschneid@...hat.com,
yuanql9@...natelecom.cn
Subject: [PATCH v2] sched/fair: Optimize EAS energy calculation complexity from O(N) to O(1) inside inner loop
Pre-calculate the base maximum utilization of each performance domain during the
main loop of find_energy_efficient_cpu() and cache it in the local
'energy_env' structure.
By caching this base value, the maximum utilization for candidate CPU
placements (such as prev_cpu and max_spare_cap_cpu) can be determined in
O(1) time, eliminating redundant scans of the performance domain. This
optimizes the energy estimation path by reducing the number of scans per
performance domain from three to one.
This change significantly reduces wake-up latency on systems with high core
counts or complex performance domain topologies by minimizing the overall
complexity of the Energy-Aware Scheduling (EAS) calculation.
Signed-off-by: Qiliang Yuan <yuanql9@...natelecom.cn>
Signed-off-by: Qiliang Yuan <realwujing@...il.com>
---
v2:
- Ensure RCU safety by using local 'energy_env' for caching instead of
modifying the shared 'perf_domain' structure.
- Consolidate pre-calculation into the main loop to avoid an extra pass
over the performance domains.
v1:
- Optimize energy calculation by pre-calculating performance domain max utilization.
- Add max_util and max_spare_cap_cpu to struct perf_domain.
- Reduce inner loop complexity from O(N) to O(1) for energy estimation.
kernel/sched/fair.c | 36 ++++++++++++++++++------------------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e71302282671..5c114c49c202 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8148,6 +8148,7 @@ struct energy_env {
unsigned long pd_busy_time;
unsigned long cpu_cap;
unsigned long pd_cap;
+ unsigned long pd_max_util;
};
/*
@@ -8215,41 +8216,32 @@ static inline void eenv_pd_busy_time(struct energy_env *eenv,
* exceed @eenv->cpu_cap.
*/
static inline unsigned long
-eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
+eenv_pd_max_util(struct energy_env *eenv, struct perf_domain *pd,
struct task_struct *p, int dst_cpu)
{
- unsigned long max_util = 0;
- int cpu;
+ unsigned long max_util = eenv->pd_max_util;
- for_each_cpu(cpu, pd_cpus) {
- struct task_struct *tsk = (cpu == dst_cpu) ? p : NULL;
- unsigned long util = cpu_util(cpu, p, dst_cpu, 1);
+ if (dst_cpu >= 0 && cpumask_test_cpu(dst_cpu, perf_domain_span(pd))) {
+ unsigned long util = cpu_util(dst_cpu, p, dst_cpu, 1);
unsigned long eff_util, min, max;
- /*
- * Performance domain frequency: utilization clamping
- * must be considered since it affects the selection
- * of the performance domain frequency.
- * NOTE: in case RT tasks are running, by default the min
- * utilization can be max OPP.
- */
- eff_util = effective_cpu_util(cpu, util, &min, &max);
+ eff_util = effective_cpu_util(dst_cpu, util, &min, &max);
/* Task's uclamp can modify min and max value */
- if (tsk && uclamp_is_used()) {
+ if (uclamp_is_used()) {
min = max(min, uclamp_eff_value(p, UCLAMP_MIN));
/*
* If there is no active max uclamp constraint,
* directly use task's one, otherwise keep max.
*/
- if (uclamp_rq_is_idle(cpu_rq(cpu)))
+ if (uclamp_rq_is_idle(cpu_rq(dst_cpu)))
max = uclamp_eff_value(p, UCLAMP_MAX);
else
max = max(max, uclamp_eff_value(p, UCLAMP_MAX));
}
- eff_util = sugov_effective_cpu_perf(cpu, eff_util, min, max);
+ eff_util = sugov_effective_cpu_perf(dst_cpu, eff_util, min, max);
max_util = max(max_util, eff_util);
}
@@ -8265,7 +8257,7 @@ static inline unsigned long
compute_energy(struct energy_env *eenv, struct perf_domain *pd,
struct cpumask *pd_cpus, struct task_struct *p, int dst_cpu)
{
- unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
+ unsigned long max_util = eenv_pd_max_util(eenv, pd, p, dst_cpu);
unsigned long busy_time = eenv->pd_busy_time;
unsigned long energy;
@@ -8376,12 +8368,20 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
eenv.cpu_cap = cpu_actual_cap;
eenv.pd_cap = 0;
+ eenv.pd_max_util = 0;
for_each_cpu(cpu, cpus) {
struct rq *rq = cpu_rq(cpu);
+ unsigned long util_b, eff_util_b, min_b, max_b;
eenv.pd_cap += cpu_actual_cap;
+ /* Pre-calculate base max utilization for the performance domain */
+ util_b = cpu_util(cpu, p, -1, 1);
+ eff_util_b = effective_cpu_util(cpu, util_b, &min_b, &max_b);
+ eff_util_b = sugov_effective_cpu_perf(cpu, eff_util_b, min_b, max_b);
+ eenv.pd_max_util = max(eenv.pd_max_util, eff_util_b);
+
if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
continue;
--
2.51.0
Powered by blists - more mailing lists