[<prev] [next>] [day] [month] [year] [list]
Message-ID: <20260204120509.3950227-1-realwujing@gmail.com>
Date: Wed, 4 Feb 2026 07:05:05 -0500
From: Qiliang Yuan <realwujing@...il.com>
To: Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Juri Lelli <juri.lelli@...hat.com>,
Vincent Guittot <vincent.guittot@...aro.org>
Cc: Qiliang Yuan <realwujing@...il.com>,
Qiliang Yuan <yuanql9@...natelecom.cn>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>,
Ben Segall <bsegall@...gle.com>,
Mel Gorman <mgorman@...e.de>,
Valentin Schneider <vschneid@...hat.com>,
linux-kernel@...r.kernel.org
Subject: [PATCH v3] sched/fair: Optimize EAS by reducing redundant performance domain scans
Consolidate performance domain (PD) statistic calculations in the
find_energy_efficient_cpu() wake-up path.
Calculate 'pd_max_util' and 'pd_busy_time' during the initial CPU
iteration within the performance domain. Cache these values in the local
'energy_env' structure to eliminate redundant recomputation in the later
per-PD passes. This reduces the number of full PD scans from three to
one per performance domain.
This optimization lowers the constant factor of the Energy-Aware
Scheduling calculation, reducing wake-up latency on systems with large
performance domains or complex topologies.
Signed-off-by: Qiliang Yuan <yuanql9@...natelecom.cn>
Signed-off-by: Qiliang Yuan <realwujing@...il.com>
---
v3:
- Further optimize by consolidating pd_busy_time calculation into the
main loop, reducing PD scans from 3 to 1.
- Rename patch title to accurately reflect "reducing redundant scans"
instead of a total complexity change from O(N) to O(1), addressing
reviewers' feedback.
v2:
- Ensure RCU safety by using local 'energy_env' for caching instead of
modifying the shared 'perf_domain' structure.
- Consolidate pre-calculation into the main loop to avoid an extra pass
over the performance domains.
v1:
- Initial optimization of energy calculation by pre-calculating
performance domain max utilization.
kernel/sched/fair.c | 44 +++++++++++++++++++++++++-------------------
1 file changed, 25 insertions(+), 19 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e71302282671..4ed10cb9e8e0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8148,6 +8148,7 @@ struct energy_env {
unsigned long pd_busy_time;
unsigned long cpu_cap;
unsigned long pd_cap;
+ unsigned long pd_max_util;
};
/*
@@ -8215,41 +8216,32 @@ static inline void eenv_pd_busy_time(struct energy_env *eenv,
* exceed @eenv->cpu_cap.
*/
static inline unsigned long
-eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
+eenv_pd_max_util(struct energy_env *eenv, struct perf_domain *pd,
struct task_struct *p, int dst_cpu)
{
- unsigned long max_util = 0;
- int cpu;
+ unsigned long max_util = eenv->pd_max_util;
- for_each_cpu(cpu, pd_cpus) {
- struct task_struct *tsk = (cpu == dst_cpu) ? p : NULL;
- unsigned long util = cpu_util(cpu, p, dst_cpu, 1);
+ if (dst_cpu >= 0 && cpumask_test_cpu(dst_cpu, perf_domain_span(pd))) {
+ unsigned long util = cpu_util(dst_cpu, p, dst_cpu, 1);
unsigned long eff_util, min, max;
- /*
- * Performance domain frequency: utilization clamping
- * must be considered since it affects the selection
- * of the performance domain frequency.
- * NOTE: in case RT tasks are running, by default the min
- * utilization can be max OPP.
- */
- eff_util = effective_cpu_util(cpu, util, &min, &max);
+ eff_util = effective_cpu_util(dst_cpu, util, &min, &max);
/* Task's uclamp can modify min and max value */
- if (tsk && uclamp_is_used()) {
+ if (uclamp_is_used()) {
min = max(min, uclamp_eff_value(p, UCLAMP_MIN));
/*
* If there is no active max uclamp constraint,
* directly use task's one, otherwise keep max.
*/
- if (uclamp_rq_is_idle(cpu_rq(cpu)))
+ if (uclamp_rq_is_idle(cpu_rq(dst_cpu)))
max = uclamp_eff_value(p, UCLAMP_MAX);
else
max = max(max, uclamp_eff_value(p, UCLAMP_MAX));
}
- eff_util = sugov_effective_cpu_perf(cpu, eff_util, min, max);
+ eff_util = sugov_effective_cpu_perf(dst_cpu, eff_util, min, max);
max_util = max(max_util, eff_util);
}
@@ -8265,7 +8257,7 @@ static inline unsigned long
compute_energy(struct energy_env *eenv, struct perf_domain *pd,
struct cpumask *pd_cpus, struct task_struct *p, int dst_cpu)
{
- unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
+ unsigned long max_util = eenv_pd_max_util(eenv, pd, p, dst_cpu);
unsigned long busy_time = eenv->pd_busy_time;
unsigned long energy;
@@ -8376,12 +8368,26 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
eenv.cpu_cap = cpu_actual_cap;
eenv.pd_cap = 0;
+ eenv.pd_max_util = 0;
+ eenv.pd_busy_time = 0;
for_each_cpu(cpu, cpus) {
struct rq *rq = cpu_rq(cpu);
+ unsigned long util_b, eff_util_b, min_b, max_b;
+ unsigned long util_bt;
eenv.pd_cap += cpu_actual_cap;
+ /* Pre-calculate base max utilization for the performance domain */
+ util_b = cpu_util(cpu, p, -1, 1);
+ eff_util_b = effective_cpu_util(cpu, util_b, &min_b, &max_b);
+ eff_util_b = sugov_effective_cpu_perf(cpu, eff_util_b, min_b, max_b);
+ eenv.pd_max_util = max(eenv.pd_max_util, eff_util_b);
+
+ /* Pre-calculate base busy time for the performance domain */
+ util_bt = cpu_util(cpu, p, -1, 0);
+ eenv.pd_busy_time += effective_cpu_util(cpu, util_bt, NULL, NULL);
+
if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
continue;
@@ -8439,7 +8445,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
if (max_spare_cap_cpu < 0 && prev_spare_cap < 0)
continue;
- eenv_pd_busy_time(&eenv, cpus, p);
+ eenv.pd_busy_time = min(eenv.pd_cap, eenv.pd_busy_time);
/* Compute the 'base' energy of the pd, without @p */
base_energy = compute_energy(&eenv, pd, cpus, p, -1);
--
2.51.0
Powered by blists - more mailing lists