[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d230cb20-906d-44e7-87b7-9ec2e7c5a543@arm.com>
Date: Thu, 22 Jan 2026 16:08:21 +0000
From: Christian Loehle <christian.loehle@....com>
To: Qiliang Yuan <realwujing@...il.com>, Ingo Molnar <mingo@...hat.com>,
Peter Zijlstra <peterz@...radead.org>, Juri Lelli <juri.lelli@...hat.com>,
Vincent Guittot <vincent.guittot@...aro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@....com>,
Steven Rostedt <rostedt@...dmis.org>, Ben Segall <bsegall@...gle.com>,
Mel Gorman <mgorman@...e.de>, Valentin Schneider <vschneid@...hat.com>,
linux-kernel@...r.kernel.org, Qiliang Yuan <yuanql9@...natelecom.cn>
Subject: Re: [PATCH] sched/fair: Optimize EAS energy calculation complexity
from O(N) to O(1) inside inner loop
On 1/22/26 15:42, Qiliang Yuan wrote:
> By pre-calculating the base max utilization of each performance domain
> at the start of find_energy_efficient_cpu(), we can avoid the repetitive
> O(M) scan in eenv_pd_max_util() for every candidate CPU.
>
> This reduces the complexity of energy calculation from O(P*N) to O(N + P^2),
> where P is the number of performance domains. For systems with many
> performance domains or high core counts, this results in significant
> reduction in wakeup latency.
I don't think these complexity figures are correct, but also see my comment on the pre-calculation loop below.
>
> Signed-off-by: Qiliang Yuan <realwujing@...il.com>
> Signed-off-by: Qiliang Yuan <yuanql9@...natelecom.cn>
> ---
> kernel/sched/fair.c | 46 ++++++++++++++++++++++++-----------------
> kernel/sched/sched.h | 4 ++++
> kernel/sched/topology.c | 1 +
> 3 files changed, 32 insertions(+), 19 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 458324d240e9..de5bfdfe553f 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -8236,41 +8236,32 @@ static inline void eenv_pd_busy_time(struct energy_env *eenv,
> * exceed @eenv->cpu_cap.
> */
> static inline unsigned long
> -eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
> +eenv_pd_max_util(struct energy_env *eenv, struct perf_domain *pd,
> struct task_struct *p, int dst_cpu)
> {
> - unsigned long max_util = 0;
> - int cpu;
> + unsigned long max_util = pd->max_util;
>
> - for_each_cpu(cpu, pd_cpus) {
> - struct task_struct *tsk = (cpu == dst_cpu) ? p : NULL;
> - unsigned long util = cpu_util(cpu, p, dst_cpu, 1);
> + if (dst_cpu >= 0 && cpumask_test_cpu(dst_cpu, perf_domain_span(pd))) {
> + unsigned long util = cpu_util(dst_cpu, p, dst_cpu, 1);
> unsigned long eff_util, min, max;
>
> - /*
> - * Performance domain frequency: utilization clamping
> - * must be considered since it affects the selection
> - * of the performance domain frequency.
> - * NOTE: in case RT tasks are running, by default the min
> - * utilization can be max OPP.
> - */
> - eff_util = effective_cpu_util(cpu, util, &min, &max);
> + eff_util = effective_cpu_util(dst_cpu, util, &min, &max);
>
> /* Task's uclamp can modify min and max value */
> - if (tsk && uclamp_is_used()) {
> + if (uclamp_is_used()) {
> min = max(min, uclamp_eff_value(p, UCLAMP_MIN));
>
> /*
> * If there is no active max uclamp constraint,
> * directly use task's one, otherwise keep max.
> */
> - if (uclamp_rq_is_idle(cpu_rq(cpu)))
> + if (uclamp_rq_is_idle(cpu_rq(dst_cpu)))
> max = uclamp_eff_value(p, UCLAMP_MAX);
> else
> max = max(max, uclamp_eff_value(p, UCLAMP_MAX));
> }
>
> - eff_util = sugov_effective_cpu_perf(cpu, eff_util, min, max);
> + eff_util = sugov_effective_cpu_perf(dst_cpu, eff_util, min, max);
> max_util = max(max_util, eff_util);
> }
>
> @@ -8286,7 +8277,7 @@ static inline unsigned long
> compute_energy(struct energy_env *eenv, struct perf_domain *pd,
> struct cpumask *pd_cpus, struct task_struct *p, int dst_cpu)
> {
> - unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
> + unsigned long max_util = eenv_pd_max_util(eenv, pd, p, dst_cpu);
> unsigned long busy_time = eenv->pd_busy_time;
> unsigned long energy;
>
> @@ -8351,7 +8342,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
> unsigned long best_actual_cap = 0;
> unsigned long prev_actual_cap = 0;
> struct sched_domain *sd;
> - struct perf_domain *pd;
> + struct perf_domain *pd, *tmp_pd;
> struct energy_env eenv;
>
> rcu_read_lock();
> @@ -8377,6 +8368,23 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
>
> eenv_task_busy_time(&eenv, p, prev_cpu);
>
> + /* Algorithmic Optimization: Pre-calculate max_util for O(1) energy estimation */
> + for (tmp_pd = pd; tmp_pd; tmp_pd = tmp_pd->next) {
> + unsigned long max_u = 0;
> + int i;
> +
> + for_each_cpu(i, perf_domain_span(tmp_pd)) {
> + unsigned long util = cpu_util(i, p, -1, 1);
> + unsigned long eff_util, min, max;
> +
> + eff_util = effective_cpu_util(i, util, &min, &max);
> + eff_util = sugov_effective_cpu_perf(i, eff_util, min, max);
> + if (eff_util > max_u)
> + max_u = eff_util;
> + }
> + tmp_pd->max_util = max_u;
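You can't do this, as there's no synchronisation for perf_domain apart from
the rcu_read_lock() here. That only protects readers, so concurrent wakeups
on different CPUs would race writing the shared tmp_pd->max_util.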
> + }
> +
> for (; pd; pd = pd->next) {
> unsigned long util_min = p_util_min, util_max = p_util_max;
> unsigned long cpu_cap, cpu_actual_cap, util;
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index d30cca6870f5..f308d335ca77 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -972,6 +972,10 @@ struct perf_domain {
> struct em_perf_domain *em_pd;
> struct perf_domain *next;
> struct rcu_head rcu;
> +
> + /* O(1) optimization hints */
> + unsigned long max_util;
> + int max_spare_cap_cpu;
> };
>
> /*
> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index 5a3f29a26bdb..b9de022ddb53 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -354,6 +354,7 @@ static struct perf_domain *pd_init(int cpu)
> if (!pd)
> return NULL;
> pd->em_pd = obj;
> + pd->max_spare_cap_cpu = -1;
>
> return pd;
> }
Powered by blists - more mailing lists