linux-kernel - Re: [PATCH] sched/fair: Optimize EAS energy calculation complexity from O(N) to O(1) inside inner loop

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d230cb20-906d-44e7-87b7-9ec2e7c5a543@arm.com>
Date: Thu, 22 Jan 2026 16:08:21 +0000
From: Christian Loehle <christian.loehle@....com>
To: Qiliang Yuan <realwujing@...il.com>, Ingo Molnar <mingo@...hat.com>,
 Peter Zijlstra <peterz@...radead.org>, Juri Lelli <juri.lelli@...hat.com>,
 Vincent Guittot <vincent.guittot@...aro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@....com>,
 Steven Rostedt <rostedt@...dmis.org>, Ben Segall <bsegall@...gle.com>,
 Mel Gorman <mgorman@...e.de>, Valentin Schneider <vschneid@...hat.com>,
 linux-kernel@...r.kernel.org, Qiliang Yuan <yuanql9@...natelecom.cn>
Subject: Re: [PATCH] sched/fair: Optimize EAS energy calculation complexity
 from O(N) to O(1) inside inner loop

On 1/22/26 15:42, Qiliang Yuan wrote:
> By pre-calculating the base max utilization of each performance domain
> at the start of find_energy_efficient_cpu(), we can avoid the repetitive
> O(M) scan in eenv_pd_max_util() for every candidate CPU.
> 
> This reduces the complexity of energy calculation from O(P*N) to O(N + P^2),
> where P is the number of performance domains. For systems with many
> performance domains or high core counts, this results in significant
> reduction in wakeup latency.

I don't think these claims are correct, but also see my comment inline below.

> 
> Signed-off-by: Qiliang Yuan <realwujing@...il.com>
> Signed-off-by: Qiliang Yuan <yuanql9@...natelecom.cn>
> ---
>  kernel/sched/fair.c     | 46 ++++++++++++++++++++++++-----------------
>  kernel/sched/sched.h    |  4 ++++
>  kernel/sched/topology.c |  1 +
>  3 files changed, 32 insertions(+), 19 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 458324d240e9..de5bfdfe553f 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -8236,41 +8236,32 @@ static inline void eenv_pd_busy_time(struct energy_env *eenv,
>   * exceed @eenv->cpu_cap.
>   */
>  static inline unsigned long
> -eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
> +eenv_pd_max_util(struct energy_env *eenv, struct perf_domain *pd,
>  		 struct task_struct *p, int dst_cpu)
>  {
> -	unsigned long max_util = 0;
> -	int cpu;
> +	unsigned long max_util = pd->max_util;
>  
> -	for_each_cpu(cpu, pd_cpus) {
> -		struct task_struct *tsk = (cpu == dst_cpu) ? p : NULL;
> -		unsigned long util = cpu_util(cpu, p, dst_cpu, 1);
> +	if (dst_cpu >= 0 && cpumask_test_cpu(dst_cpu, perf_domain_span(pd))) {
> +		unsigned long util = cpu_util(dst_cpu, p, dst_cpu, 1);
>  		unsigned long eff_util, min, max;
>  
> -		/*
> -		 * Performance domain frequency: utilization clamping
> -		 * must be considered since it affects the selection
> -		 * of the performance domain frequency.
> -		 * NOTE: in case RT tasks are running, by default the min
> -		 * utilization can be max OPP.
> -		 */
> -		eff_util = effective_cpu_util(cpu, util, &min, &max);
> +		eff_util = effective_cpu_util(dst_cpu, util, &min, &max);
>  
>  		/* Task's uclamp can modify min and max value */
> -		if (tsk && uclamp_is_used()) {
> +		if (uclamp_is_used()) {
>  			min = max(min, uclamp_eff_value(p, UCLAMP_MIN));
>  
>  			/*
>  			 * If there is no active max uclamp constraint,
>  			 * directly use task's one, otherwise keep max.
>  			 */
> -			if (uclamp_rq_is_idle(cpu_rq(cpu)))
> +			if (uclamp_rq_is_idle(cpu_rq(dst_cpu)))
>  				max = uclamp_eff_value(p, UCLAMP_MAX);
>  			else
>  				max = max(max, uclamp_eff_value(p, UCLAMP_MAX));
>  		}
>  
> -		eff_util = sugov_effective_cpu_perf(cpu, eff_util, min, max);
> +		eff_util = sugov_effective_cpu_perf(dst_cpu, eff_util, min, max);
>  		max_util = max(max_util, eff_util);
>  	}
>  
> @@ -8286,7 +8277,7 @@ static inline unsigned long
>  compute_energy(struct energy_env *eenv, struct perf_domain *pd,
>  	       struct cpumask *pd_cpus, struct task_struct *p, int dst_cpu)
>  {
> -	unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
> +	unsigned long max_util = eenv_pd_max_util(eenv, pd, p, dst_cpu);
>  	unsigned long busy_time = eenv->pd_busy_time;
>  	unsigned long energy;
>  
> @@ -8351,7 +8342,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
>  	unsigned long best_actual_cap = 0;
>  	unsigned long prev_actual_cap = 0;
>  	struct sched_domain *sd;
> -	struct perf_domain *pd;
> +	struct perf_domain *pd, *tmp_pd;
>  	struct energy_env eenv;
>  
>  	rcu_read_lock();
> @@ -8377,6 +8368,23 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
>  
>  	eenv_task_busy_time(&eenv, p, prev_cpu);
>  
> +	/* Algorithmic Optimization: Pre-calculate max_util for O(1) energy estimation */
> +	for (tmp_pd = pd; tmp_pd; tmp_pd = tmp_pd->next) {
> +		unsigned long max_u = 0;
> +		int i;
> +
> +		for_each_cpu(i, perf_domain_span(tmp_pd)) {
> +			unsigned long util = cpu_util(i, p, -1, 1);
> +			unsigned long eff_util, min, max;
> +
> +			eff_util = effective_cpu_util(i, util, &min, &max);
> +			eff_util = sugov_effective_cpu_perf(i, eff_util, min, max);
> +			if (eff_util > max_u)
> +				max_u = eff_util;
> +		}
> +		tmp_pd->max_util = max_u;

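You can't do this, as there's no synchronisation for perf_domain apart from
the rcu_read_lock() here. The perf_domain list is shared, and RCU read-side
protection does not serialise writers, so concurrent wakeups on different
CPUs would race when storing to ->max_util.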

> +	}
> +
>  	for (; pd; pd = pd->next) {
>  		unsigned long util_min = p_util_min, util_max = p_util_max;
>  		unsigned long cpu_cap, cpu_actual_cap, util;
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index d30cca6870f5..f308d335ca77 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -972,6 +972,10 @@ struct perf_domain {
>  	struct em_perf_domain *em_pd;
>  	struct perf_domain *next;
>  	struct rcu_head rcu;
> +
> +	/* O(1) optimization hints */
> +	unsigned long	   max_util;
> +	int		   max_spare_cap_cpu;
>  };
>  
>  /*
> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index 5a3f29a26bdb..b9de022ddb53 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -354,6 +354,7 @@ static struct perf_domain *pd_init(int cpu)
>  	if (!pd)
>  		return NULL;
>  	pd->em_pd = obj;
> +	pd->max_spare_cap_cpu = -1;
>  
>  	return pd;
>  }