Message-ID: <20260204120509.3950227-1-realwujing@gmail.com>
Date: Wed,  4 Feb 2026 07:05:05 -0500
From: Qiliang Yuan <realwujing@...il.com>
To: Ingo Molnar <mingo@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Juri Lelli <juri.lelli@...hat.com>,
	Vincent Guittot <vincent.guittot@...aro.org>
Cc: Qiliang Yuan <realwujing@...il.com>,
	Qiliang Yuan <yuanql9@...natelecom.cn>,
	Dietmar Eggemann <dietmar.eggemann@....com>,
	Steven Rostedt <rostedt@...dmis.org>,
	Ben Segall <bsegall@...gle.com>,
	Mel Gorman <mgorman@...e.de>,
	Valentin Schneider <vschneid@...hat.com>,
	linux-kernel@...r.kernel.org
Subject: [PATCH v3] sched/fair: Optimize EAS by reducing redundant performance domain scans

Consolidate performance domain (PD) statistic calculations in the
find_energy_efficient_cpu() wake-up path.

Calculate 'pd_max_util' and 'pd_busy_time' during the initial CPU
iteration within the performance domain. Cache these values in the local
'energy_env' structure so the subsequent energy computation can reuse
them instead of rescanning the domain. This reduces the number of full
PD scans from three to one per performance domain.

This lowers the constant factor of the Energy-Aware Scheduling
calculation and reduces wake-up latency on systems with large
performance domains or complex topologies.

Signed-off-by: Qiliang Yuan <yuanql9@...natelecom.cn>
Signed-off-by: Qiliang Yuan <realwujing@...il.com>
---
v3:
 - Further optimize by consolidating pd_busy_time calculation into the
   main loop, reducing PD scans from 3 to 1.
 - Reword the patch title to say "reducing redundant scans" rather than
   claiming a total complexity change from O(N) to O(1), addressing
   reviewer feedback.
v2:
 - Ensure RCU safety by using local 'energy_env' for caching instead of
   modifying the shared 'perf_domain' structure.
 - Consolidate pre-calculation into the main loop to avoid an extra pass
   over the performance domains.
v1:
 - Initial optimization of energy calculation by pre-calculating
   performance domain max utilization.
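
For reviewers, a minimal standalone sketch of the single-pass idea,
for illustration only (toy_env and toy_cpu_util are made-up stand-ins,
not fair.c code): one loop over the domain accumulates capacity, tracks
the base max utilization and sums the base busy time, then clamps the
busy time to the domain capacity, mirroring the shape of the patched
loop. The real patch additionally folds dst_cpu's contribution in
separately via eenv_pd_max_util(), which this toy model omits.

/* Hypothetical toy model of the single-pass PD scan; not kernel code. */
#include <stdio.h>

#define PD_NR_CPUS 4

struct toy_env {
	unsigned long pd_cap;
	unsigned long pd_max_util;
	unsigned long pd_busy_time;
};

static unsigned long toy_cpu_util(int cpu)
{
	/* Arbitrary per-CPU utilization values for the example. */
	static const unsigned long util[PD_NR_CPUS] = { 120, 340, 80, 510 };

	return util[cpu];
}

/* One pass fills capacity, max utilization and busy time together. */
static void scan_pd_once(struct toy_env *env, unsigned long cpu_cap)
{
	int cpu;

	env->pd_cap = 0;
	env->pd_max_util = 0;
	env->pd_busy_time = 0;

	for (cpu = 0; cpu < PD_NR_CPUS; cpu++) {
		unsigned long util = toy_cpu_util(cpu);

		env->pd_cap += cpu_cap;
		if (util > env->pd_max_util)
			env->pd_max_util = util;
		env->pd_busy_time += util;
	}

	/* Mirrors the clamp applied after the loop in the patch. */
	if (env->pd_busy_time > env->pd_cap)
		env->pd_busy_time = env->pd_cap;
}

int main(void)
{
	struct toy_env env;

	scan_pd_once(&env, 1024);
	printf("cap=%lu max_util=%lu busy=%lu\n",
	       env.pd_cap, env.pd_max_util, env.pd_busy_time);
	return 0;
}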

 kernel/sched/fair.c | 44 +++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e71302282671..4ed10cb9e8e0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8148,6 +8148,7 @@ struct energy_env {
 	unsigned long pd_busy_time;
 	unsigned long cpu_cap;
 	unsigned long pd_cap;
+	unsigned long pd_max_util;
 };
 
 /*
@@ -8215,41 +8216,32 @@ static inline void eenv_pd_busy_time(struct energy_env *eenv,
  * exceed @eenv->cpu_cap.
  */
 static inline unsigned long
-eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
+eenv_pd_max_util(struct energy_env *eenv, struct perf_domain *pd,
 		 struct task_struct *p, int dst_cpu)
 {
-	unsigned long max_util = 0;
-	int cpu;
+	unsigned long max_util = eenv->pd_max_util;
 
-	for_each_cpu(cpu, pd_cpus) {
-		struct task_struct *tsk = (cpu == dst_cpu) ? p : NULL;
-		unsigned long util = cpu_util(cpu, p, dst_cpu, 1);
+	if (dst_cpu >= 0 && cpumask_test_cpu(dst_cpu, perf_domain_span(pd))) {
+		unsigned long util = cpu_util(dst_cpu, p, dst_cpu, 1);
 		unsigned long eff_util, min, max;
 
-		/*
-		 * Performance domain frequency: utilization clamping
-		 * must be considered since it affects the selection
-		 * of the performance domain frequency.
-		 * NOTE: in case RT tasks are running, by default the min
-		 * utilization can be max OPP.
-		 */
-		eff_util = effective_cpu_util(cpu, util, &min, &max);
+		eff_util = effective_cpu_util(dst_cpu, util, &min, &max);
 
 		/* Task's uclamp can modify min and max value */
-		if (tsk && uclamp_is_used()) {
+		if (uclamp_is_used()) {
 			min = max(min, uclamp_eff_value(p, UCLAMP_MIN));
 
 			/*
 			 * If there is no active max uclamp constraint,
 			 * directly use task's one, otherwise keep max.
 			 */
-			if (uclamp_rq_is_idle(cpu_rq(cpu)))
+			if (uclamp_rq_is_idle(cpu_rq(dst_cpu)))
 				max = uclamp_eff_value(p, UCLAMP_MAX);
 			else
 				max = max(max, uclamp_eff_value(p, UCLAMP_MAX));
 		}
 
-		eff_util = sugov_effective_cpu_perf(cpu, eff_util, min, max);
+		eff_util = sugov_effective_cpu_perf(dst_cpu, eff_util, min, max);
 		max_util = max(max_util, eff_util);
 	}
 
@@ -8265,7 +8257,7 @@ static inline unsigned long
 compute_energy(struct energy_env *eenv, struct perf_domain *pd,
 	       struct cpumask *pd_cpus, struct task_struct *p, int dst_cpu)
 {
-	unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
+	unsigned long max_util = eenv_pd_max_util(eenv, pd, p, dst_cpu);
 	unsigned long busy_time = eenv->pd_busy_time;
 	unsigned long energy;
 
@@ -8376,12 +8368,26 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 
 		eenv.cpu_cap = cpu_actual_cap;
 		eenv.pd_cap = 0;
+		eenv.pd_max_util = 0;
+		eenv.pd_busy_time = 0;
 
 		for_each_cpu(cpu, cpus) {
 			struct rq *rq = cpu_rq(cpu);
+			unsigned long util_b, eff_util_b, min_b, max_b;
+			unsigned long util_bt;
 
 			eenv.pd_cap += cpu_actual_cap;
 
+			/* Pre-calculate base max utilization for the performance domain */
+			util_b = cpu_util(cpu, p, -1, 1);
+			eff_util_b = effective_cpu_util(cpu, util_b, &min_b, &max_b);
+			eff_util_b = sugov_effective_cpu_perf(cpu, eff_util_b, min_b, max_b);
+			eenv.pd_max_util = max(eenv.pd_max_util, eff_util_b);
+
+			/* Pre-calculate base busy time for the performance domain */
+			util_bt = cpu_util(cpu, p, -1, 0);
+			eenv.pd_busy_time += effective_cpu_util(cpu, util_bt, NULL, NULL);
+
 			if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
 				continue;
 
@@ -8439,7 +8445,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 		if (max_spare_cap_cpu < 0 && prev_spare_cap < 0)
 			continue;
 
-		eenv_pd_busy_time(&eenv, cpus, p);
+		eenv.pd_busy_time = min(eenv.pd_cap, eenv.pd_busy_time);
 		/* Compute the 'base' energy of the pd, without @p */
 		base_energy = compute_energy(&eenv, pd, cpus, p, -1);
 
-- 
2.51.0

