lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260122152024.124979-1-realwujing@gmail.com>
Date: Thu, 22 Jan 2026 10:20:24 -0500
From: Qiliang Yuan <realwujing@...il.com>
To: mingo@...hat.com,
	peterz@...radead.org,
	juri.lelli@...hat.com,
	vincent.guittot@...aro.org
Cc: dietmar.eggemann@....com,
	rostedt@...dmis.org,
	bsegall@...gle.com,
	mgorman@...e.de,
	vschneid@...hat.com,
	linux-kernel@...r.kernel.org,
	Qiliang Yuan <realwujing@...il.com>,
	Qiliang Yuan <yuanql9@...natelecom.cn>
Subject: [PATCH] sched/fair: Optimize idle core discovery algorithm via LLC-wide bitmask

The current select_idle_cpu() employs a linear O(N) scan to find idle
cores, which scales poorly on modern high-core-count systems.

This patch optimizes the discovery algorithm by:
1. Adding a per-LLC 'idle_cores_mask' to sched_domain_shared for
   tracking core-level idle status.
2. Converting the linear search into an efficient bitmask iteration,
   reducing the typical search cost toward O(1) when only a few cores
   in the LLC are idle.
3. Implementing a lazy-clear mechanism during the scan to maintain
   efficiency without expensive global synchronization.

This refinement shrinks the search space on the critical wakeup path,
mitigating the linear scaling overhead on large SMT machines.

Signed-off-by: Qiliang Yuan <realwujing@...il.com>
Signed-off-by: Qiliang Yuan <yuanql9@...natelecom.cn>
---
 include/linux/sched/topology.h |  1 +
 kernel/sched/fair.c            | 49 ++++++++++++++++++++++++----------
 kernel/sched/topology.c        |  2 +-
 3 files changed, 37 insertions(+), 15 deletions(-)

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 45c0022b91ce..4223f2cd7eb8 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -68,6 +68,7 @@ struct sched_domain_shared {
 	atomic_t	nr_busy_cpus;
 	int		has_idle_cores;
 	int		nr_idle_scan;
+	unsigned long	idle_cores_mask[];
 };
 
 struct sched_domain {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e71302282671..458324d240e9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7507,8 +7507,13 @@ static inline void set_idle_cores(int cpu, int val)
 	struct sched_domain_shared *sds;
 
 	sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
-	if (sds)
+	if (sds) {
 		WRITE_ONCE(sds->has_idle_cores, val);
+		if (val)
+			cpumask_set_cpu(cpu, (struct cpumask *)sds->idle_cores_mask);
+		else
+			cpumask_clear((struct cpumask *)sds->idle_cores_mask);
+	}
 }
 
 static inline bool test_idle_cores(int cpu)
@@ -7678,23 +7683,39 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
 		}
 	}
 
-	for_each_cpu_wrap(cpu, cpus, target + 1) {
-		if (has_idle_core) {
-			i = select_idle_core(p, cpu, cpus, &idle_cpu);
-			if ((unsigned int)i < nr_cpumask_bits)
-				return i;
+	if (has_idle_core) {
+		sd_share = rcu_dereference(per_cpu(sd_llc_shared, target));
+		if (sd_share) {
+			for_each_cpu_wrap(cpu, (struct cpumask *)sd_share->idle_cores_mask, target + 1) {
+				if (!cpumask_test_cpu(cpu, cpus))
+					continue;
 
-		} else {
-			if (--nr <= 0)
-				return -1;
-			idle_cpu = __select_idle_cpu(cpu, p);
-			if ((unsigned int)idle_cpu < nr_cpumask_bits)
-				break;
+				i = select_idle_core(p, cpu, cpus, &idle_cpu);
+				if ((unsigned int)i < nr_cpumask_bits)
+					return i;
+
+				/* Core is no longer idle, clear the hint */
+				cpumask_clear_cpu(cpu, (struct cpumask *)sd_share->idle_cores_mask);
+			}
+
+			/* Searched all hinted cores and found none */
+			set_idle_cores(target, false);
 		}
+		/* If we found an idle CPU during core search, return it */
+		if (idle_cpu != -1)
+			return idle_cpu;
+
+		/* Fall through to any-CPU search if needed */
+		has_idle_core = false;
 	}
 
-	if (has_idle_core)
-		set_idle_cores(target, false);
+	for_each_cpu_wrap(cpu, cpus, target + 1) {
+		if (--nr <= 0)
+			return -1;
+		idle_cpu = __select_idle_cpu(cpu, p);
+		if ((unsigned int)idle_cpu < nr_cpumask_bits)
+			break;
+	}
 
 	return idle_cpu;
 }
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index cf643a5ddedd..5a3f29a26bdb 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2392,7 +2392,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
 			*per_cpu_ptr(sdd->sd, j) = sd;
 
-			sds = kzalloc_node(sizeof(struct sched_domain_shared),
+			sds = kzalloc_node(sizeof(struct sched_domain_shared) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
 			if (!sds)
 				return -ENOMEM;
-- 
2.51.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ