[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260122152024.124979-1-realwujing@gmail.com>
Date: Thu, 22 Jan 2026 10:20:24 -0500
From: Qiliang Yuan <realwujing@...il.com>
To: mingo@...hat.com,
peterz@...radead.org,
juri.lelli@...hat.com,
vincent.guittot@...aro.org
Cc: dietmar.eggemann@....com,
rostedt@...dmis.org,
bsegall@...gle.com,
mgorman@...e.de,
vschneid@...hat.com,
linux-kernel@...r.kernel.org,
Qiliang Yuan <realwujing@...il.com>,
Qiliang Yuan <yuanql9@...natelecom.cn>
Subject: [PATCH] sched/fair: Optimize idle core discovery algorithm via LLC-wide bitmask
The current select_idle_cpu() employs a linear O(N) scan to find idle
cores, which scales poorly on modern high-core-count systems.
This patch optimizes the discovery algorithm by:
1. Adding a per-LLC 'idle_cores_mask' to sched_domain_shared for
tracking core-level idle status.
2. Converting the linear search into a bitmask iteration over known-idle
cores, reducing the typical search cost to near O(1) when idle cores are sparse.
3. Implementing a lazy-clear mechanism during the scan to maintain
efficiency without expensive global synchronization.
This algorithmic refinement shrinks the search space in the critical
wakeup path, mitigating the linear scaling overhead on large
SMT machines.
Signed-off-by: Qiliang Yuan <realwujing@...il.com>
Signed-off-by: Qiliang Yuan <yuanql9@...natelecom.cn>
---
include/linux/sched/topology.h | 1 +
kernel/sched/fair.c | 49 ++++++++++++++++++++++++----------
kernel/sched/topology.c | 2 +-
3 files changed, 37 insertions(+), 15 deletions(-)
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 45c0022b91ce..4223f2cd7eb8 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -68,6 +68,7 @@ struct sched_domain_shared {
atomic_t nr_busy_cpus;
int has_idle_cores;
int nr_idle_scan;
+ unsigned long idle_cores_mask[];
};
struct sched_domain {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e71302282671..458324d240e9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7507,8 +7507,13 @@ static inline void set_idle_cores(int cpu, int val)
struct sched_domain_shared *sds;
sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
- if (sds)
+ if (sds) {
WRITE_ONCE(sds->has_idle_cores, val);
+ if (val)
+ cpumask_set_cpu(cpu, (struct cpumask *)sds->idle_cores_mask);
+ else
+ cpumask_clear((struct cpumask *)sds->idle_cores_mask);
+ }
}
static inline bool test_idle_cores(int cpu)
@@ -7678,23 +7683,39 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
}
}
- for_each_cpu_wrap(cpu, cpus, target + 1) {
- if (has_idle_core) {
- i = select_idle_core(p, cpu, cpus, &idle_cpu);
- if ((unsigned int)i < nr_cpumask_bits)
- return i;
+ if (has_idle_core) {
+ sd_share = rcu_dereference(per_cpu(sd_llc_shared, target));
+ if (sd_share) {
+ for_each_cpu_wrap(cpu, (struct cpumask *)sd_share->idle_cores_mask, target + 1) {
+ if (!cpumask_test_cpu(cpu, cpus))
+ continue;
- } else {
- if (--nr <= 0)
- return -1;
- idle_cpu = __select_idle_cpu(cpu, p);
- if ((unsigned int)idle_cpu < nr_cpumask_bits)
- break;
+ i = select_idle_core(p, cpu, cpus, &idle_cpu);
+ if ((unsigned int)i < nr_cpumask_bits)
+ return i;
+
+ /* Core is no longer idle, clear the hint */
+ cpumask_clear_cpu(cpu, (struct cpumask *)sd_share->idle_cores_mask);
+ }
+
+ /* Searched all hinted cores and found none */
+ set_idle_cores(target, false);
}
+ /* If we found an idle CPU during core search, return it */
+ if (idle_cpu != -1)
+ return idle_cpu;
+
+ /* Fall through to any-CPU search if needed */
+ has_idle_core = false;
}
- if (has_idle_core)
- set_idle_cores(target, false);
+ for_each_cpu_wrap(cpu, cpus, target + 1) {
+ if (--nr <= 0)
+ return -1;
+ idle_cpu = __select_idle_cpu(cpu, p);
+ if ((unsigned int)idle_cpu < nr_cpumask_bits)
+ break;
+ }
return idle_cpu;
}
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index cf643a5ddedd..5a3f29a26bdb 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2392,7 +2392,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
*per_cpu_ptr(sdd->sd, j) = sd;
- sds = kzalloc_node(sizeof(struct sched_domain_shared),
+ sds = kzalloc_node(sizeof(struct sched_domain_shared) + cpumask_size(),
GFP_KERNEL, cpu_to_node(j));
if (!sds)
return -ENOMEM;
--
2.51.0
Powered by blists - more mailing lists