[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <1b3c6f73-c6aa-43e6-a992-6a91c13c59cd@arm.com>
Date: Fri, 5 Dec 2025 15:52:56 +0000
From: Christian Loehle <christian.loehle@....com>
To: Vincent Guittot <vincent.guittot@...aro.org>, mingo@...hat.com,
peterz@...radead.org, juri.lelli@...hat.com, dietmar.eggemann@....com,
rostedt@...dmis.org, bsegall@...gle.com, mgorman@...e.de,
vschneid@...hat.com, linux-kernel@...r.kernel.org, pierre.gondois@....com,
kprateek.nayak@....com
Cc: qyousef@...alina.io, hongyan.xia2@....com, luis.machado@....com
Subject: Re: [RFC PATCH 5/6 v8] sched/fair: Enable idle core tracking for !SMT
On 12/2/25 18:12, Vincent Guittot wrote:
> Enable the has_idle_cores feature at LLC level for !SMT systems, for which
> a CPU equals a core.
>
> We don't enable the has_idle_core feature of select_idle_cpu, in order to
> be conservative and to avoid parsing all CPUs of the LLC.
>
> For now, has_idle_cores can be cleared even if a CPU is idle because of
> SIS_UTIL, but this looks reasonable as the probability of getting an idle
> CPU is low anyway.
>
> Signed-off-by: Vincent Guittot <vincent.guittot@...aro.org>
> ---
> kernel/sched/fair.c | 29 +++++++----------------------
> kernel/sched/sched.h | 42 +++++++++++++++++++++++++++++-------------
> 2 files changed, 36 insertions(+), 35 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 252254168c92..0c0c675f39cf 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -7501,19 +7501,6 @@ static inline int __select_idle_cpu(int cpu, struct task_struct *p)
> return -1;
> }
>
> -#ifdef CONFIG_SCHED_SMT
> -DEFINE_STATIC_KEY_FALSE(sched_smt_present);
> -EXPORT_SYMBOL_GPL(sched_smt_present);
> -
> -static inline void set_idle_cores(int cpu, int val)
> -{
> - struct sched_domain_shared *sds;
> -
> - sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
> - if (sds)
> - WRITE_ONCE(sds->has_idle_cores, val);
> -}
> -
> static inline bool test_idle_cores(int cpu)
> {
> struct sched_domain_shared *sds;
> @@ -7525,6 +7512,10 @@ static inline bool test_idle_cores(int cpu)
> return false;
> }
>
> +#ifdef CONFIG_SCHED_SMT
> +DEFINE_STATIC_KEY_FALSE(sched_smt_present);
> +EXPORT_SYMBOL_GPL(sched_smt_present);
> +
> /*
> * Scans the local SMT mask to see if the entire core is idle, and records this
> * information in sd_llc_shared->has_idle_cores.
> @@ -7612,15 +7603,6 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
>
> #else /* !CONFIG_SCHED_SMT: */
>
> -static inline void set_idle_cores(int cpu, int val)
> -{
> -}
> -
> -static inline bool test_idle_cores(int cpu)
> -{
> - return false;
> -}
> -
> static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
> {
> return __select_idle_cpu(core, p);
> @@ -7886,6 +7868,9 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
> if ((unsigned)i < nr_cpumask_bits)
> return i;
>
a
> +
> /*
> * For cluster machines which have lower sharing cache like L2 or
> * LLC Tag, we tend to find an idle CPU in the target's cluster
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 697bd654298a..b9e228333d5e 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1591,19 +1591,6 @@ do { \
> flags = _raw_spin_rq_lock_irqsave(rq); \
> } while (0)
>
> -#ifdef CONFIG_SCHED_SMT
> -extern void __update_idle_core(struct rq *rq);
> -
> -static inline void update_idle_core(struct rq *rq)
> -{
> - if (static_branch_unlikely(&sched_smt_present))
> - __update_idle_core(rq);
> -}
> -
> -#else /* !CONFIG_SCHED_SMT: */
> -static inline void update_idle_core(struct rq *rq) { }
> -#endif /* !CONFIG_SCHED_SMT */
> -
> #ifdef CONFIG_FAIR_GROUP_SCHED
>
> static inline struct task_struct *task_of(struct sched_entity *se)
> @@ -2091,6 +2078,35 @@ static __always_inline bool sched_asym_cpucap_active(void)
> return static_branch_unlikely(&sched_asym_cpucapacity);
> }
>
> +static inline void set_idle_cores(int cpu, int val)
> +{
> + struct sched_domain_shared *sds;
> +
> + sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
> + if (sds)
> + WRITE_ONCE(sds->has_idle_cores, val);
> +}
FWIW this triggers
[ 0.172174] =============================
[ 0.172177] WARNING: suspicious RCU usage
[ 0.172179] 6.18.0-rc7-cix-build+ #215 Not tainted
[ 0.172184] Detected PIPT I-cache on CPU1
[ 0.178161] -----------------------------
[ 0.178163] kernel/sched/sched.h:2085 suspicious rcu_dereference_check() usage!
[ 0.178165]
other info that might help us debug this:
[ 0.178177] CPU features: SANITY CHECK: Unexpected variation in SYS_ID_AA64MMFR1_EL1. Boot CPU: 0x1001111010312122, CPU1: 0x1001111011312122
[ 0.182211]
rcu_scheduler_active = 1, debug_locks = 1
[ 0.182213] 4 locks held by swapper/0/1:
[ 0.182224] CPU features: Unsupported CPU feature variation detected.
[ 0.186260] #0: ffff800082b2bf00
[ 0.186277] GICv3: CPU1: found redistributor 0 region 0:0x000000000e090000
[ 0.191101] (cpu_add_remove_lock){+.+.}-{4:4}, at: cpu_up+0x90/0x158
[ 0.191115] GICv3: CPU1: using allocated LPI pending table @0x0000000100330000
[ 0.195158] #1: ffff800082b2c0a0 (cpu_hotplug_lock
[ 0.195277] CPU1: Booted secondary processor 0x0000000000 [0x410fd801]
[ 0.199208] ){++++}-{0:0}, at: _cpu_up+0x58/0x268
[ 0.199213] #2: ffff800082ebddd0 (sparse_irq_lock){+.+.}-{4:4}, at: irq_lock_sparse+0x20/0x2c
[ 0.293548] #3: ffff0001feec1c18 (&rq->__lock){-...}-{2:2}, at: __schedule+0x144/0x1058
[ 0.301737]
stack backtrace:
[ 0.306136] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Tainted: G S 6.18.0-rc7-cix-build+ #215 PREEMPT
[ 0.306141] Tainted: [S]=CPU_OUT_OF_SPEC
[ 0.306144] Call trace:
[ 0.306145] show_stack+0x18/0x24 (C)
[ 0.306150] dump_stack_lvl+0x90/0xd0
[ 0.306155] dump_stack+0x18/0x24
[ 0.306159] lockdep_rcu_suspicious+0x168/0x238
[ 0.306164] set_next_task_idle+0x144/0x148
[ 0.306167] __schedule+0xc50/0x1058
[ 0.306171] schedule+0x48/0x15c
[ 0.306173] schedule_timeout+0x90/0x128
[ 0.306177] wait_for_completion_timeout+0x88/0x13c
[ 0.306180] __cpu_up+0x80/0x1e4
[ 0.306186] bringup_cpu+0x48/0x2ac
[ 0.306189] cpuhp_invoke_callback+0x18c/0x358
[ 0.306191] __cpuhp_invoke_callback_range+0xf4/0x130
[ 0.306194] _cpu_up+0x150/0x268
[ 0.306196] cpu_up+0xcc/0x158
[ 0.306199] bringup_nonboot_cpus+0x84/0xcc
[ 0.306203] smp_init+0x30/0x8c
[ 0.306208] kernel_init_freeable+0x18c/0x504
[ 0.306215] kernel_init+0x20/0x1d8
[ 0.306218] ret_from_fork+0x10/0x20
on my machine...
Powered by blists - more mailing lists