Message-ID: <CAKfTPtD1N3qooNFxcJZVGFFMgvb0J2W8w7xJffZ-Pqziy5zZ1g@mail.gmail.com>
Date: Sat, 6 Dec 2025 11:18:09 +0100
From: Vincent Guittot <vincent.guittot@...aro.org>
To: "Chen, Yu C" <yu.c.chen@...el.com>
Cc: Christian Loehle <christian.loehle@....com>, qyousef@...alina.io, hongyan.xia2@....com, 
	luis.machado@....com, mingo@...hat.com, peterz@...radead.org, 
	dietmar.eggemann@....com, juri.lelli@...hat.com, rostedt@...dmis.org, 
	bsegall@...gle.com, mgorman@...e.de, vschneid@...hat.com, 
	pierre.gondois@....com, kprateek.nayak@....com, linux-kernel@...r.kernel.org
Subject: Re: [RFC PATCH 5/6 v8] sched/fair: Enable idle core tracking for !SMT

On Sat, 6 Dec 2025 at 03:11, Chen, Yu C <yu.c.chen@...el.com> wrote:
>
> On 12/5/2025 11:52 PM, Christian Loehle wrote:
> > On 12/2/25 18:12, Vincent Guittot wrote:
> >> Enable the has_idle_cores feature at LLC level for !SMT systems, for
> >> which a CPU equals a core.
> >>
> >> We don't enable the has_idle_core path of select_idle_cpu() in order
> >> to stay conservative and avoid scanning all CPUs of the LLC.
> >>
> >> For now, has_idle_cores can be cleared even if a CPU is idle because
> >> of SIS_UTIL, but that looks reasonable as the probability of finding
> >> an idle CPU is low anyway.
> >>
> >> Signed-off-by: Vincent Guittot <vincent.guittot@...aro.org>
> >> ---
> >>   kernel/sched/fair.c  | 29 +++++++----------------------
> >>   kernel/sched/sched.h | 42 +++++++++++++++++++++++++++++-------------
> >>   2 files changed, 36 insertions(+), 35 deletions(-)
> >>
> >> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> >> index 252254168c92..0c0c675f39cf 100644
> >> --- a/kernel/sched/fair.c
> >> +++ b/kernel/sched/fair.c
> >> @@ -7501,19 +7501,6 @@ static inline int __select_idle_cpu(int cpu, struct task_struct *p)
> >>      return -1;
> >>   }
> >>
> >> -#ifdef CONFIG_SCHED_SMT
> >> -DEFINE_STATIC_KEY_FALSE(sched_smt_present);
> >> -EXPORT_SYMBOL_GPL(sched_smt_present);
> >> -
> >> -static inline void set_idle_cores(int cpu, int val)
> >> -{
> >> -    struct sched_domain_shared *sds;
> >> -
> >> -    sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
> >> -    if (sds)
> >> -            WRITE_ONCE(sds->has_idle_cores, val);
> >> -}
> >> -
> >>   static inline bool test_idle_cores(int cpu)
> >>   {
> >>      struct sched_domain_shared *sds;
> >> @@ -7525,6 +7512,10 @@ static inline bool test_idle_cores(int cpu)
> >>      return false;
> >>   }
> >>
> >> +#ifdef CONFIG_SCHED_SMT
> >> +DEFINE_STATIC_KEY_FALSE(sched_smt_present);
> >> +EXPORT_SYMBOL_GPL(sched_smt_present);
> >> +
> >>   /*
> >>    * Scans the local SMT mask to see if the entire core is idle, and records this
> >>    * information in sd_llc_shared->has_idle_cores.
> >> @@ -7612,15 +7603,6 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
> >>
> >>   #else /* !CONFIG_SCHED_SMT: */
> >>
> >> -static inline void set_idle_cores(int cpu, int val)
> >> -{
> >> -}
> >> -
> >> -static inline bool test_idle_cores(int cpu)
> >> -{
> >> -    return false;
> >> -}
> >> -
> >>   static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
> >>   {
> >>      return __select_idle_cpu(core, p);
> >> @@ -7886,6 +7868,9 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
> >>      if ((unsigned)i < nr_cpumask_bits)
> >>              return i;
> >>
> >> +
> >>      /*
> >>       * For cluster machines which have lower sharing cache like L2 or
> >>       * LLC Tag, we tend to find an idle CPU in the target's cluster
> >> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> >> index 697bd654298a..b9e228333d5e 100644
> >> --- a/kernel/sched/sched.h
> >> +++ b/kernel/sched/sched.h
> >> @@ -1591,19 +1591,6 @@ do {                                          \
> >>      flags = _raw_spin_rq_lock_irqsave(rq);  \
> >>   } while (0)
> >>
> >> -#ifdef CONFIG_SCHED_SMT
> >> -extern void __update_idle_core(struct rq *rq);
> >> -
> >> -static inline void update_idle_core(struct rq *rq)
> >> -{
> >> -    if (static_branch_unlikely(&sched_smt_present))
> >> -            __update_idle_core(rq);
> >> -}
> >> -
> >> -#else /* !CONFIG_SCHED_SMT: */
> >> -static inline void update_idle_core(struct rq *rq) { }
> >> -#endif /* !CONFIG_SCHED_SMT */
> >> -
> >>   #ifdef CONFIG_FAIR_GROUP_SCHED
> >>
> >>   static inline struct task_struct *task_of(struct sched_entity *se)
> >> @@ -2091,6 +2078,35 @@ static __always_inline bool sched_asym_cpucap_active(void)
> >>      return static_branch_unlikely(&sched_asym_cpucapacity);
> >>   }
> >>
> >> +static inline void set_idle_cores(int cpu, int val)
> >> +{
> >> +    struct sched_domain_shared *sds;
> >> +
> >> +    sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
> >> +    if (sds)
> >> +            WRITE_ONCE(sds->has_idle_cores, val);
> >> +}
> >
> > FWIW this triggers
> > [    0.172174] =============================
> > [    0.172177] WARNING: suspicious RCU usage
> > [    0.172179] 6.18.0-rc7-cix-build+ #215 Not tainted
> > [    0.172184] Detected PIPT I-cache on CPU1
> > [    0.178161] -----------------------------
> > [    0.178163] kernel/sched/sched.h:2085 suspicious rcu_dereference_check() usage!
> > [    0.178165]
> >                 other info that might help us debug this:
> >
> > [    0.178177] CPU features: SANITY CHECK: Unexpected variation in SYS_ID_AA64MMFR1_EL1. Boot CPU: 0x1001111010312122, CPU1: 0x1001111011312122
> > [    0.182211]
> >                 rcu_scheduler_active = 1, debug_locks = 1
> > [    0.182213] 4 locks held by swapper/0/1:
> > [    0.182224] CPU features: Unsupported CPU feature variation detected.
> > [    0.186260]  #0: ffff800082b2bf00
> > [    0.186277] GICv3: CPU1: found redistributor 0 region 0:0x000000000e090000
> > [    0.191101]  (cpu_add_remove_lock){+.+.}-{4:4}, at: cpu_up+0x90/0x158
> > [    0.191115] GICv3: CPU1: using allocated LPI pending table @0x0000000100330000
> > [    0.195158]  #1: ffff800082b2c0a0 (cpu_hotplug_lock
> > [    0.195277] CPU1: Booted secondary processor 0x0000000000 [0x410fd801]
> > [    0.199208] ){++++}-{0:0}, at: _cpu_up+0x58/0x268
> > [    0.199213]  #2: ffff800082ebddd0 (sparse_irq_lock){+.+.}-{4:4}, at: irq_lock_sparse+0x20/0x2c
> > [    0.293548]  #3: ffff0001feec1c18 (&rq->__lock){-...}-{2:2}, at: __schedule+0x144/0x1058
> > [    0.301737]
> >                 stack backtrace:
> > [    0.306136] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Tainted: G S                  6.18.0-rc7-cix-build+ #215 PREEMPT
> > [    0.306141] Tainted: [S]=CPU_OUT_OF_SPEC
> > [    0.306144] Call trace:
> > [    0.306145]  show_stack+0x18/0x24 (C)
> > [    0.306150]  dump_stack_lvl+0x90/0xd0
> > [    0.306155]  dump_stack+0x18/0x24
> > [    0.306159]  lockdep_rcu_suspicious+0x168/0x238
> > [    0.306164]  set_next_task_idle+0x144/0x148
> > [    0.306167]  __schedule+0xc50/0x1058
> > [    0.306171]  schedule+0x48/0x15c
> > [    0.306173]  schedule_timeout+0x90/0x128
> > [    0.306177]  wait_for_completion_timeout+0x88/0x13c
> > [    0.306180]  __cpu_up+0x80/0x1e4
> > [    0.306186]  bringup_cpu+0x48/0x2ac
> > [    0.306189]  cpuhp_invoke_callback+0x18c/0x358
> > [    0.306191]  __cpuhp_invoke_callback_range+0xf4/0x130
> > [    0.306194]  _cpu_up+0x150/0x268
> > [    0.306196]  cpu_up+0xcc/0x158
> > [    0.306199]  bringup_nonboot_cpus+0x84/0xcc
> > [    0.306203]  smp_init+0x30/0x8c
> > [    0.306208]  kernel_init_freeable+0x18c/0x504
> > [    0.306215]  kernel_init+0x20/0x1d8
> > [    0.306218]  ret_from_fork+0x10/0x20
> >
> >
> > on my machine...
> >
>
> update_idle_core() might need to deal with RCU protection as the
> original code did; maybe something like this would help:

fair enough

>
>
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 9dfabaa314b1..4c9348075abf 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2094,17 +2094,22 @@ extern void __update_idle_core(struct rq *rq);
>
>   static inline void update_idle_core(struct rq *rq)
>   {
> -       if (static_branch_unlikely(&sched_smt_present))
> +       if (static_branch_unlikely(&sched_smt_present)) {
>                  __update_idle_core(rq);
> -       else
> +       } else {
> +               rcu_read_lock();

I will move the rcu_read_lock()/rcu_read_unlock() up so they also cover
__update_idle_core() and remove the ones inside the latter (see the
sketch after the quoted diff below)

>                  set_idle_cores(cpu_of(rq), 1);
> +               rcu_read_unlock();
> +       }
>
>   }
>
>   #else /* !CONFIG_SCHED_SMT: */
>   static inline void update_idle_core(struct rq *rq)
>   {
> +       rcu_read_lock();
>          set_idle_cores(cpu_of(rq), 1);
> +       rcu_read_unlock();
>   }
>   #endif /* !CONFIG_SCHED_SMT */
>
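
For reference, a minimal sketch of what that rework could look like,
assuming the rcu_read_lock()/rcu_read_unlock() pair is hoisted into
update_idle_core() and dropped from inside __update_idle_core(); this is
only illustrative, the exact shape is up to the next revision of the
series:

#ifdef CONFIG_SCHED_SMT
/* One RCU read-side section covers both the SMT and the !SMT paths. */
static inline void update_idle_core(struct rq *rq)
{
	rcu_read_lock();
	if (static_branch_unlikely(&sched_smt_present))
		__update_idle_core(rq); /* would no longer take rcu_read_lock() itself */
	else
		set_idle_cores(cpu_of(rq), 1);
	rcu_read_unlock();
}
#else /* !CONFIG_SCHED_SMT: */
/* Same protection around the rcu_dereference() in set_idle_cores(). */
static inline void update_idle_core(struct rq *rq)
{
	rcu_read_lock();
	set_idle_cores(cpu_of(rq), 1);
	rcu_read_unlock();
}
#endif /* !CONFIG_SCHED_SMT */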
