Message-ID: <CAKfTPtDN-tzO9WTxT19L++yunLnFsPvKyfkAED0kn-6513zQgQ@mail.gmail.com>
Date: Tue, 3 Feb 2026 17:51:09 +0100
From: Vincent Guittot <vincent.guittot@...aro.org>
To: Christian Loehle <christian.loehle@....com>
Cc: linux-kernel@...r.kernel.org, peterz@...radead.org, mingo@...hat.com, 
	juri.lelli@...hat.com, dietmar.eggemann@....com, kprateek.nayak@....com, 
	pierre.gondois@....com
Subject: Re: [PATCH] sched/fair: Skip SCHED_IDLE rq for SCHED_IDLE task

On Mon, 2 Feb 2026 at 16:32, Christian Loehle <christian.loehle@....com> wrote:
>
> CPUs whose rq only have SCHED_IDLE tasks running are preferred over
> true idle CPUs in many cases; this is because they are guaranteed to

I'm not sure that sched idle CPUs are preferred over truly idle CPUs in
"many cases". They are considered at the same level most of the time.
The only difference is during fork and exec, where we look for the CPU
in the shallowest idle state, and running a sched idle task counts as
the shallowest idle state.
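
For reference, this is the kind of place where the two are treated
alike today (quoting the current condition in __select_idle_cpu(),
which the patch below modifies):

        if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) &&
            sched_cpu_cookie_match(cpu_rq(cpu), p))
                return cpu;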

> not be in an idle state and might even be in a higher P-state.
> This reasoning is based on the assumption that the task (e.g. wakee)
> gets to run there immediately and isn't sharing the rq.
> This, however, isn't true if the task has SCHED_IDLE policy itself; in
> that case we are better off continuing to look for a truly idle CPU.

fair enough

>
> On an Intel Xeon 2-socket system with 64 logical cores in total, this
> yields the following for kernel compilation using SCHED_IDLE:
>
> +---------+----------------------+----------------------+--------+
> | workers | mainline (seconds)   | patch (seconds)      | delta% |
> +=========+======================+======================+========+
> |       1 | 4384.728 ± 21.085    | 3843.250 ± 16.235    | -12.35 |
> |       2 | 2242.513 ± 2.099     | 1971.696 ± 2.842     | -12.08 |
> |       4 | 1199.324 ± 1.823     | 1033.744 ± 1.803     | -13.81 |
> |       8 |  649.083 ± 1.959     |  559.123 ± 4.301     | -13.86 |
> |      16 |  370.425 ± 0.915     |  325.906 ± 4.623     | -12.02 |
> |      32 |  234.651 ± 2.255     |  217.266 ± 0.253     |  -7.41 |
> |      64 |  202.286 ± 1.452     |  197.977 ± 2.275     |  -2.13 |
> |     128 |  217.092 ± 1.687     |  212.164 ± 1.138     |  -2.27 |
> +---------+----------------------+----------------------+--------+
>
> Signed-off-by: Christian Loehle <christian.loehle@....com>
> ---
>  kernel/sched/fair.c | 28 +++++++++++++++-------------
>  1 file changed, 15 insertions(+), 13 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 3eaeceda71b0..b29fa04958f0 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6832,9 +6832,10 @@ static int sched_idle_rq(struct rq *rq)
>                         rq->nr_running);
>  }
>
> -static int sched_idle_cpu(int cpu)
> +static int choose_idle_cpu(int cpu, struct task_struct *p)
>  {
> -       return sched_idle_rq(cpu_rq(cpu));
> +       return available_idle_cpu(cpu) ||
> +              (sched_idle_rq(cpu_rq(cpu)) && !task_has_idle_policy(p));

We have the pattern (sched_idle_rq(rq) && !task_has_idle_policy(p)) in
a number of places. Can we encapsulate it ?

static int choose_sched_idle_rq(struct rq *rq, struct task_struct *p)
{
        return sched_idle_rq(rq) && !task_has_idle_policy(p);
}

and use it above and in the other locations below.
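
e.g. the helper and the callsites would then become something like this
(just an untested sketch on top of your patch, names as above):

static int choose_idle_cpu(int cpu, struct task_struct *p)
{
        return available_idle_cpu(cpu) ||
               choose_sched_idle_rq(cpu_rq(cpu), p);
}

and in sched_balance_find_dst_group_cpu():

                if (choose_sched_idle_rq(rq, p))
                        return i;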

>  }
>
>  static void
> @@ -7400,7 +7401,7 @@ sched_balance_find_dst_group_cpu(struct sched_group *group, struct task_struct *
>                 if (!sched_core_cookie_match(rq, p))
>                         continue;
>
> -               if (sched_idle_cpu(i))
> +               if (sched_idle_rq(rq) && !task_has_idle_policy(p))
>                         return i;
>
>                 if (available_idle_cpu(i)) {
> @@ -7491,8 +7492,7 @@ static inline int sched_balance_find_dst_cpu(struct sched_domain *sd, struct tas
>
>  static inline int __select_idle_cpu(int cpu, struct task_struct *p)
>  {
> -       if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) &&
> -           sched_cpu_cookie_match(cpu_rq(cpu), p))
> +       if (choose_idle_cpu(cpu, p) && sched_cpu_cookie_match(cpu_rq(cpu), p))
>                 return cpu;
>
>         return -1;
> @@ -7565,7 +7565,9 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
>                 if (!available_idle_cpu(cpu)) {
>                         idle = false;
>                         if (*idle_cpu == -1) {
> -                               if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, cpus)) {
> +                               if (sched_idle_rq(cpu_rq(cpu)) &&
> +                                   !task_has_idle_policy(p) &&
> +                                   cpumask_test_cpu(cpu, cpus)) {
>                                         *idle_cpu = cpu;
>                                         break;
>                                 }
> @@ -7600,7 +7602,7 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
>                  */
>                 if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
>                         continue;
> -               if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
> +               if (choose_idle_cpu(cpu, p))
>                         return cpu;
>         }
>
> @@ -7722,7 +7724,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
>         for_each_cpu_wrap(cpu, cpus, target) {
>                 unsigned long cpu_cap = capacity_of(cpu);
>
> -               if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
> +               if (!choose_idle_cpu(cpu, p))
>                         continue;
>
>                 fits = util_fits_cpu(task_util, util_min, util_max, cpu);
> @@ -7793,7 +7795,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
>          */
>         lockdep_assert_irqs_disabled();
>
> -       if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
> +       if (choose_idle_cpu(target, p) &&
>             asym_fits_cpu(task_util, util_min, util_max, target))
>                 return target;
>
> @@ -7801,7 +7803,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
>          * If the previous CPU is cache affine and idle, don't be stupid:
>          */
>         if (prev != target && cpus_share_cache(prev, target) &&
> -           (available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
> +           choose_idle_cpu(prev, p) &&
>             asym_fits_cpu(task_util, util_min, util_max, prev)) {
>
>                 if (!static_branch_unlikely(&sched_cluster_active) ||
> @@ -7833,7 +7835,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
>         if (recent_used_cpu != prev &&
>             recent_used_cpu != target &&
>             cpus_share_cache(recent_used_cpu, target) &&
> -           (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
> +           choose_idle_cpu(recent_used_cpu, p) &&
>             cpumask_test_cpu(recent_used_cpu, p->cpus_ptr) &&
>             asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) {
>
> @@ -12261,7 +12263,7 @@ static void sched_balance_domains(struct rq *rq, enum cpu_idle_type idle)
>  {
>         int continue_balancing = 1;
>         int cpu = rq->cpu;
> -       int busy = idle != CPU_IDLE && !sched_idle_cpu(cpu);
> +       int busy = idle != CPU_IDLE && !sched_idle_rq(rq);
>         unsigned long interval;
>         struct sched_domain *sd;
>         /* Earliest time when we have to do rebalance again */
> @@ -12299,7 +12301,7 @@ static void sched_balance_domains(struct rq *rq, enum cpu_idle_type idle)
>                                  * state even if we migrated tasks. Update it.
>                                  */
>                                 idle = idle_cpu(cpu);
> -                               busy = !idle && !sched_idle_cpu(cpu);
> +                               busy = !idle && !sched_idle_rq(rq);
>                         }
>                         sd->last_balance = jiffies;
>                         interval = get_sd_balance_interval(sd, busy);
> --
> 2.34.1
>
