Date:   Wed, 20 Jul 2022 23:33:39 +1200
From:   Barry Song <21cnbao@...il.com>
To:     Peter Zijlstra <peterz@...radead.org>
Cc:     Yicong Yang <yangyicong@...ilicon.com>,
        Ingo Molnar <mingo@...hat.com>,
        Juri Lelli <juri.lelli@...hat.com>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Tim Chen <tim.c.chen@...ux.intel.com>,
        "Gautham R. Shenoy" <gautham.shenoy@....com>,
        LKML <linux-kernel@...r.kernel.org>,
        LAK <linux-arm-kernel@...ts.infradead.org>,
        Dietmar Eggemann <dietmar.eggemann@....com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Ben Segall <bsegall@...gle.com>,
        Daniel Bristot de Oliveira <bristot@...hat.com>,
        prime.zeng@...wei.com,
        Jonathan Cameron <jonathan.cameron@...wei.com>,
        ego@...ux.vnet.ibm.com,
        Srikar Dronamraju <srikar@...ux.vnet.ibm.com>,
        Linuxarm <linuxarm@...wei.com>,
        Guodong Xu <guodong.xu@...aro.org>, hesham.almatary@...wei.com,
        john.garry@...wei.com, Yang Shen <shenyang39@...wei.com>,
        kprateek.nayak@....com, Chen Yu <yu.c.chen@...el.com>,
        wuyun.abel@...edance.com
Subject: Re: [RESEND PATCH v5 2/2] sched/fair: Scan cluster before scanning
 LLC in wake-up path

On Wed, Jul 20, 2022 at 11:15 PM Peter Zijlstra <peterz@...radead.org> wrote:
>
> On Wed, Jul 20, 2022 at 04:11:50PM +0800, Yicong Yang wrote:
> > +     /* TODO: Support SMT system with cluster topology */
> > +     if (!sched_smt_active() && sd) {
> > +             for_each_cpu_and(cpu, cpus, sched_domain_span(sd)) {
>
> So that's no SMT and no wrap iteration..
>
> Does something like this work?

I guess it is OK. The main reason I disabled SMT from the very beginning is
that we don't really have a machine with both cluster and SMT topology, so the
SMT path (i.e. has_idle_core=true) was never reachable in our testing. I was
planning to bring that branch up once I actually had such a machine, which is
why it is marked as TODO.

So far, neither Kunpeng 920 nor Jacobsville has SMT.

But I don't mind if it is enabled now.
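For reference, a quick user-space sketch (not part of the patch) of how one
might check whether a box could ever exercise that has_idle_core=true path,
i.e. whether it exposes both SMT and cluster topology. The sysfs paths assume
the standard topology layout, and a populated cluster_cpus_list on its own
does not guarantee a CLS sched domain (it can be degenerated when it matches
the LLC span), so this is only a rough indicator:

#include <stdio.h>
#include <string.h>

/* Read the first line of a sysfs file, stripping the trailing newline. */
static int read_line(const char *path, char *buf, size_t len)
{
	FILE *f = fopen(path, "r");

	if (!f)
		return -1;
	if (!fgets(buf, len, f)) {
		fclose(f);
		return -1;
	}
	fclose(f);
	buf[strcspn(buf, "\n")] = '\0';
	return 0;
}

int main(void)
{
	char smt[16] = "", cluster[256] = "";
	int has_smt, has_cluster;

	/* "1" when SMT is enabled and active on this system */
	has_smt = !read_line("/sys/devices/system/cpu/smt/active",
			     smt, sizeof(smt)) && !strcmp(smt, "1");

	/* non-empty cluster_cpus_list means CPU0 reports a cluster grouping */
	has_cluster = !read_line("/sys/devices/system/cpu/cpu0/topology/cluster_cpus_list",
				 cluster, sizeof(cluster)) && cluster[0];

	printf("smt: %d, cluster: %d -> has_idle_core path possibly reachable: %d\n",
	       has_smt, has_cluster, has_smt && has_cluster);
	return 0;
}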

>
> ---
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6437,6 +6437,30 @@ static int select_idle_cpu(struct task_s
>                 }
>         }
>
> +       if (IS_ENABLED(CONFIG_SCHED_CLUSTER) &&
> +           static_branch_unlikely(&sched_cluster_active)) {
> +               struct sched_domain *sdc = rcu_dereference(per_cpu(sd_cluster, target));
> +               if (sdc) {
> +                       for_each_cpu_wrap(cpu, sched_domain_span(sdc), target + 1) {
> +                               if (!cpumask_test_cpu(cpu, cpus))
> +                                       continue;
> +
> +                               if (has_idle_core) {
> +                                       i = select_idle_core(p, cpu, cpus, &idle_cpu);
> +                                       if ((unsigned int)i < nr_cpumask_bits)
> +                                               return i;
> +                               } else {
> +                                       if (--nr <= 0)
> +                                               return -1;
> +                                       idle_cpu = __select_idle_cpu(cpu, p);
> +                                       if ((unsigned int)idle_cpu < nr_cpumask_bits)
> +                                               break;
> +                               }
> +                       }
> +                       cpumask_andnot(cpus, cpus, sched_domain_span(sdc));
> +               }
> +       }
> +
>         for_each_cpu_wrap(cpu, cpus, target + 1) {
>                 if (has_idle_core) {
>                         i = select_idle_core(p, cpu, cpus, &idle_cpu);
> @@ -6444,7 +6468,7 @@ static int select_idle_cpu(struct task_s
>                                 return i;
>
>                 } else {
> -                       if (!--nr)
> +                       if (--nr <= 0)
>                                 return -1;
>                         idle_cpu = __select_idle_cpu(cpu, p);
>                         if ((unsigned int)idle_cpu < nr_cpumask_bits)
> @@ -6543,7 +6567,7 @@ static int select_idle_sibling(struct ta
>         /*
>          * If the previous CPU is cache affine and idle, don't be stupid:
>          */
> -       if (prev != target && cpus_share_cache(prev, target) &&
> +       if (prev != target && cpus_share_lowest_cache(prev, target) &&
>             (available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
>             asym_fits_capacity(task_util, prev))
>                 return prev;
> @@ -6569,7 +6593,7 @@ static int select_idle_sibling(struct ta
>         p->recent_used_cpu = prev;
>         if (recent_used_cpu != prev &&
>             recent_used_cpu != target &&
> -           cpus_share_cache(recent_used_cpu, target) &&
> +           cpus_share_lowest_cache(recent_used_cpu, target) &&
>             (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
>             cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
>             asym_fits_capacity(task_util, recent_used_cpu)) {
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1813,7 +1813,9 @@ DECLARE_PER_CPU(struct sched_domain __rc
>  DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
>  DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
>  DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
> +
>  extern struct static_key_false sched_asym_cpucapacity;
> +extern struct static_key_false sched_cluster_active;
>
>  struct sched_group_capacity {
>         atomic_t                ref;
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -670,7 +670,9 @@ DEFINE_PER_CPU(struct sched_domain_share
>  DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa);
>  DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
>  DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
> +
>  DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
> +DEFINE_STATIC_KEY_FALSE(sched_cluster_active);
>
>  static void update_top_cache_domain(int cpu)
>  {
> @@ -2268,6 +2270,7 @@ build_sched_domains(const struct cpumask
>         struct rq *rq = NULL;
>         int i, ret = -ENOMEM;
>         bool has_asym = false;
> +       bool has_cluster = false;
>
>         if (WARN_ON(cpumask_empty(cpu_map)))
>                 goto error;
> @@ -2289,6 +2292,7 @@ build_sched_domains(const struct cpumask
>                         sd = build_sched_domain(tl, cpu_map, attr, sd, i);
>
>                         has_asym |= sd->flags & SD_ASYM_CPUCAPACITY;
> +                       has_cluster |= sd->flags & SD_CLUSTER;
>
>                         if (tl == sched_domain_topology)
>                                 *per_cpu_ptr(d.sd, i) = sd;
> @@ -2399,6 +2403,9 @@ build_sched_domains(const struct cpumask
>         if (has_asym)
>                 static_branch_inc_cpuslocked(&sched_asym_cpucapacity);
>
> +       if (has_cluster)
> +               static_branch_inc_cpuslocked(&sched_cluster_active);
> +
>         if (rq && sched_debug_verbose) {
>                 pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
>                         cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
> @@ -2498,6 +2505,9 @@ static void detach_destroy_domains(const
>         if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu)))
>                 static_branch_dec_cpuslocked(&sched_asym_cpucapacity);
>
> +       if (rcu_access_pointer(per_cpu(sd_cluster, cpu)))
> +               static_branch_dec_cpuslocked(&sched_cluster_active);
> +
>         rcu_read_lock();
>         for_each_cpu(i, cpu_map)
>                 cpu_attach_domain(NULL, &def_root_domain, i);

Thanks
Barry
