linux-kernel - Re: [PATCH 1/2] sched: Add per_cpu cluster domain info and cpus_share

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <YcL80lUZi4f9zqjB@BLR-5CG11610CF.amd.com>
Date:   Wed, 22 Dec 2021 15:54:18 +0530
From:   "Gautham R. Shenoy" <gautham.shenoy@....com>
To:     Yicong Yang <yangyicong@...ilicon.com>
Cc:     peterz@...radead.org, mingo@...hat.com, juri.lelli@...hat.com,
        vincent.guittot@...aro.org, tim.c.chen@...ux.intel.com,
        linux-kernel@...r.kernel.org, linux-arm-kernel@...ts.infradead.org,
        dietmar.eggemann@....com, rostedt@...dmis.org, bsegall@...gle.com,
        bristot@...hat.com, prime.zeng@...wei.com,
        jonathan.cameron@...wei.com, ego@...ux.vnet.ibm.com,
        srikar@...ux.vnet.ibm.com, linuxarm@...wei.com, 21cnbao@...il.com,
        song.bao.hua@...ilicon.com, guodong.xu@...aro.org
Subject: Re: [PATCH 1/2] sched: Add per_cpu cluster domain info and
 cpus_share_cluster API

Hello Yicong, Barry,


On Wed, Dec 15, 2021 at 12:11:48PM +0800, Yicong Yang wrote:
> From: Barry Song <song.bao.hua@...ilicon.com>
> 
> Add per-cpu cluster domain info and cpus_share_cluster() API.
> This is the preparation for the optimization of select_idle_cpu()
> on platforms with cluster scheduler level.
> 
> Signed-off-by: Barry Song <song.bao.hua@...ilicon.com>
> Signed-off-by: Yicong Yang <yangyicong@...ilicon.com>
> ---
>  include/linux/sched/sd_flags.h |  5 +++++
>  include/linux/sched/topology.h |  8 +++++++-
>  kernel/sched/core.c            | 12 ++++++++++++
>  kernel/sched/sched.h           |  2 ++
>  kernel/sched/topology.c        |  9 +++++++++
>  5 files changed, 35 insertions(+), 1 deletion(-)
> 
> diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
> index 57bde66d95f7..0f732bcfbb2c 100644
> --- a/include/linux/sched/sd_flags.h
> +++ b/include/linux/sched/sd_flags.h
> @@ -109,6 +109,11 @@ SD_FLAG(SD_ASYM_CPUCAPACITY_FULL, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
>   */
>  SD_FLAG(SD_SHARE_CPUCAPACITY, SDF_SHARED_CHILD | SDF_NEEDS_GROUPS)
>  
> +/*
> + * Set up for cluster domains sharing resources such as llc tags or l2
> + */
> +SD_FLAG(SD_CLUSTER, SDF_NEEDS_GROUPS)
> +
>  /*
>   * Domain members share CPU package resources (i.e. caches)
>   *
> diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
> index c07bfa2d80f2..78c3a94fae66 100644
> --- a/include/linux/sched/topology.h
> +++ b/include/linux/sched/topology.h
> @@ -45,7 +45,7 @@ static inline int cpu_smt_flags(void)
>  #ifdef CONFIG_SCHED_CLUSTER
>  static inline int cpu_cluster_flags(void)
>  {
> -	return SD_SHARE_PKG_RESOURCES;
> +	return SD_CLUSTER | SD_SHARE_PKG_RESOURCES;

On non-cluster systems, there would be only one group at the at the
CLS domain. Since SD_CLUSTER is also tagged with SDF_NEEDS_GROUP, it
would need the presence of two groups. Thus, on such non-cluster
systems, the CLS domain will continue to be degenerated in favour of
the SMT domain.

>  }
>  #endif
>  
> @@ -177,6 +177,7 @@ cpumask_var_t *alloc_sched_domains(unsigned int ndoms);
>  void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
>  
>  bool cpus_share_cache(int this_cpu, int that_cpu);
> +bool cpus_share_cluster(int this_cpu, int that_cpu);
>  
>  typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
>  typedef int (*sched_domain_flags_f)(void);
> @@ -230,6 +231,11 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu)
>  	return true;
>  }
>  
> +static inline bool cpus_share_cluster(int this_cpu, int that_cpu)
> +{
> +	return true;
> +}
> +
>  #endif	/* !CONFIG_SMP */
>  
>  #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 3c9b0fda64ac..11f9b25c3068 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3732,6 +3732,18 @@ bool cpus_share_cache(int this_cpu, int that_cpu)
>  	return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
>  }
>  
> +/*
> + * On non-Cluster machine this function works same with cpus_share_cache()
> + * as sd_cluster_id equals to sd_llc_id.
> + */

Ok, so on a non-cluster machine, a cluster is trivially defined to be
a single core (this is reflected in what the cpu_cluster_mask() shows
up on such machines).

However, in cpus_share_cluster(), we are upgrading the definition of a
cluster to be group of CPUs that share the LLC. This is inconsistent
with the original definition. Is there a way to avoid this
inconsistency ?



> +bool cpus_share_cluster(int this_cpu, int that_cpu)
> +{
> +	if (this_cpu == that_cpu)
> +		return true;
> +
> +	return per_cpu(sd_cluster_id, this_cpu) == per_cpu(sd_cluster_id, that_cpu);
> +}
> +
>  static inline bool ttwu_queue_cond(int cpu, int wake_flags)
>  {
>  	/*
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 0e66749486e7..ddd29879ad40 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1763,7 +1763,9 @@ static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
>  DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc);
>  DECLARE_PER_CPU(int, sd_llc_size);
>  DECLARE_PER_CPU(int, sd_llc_id);
> +DECLARE_PER_CPU(int, sd_cluster_id);
>  DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
> +DECLARE_PER_CPU(struct sched_domain __rcu *, sd_cluster);
>  DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
>  DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
>  DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index d201a7052a29..5642df384904 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -644,6 +644,8 @@ static void destroy_sched_domains(struct sched_domain *sd)
>  DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc);
>  DEFINE_PER_CPU(int, sd_llc_size);
>  DEFINE_PER_CPU(int, sd_llc_id);
> +DEFINE_PER_CPU(int, sd_cluster_id);
> +DEFINE_PER_CPU(struct sched_domain __rcu *, sd_cluster);
>  DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
>  DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa);
>  DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
> @@ -669,6 +671,12 @@ static void update_top_cache_domain(int cpu)
>  	per_cpu(sd_llc_id, cpu) = id;
>  	rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);
>  
> +	sd = lowest_flag_domain(cpu, SD_CLUSTER);
> +	if (sd)
> +		id = cpumask_first(sched_domain_span(sd));
> +	rcu_assign_pointer(per_cpu(sd_cluster, cpu), sd);
> +	per_cpu(sd_cluster_id, cpu) = id;
> +
>  	sd = lowest_flag_domain(cpu, SD_NUMA);
>  	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
>  
> @@ -1514,6 +1522,7 @@ static unsigned long __read_mostly *sched_numa_onlined_nodes;
>   */
>  #define TOPOLOGY_SD_FLAGS		\
>  	(SD_SHARE_CPUCAPACITY	|	\
> +	 SD_CLUSTER		|	\
>  	 SD_SHARE_PKG_RESOURCES |	\
>  	 SD_NUMA		|	\
>  	 SD_ASYM_PACKING)
> -- 
> 2.33.0
> 


--
Thanks and Regards
gautham.