In order to more efficiently iterate cores/smt, we need a cpumask
containing only the first thread of each core (for the LLC domain).

And since we're iterating SMT specific things, move sched_init_smt()
over there. Also track how many threads per core we have.

Signed-off-by: Peter Zijlstra (Intel)
---
 include/linux/sched/topology.h |    9 +++++++++
 kernel/sched/core.c            |   18 ------------------
 kernel/sched/fair.c            |    3 +++
 kernel/sched/sched.h           |    2 ++
 kernel/sched/topology.c        |   35 +++++++++++++++++++++++++++++++++--
 5 files changed, 47 insertions(+), 20 deletions(-)

--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -72,6 +72,8 @@ struct sched_domain_shared {
 	atomic_t	ref;
 	atomic_t	nr_busy_cpus;
 	int		has_idle_cores;
+
+	unsigned long	core_mask[0];
 };
 
 struct sched_domain {
@@ -162,6 +164,13 @@ static inline struct cpumask *sched_doma
 	return to_cpumask(sd->span);
 }
 
+#ifdef CONFIG_SCHED_SMT
+static inline struct cpumask *sched_domain_cores(struct sched_domain *sd)
+{
+	return to_cpumask(sd->shared->core_mask);
+}
+#endif
+
 extern void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
 				    struct sched_domain_attr *dattr_new);
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5854,22 +5854,6 @@ int sched_cpu_dying(unsigned int cpu)
 }
 #endif
 
-#ifdef CONFIG_SCHED_SMT
-DEFINE_STATIC_KEY_FALSE(sched_smt_present);
-
-static void sched_init_smt(void)
-{
-	/*
-	 * We've enumerated all CPUs and will assume that if any CPU
-	 * has SMT siblings, CPU0 will too.
-	 */
-	if (cpumask_weight(cpu_smt_mask(0)) > 1)
-		static_branch_enable(&sched_smt_present);
-}
-#else
-static inline void sched_init_smt(void) { }
-#endif
-
 void __init sched_init_smp(void)
 {
 	sched_init_numa();
@@ -5891,8 +5875,6 @@ void __init sched_init_smp(void)
 	init_sched_rt_class();
 	init_sched_dl_class();
 
-	sched_init_smt();
-
 	sched_smp_initialized = true;
 }
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6238,6 +6238,9 @@ static inline int find_idlest_cpu(struct
 }
 
 #ifdef CONFIG_SCHED_SMT
+DEFINE_STATIC_KEY_FALSE(sched_smt_present);
+
+__read_mostly int sched_smt_weight = 1;
 
 static inline void set_idle_cores(int cpu, int val)
 {
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -910,6 +910,8 @@ static inline void update_idle_core(stru
 		__update_idle_core(rq);
 }
 
+extern __read_mostly int sched_smt_weight;
+
 #else
 static inline void update_idle_core(struct rq *rq) { }
 #endif
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1190,8 +1190,39 @@ sd_init(struct sched_domain_topology_lev
 	 */
	if (sd->flags & SD_SHARE_PKG_RESOURCES) {
 		sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
-		atomic_inc(&sd->shared->ref);
 		atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
+		if (atomic_read(&sd->shared->ref)) {
+			atomic_inc(&sd->shared->ref);
+		} else {
+#ifdef CONFIG_SCHED_SMT
+			int core, smt, smt_weight;
+
+			/*
+			 * Set the first SMT sibling of each core present in
+			 * the domain span.
+			 */
+			for_each_cpu(core, sched_domain_span(sd)) {
+				for_each_cpu(smt, cpu_smt_mask(core)) {
+					if (cpumask_test_cpu(smt, sched_domain_span(sd))) {
+						__cpumask_set_cpu(smt, sched_domain_cores(sd));
+						break;
+					}
+				}
+
+				/*
+				 * And track the presence and number of threads per core.
+				 */
+
+				smt_weight = cpumask_weight(cpu_smt_mask(core));
+				if (smt_weight > sched_smt_weight) {
+					sched_smt_weight = smt_weight;
+					static_branch_enable(&sched_smt_present);
+				}
+			}
+#endif
+
+			atomic_set(&sd->shared->ref, 1);
+		}
 	}
 
 	sd->private = sdd;
@@ -1537,7 +1568,7 @@ static int __sdt_alloc(const struct cpum
 
 		*per_cpu_ptr(sdd->sd, j) = sd;
 
-		sds = kzalloc_node(sizeof(struct sched_domain_shared),
+		sds = kzalloc_node(sizeof(struct sched_domain_shared) + cpumask_size(),
 				GFP_KERNEL, cpu_to_node(j));
 		if (!sds)
 			return -ENOMEM;
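
For anyone who wants to play with the core_mask construction outside the
kernel, here is a small stand-alone C sketch (not part of the patch) that
mirrors the sd_init() loop above: it walks a made-up 8-CPU, 2-way SMT
"domain span", marks only the first in-span sibling of each core, and tracks
the largest thread count per core. The smt_mask() helper, NR_CPUS value and
uint64_t bitmaps are illustrative stand-ins for cpu_smt_mask() and
struct cpumask, not kernel APIs.

/* Stand-alone sketch of the per-core representative mask built in sd_init(). */
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 8

/* Hypothetical stand-in for cpu_smt_mask(): siblings are {2n, 2n+1}. */
static uint64_t smt_mask(int cpu)
{
	int first = cpu & ~1;

	return (1ULL << first) | (1ULL << (first + 1));
}

int main(void)
{
	uint64_t span = 0xffULL;	/* "LLC domain" spanning CPUs 0-7 */
	uint64_t cores = 0;		/* analogue of sd->shared->core_mask */
	int smt_weight = 1;		/* analogue of sched_smt_weight */
	int cpu, sibling;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		int threads;

		if (!(span & (1ULL << cpu)))
			continue;

		/* Mark the first sibling of this core that lies in the span. */
		for (sibling = 0; sibling < NR_CPUS; sibling++) {
			if (smt_mask(cpu) & span & (1ULL << sibling)) {
				cores |= 1ULL << sibling;
				break;
			}
		}

		/* Track the largest number of threads per core seen. */
		threads = __builtin_popcountll(smt_mask(cpu));
		if (threads > smt_weight)
			smt_weight = threads;
	}

	printf("core mask: %#llx, threads per core: %d\n",
	       (unsigned long long)cores, smt_weight);
	return 0;
}

Built with gcc and run, this prints core mask 0x55 and 2 threads per core,
i.e. CPUs 0, 2, 4 and 6 end up as the per-core representatives that an
iteration over sched_domain_cores() would visit.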