[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230601153522.GB559993@hirez.programming.kicks-ass.net>
Date:   Thu, 1 Jun 2023 17:35:22 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     K Prateek Nayak <kprateek.nayak@....com>
Cc:     linux-kernel@...r.kernel.org, linux-tip-commits@...r.kernel.org,
        Tejun Heo <tj@...nel.org>, x86@...nel.org,
        Gautham Shenoy <gautham.shenoy@....com>
Subject: Re: [tip: sched/core] sched/fair: Multi-LLC select_idle_sibling()
On Thu, Jun 01, 2023 at 04:47:06PM +0200, Peter Zijlstra wrote:
> One way to fix all this would be by having arch/x86/kernel/smpboot.c set
> an AMD specific set_sched_topology() that has a CCD domain above the MC
> and below the DIE domain that groups 'near' CCDs together based on some
> AMD specific topology information.
> 
> Then for small systems that will probably be just a single CCD domain
> and the degenerate code will make it go away, but for these large
> systems it will do what is right for their respective configuration.
> 
> Then, since this new multi-llc code uses MC->parent it will end up on
> the fancy new CCD domain and not scan the *entire* socket.
> 
> Hmm?
Something like the (untested) below might be a nice base to go from.
Then all you have to do is add something like:
	if (x86_has_ccd_topology) {
		x86_topology[i++] = (struct sched_domain_topology_level){
			cpu_ccd_mask, SD_INIT_NAME(CCD)
		};
	}
(and construct cpu_ccd_mask obviously...)
---
 arch/x86/kernel/smpboot.c | 94 ++++++++++++++++++++++-------------------------
 1 file changed, 43 insertions(+), 51 deletions(-)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 34066f6735dd..0a22d719b6b6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -563,50 +563,57 @@ static int x86_cluster_flags(void)
 #endif
 #endif
 
-static struct sched_domain_topology_level x86_numa_in_package_topology[] = {
-#ifdef CONFIG_SCHED_SMT
-	{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
-#endif
-#ifdef CONFIG_SCHED_CLUSTER
-	{ cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) },
-#endif
-#ifdef CONFIG_SCHED_MC
-	{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
-#endif
-	{ NULL, },
-};
+/*
+ * Set if a package/die has multiple NUMA nodes inside.
+ * AMD Magny-Cours, Intel Cluster-on-Die, and Intel
+ * Sub-NUMA Clustering have this.
+ */
+static bool x86_has_numa_in_package;
 
-static struct sched_domain_topology_level x86_hybrid_topology[] = {
-#ifdef CONFIG_SCHED_SMT
-	{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
-#endif
-#ifdef CONFIG_SCHED_MC
-	{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
-#endif
-	{ cpu_cpu_mask, x86_sched_itmt_flags, SD_INIT_NAME(DIE) },
-	{ NULL, },
-};
+static struct sched_domain_topology_level x86_topology[6];
+
+static void __init build_sched_topology(void)
+{
+	int i = 0;
 
-static struct sched_domain_topology_level x86_topology[] = {
 #ifdef CONFIG_SCHED_SMT
-	{ cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT) },
+	x86_topology[i++] = (struct sched_domain_topology_level){
+		cpu_smt_mask, x86_smt_flags, SD_INIT_NAME(SMT)
+	};
 #endif
 #ifdef CONFIG_SCHED_CLUSTER
-	{ cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) },
+	/*
+	 * For now, skip the cluster domain on Hybrid.
+	 */
+	if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
+		x86_topology[i++] = (struct sched_domain_topology_level){
+			cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS)
+		};
+	}
 #endif
 #ifdef CONFIG_SCHED_MC
-	{ cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC) },
+	x86_topology[i++] = (struct sched_domain_topology_level){
+		cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC)
+	};
 #endif
-	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
-	{ NULL, },
-};
+	/*
+	 * When there is NUMA topology inside the package skip the DIE domain
+	 * since the NUMA domains will auto-magically create the right spanning
+	 * domains based on the SLIT.
+	 */
+	if (!x86_has_numa_in_package) {
+		x86_topology[i++] = (struct sched_domain_topology_level){
+			cpu_cpu_mask, SD_INIT_NAME(DIE)
+		};
+	}
 
-/*
- * Set if a package/die has multiple NUMA nodes inside.
- * AMD Magny-Cours, Intel Cluster-on-Die, and Intel
- * Sub-NUMA Clustering have this.
- */
-static bool x86_has_numa_in_package;
+	/*
+	 * There must be one trailing NULL entry left.
+	 */
+	BUG_ON(i >= ARRAY_SIZE(x86_topology)-1);
+
+	set_sched_topology(x86_topology);
+}
 
 void set_cpu_sibling_map(int cpu)
 {
@@ -1390,15 +1397,6 @@ void __init smp_prepare_cpus_common(void)
 		zalloc_cpumask_var(&per_cpu(cpu_l2c_shared_map, i), GFP_KERNEL);
 	}
 
-	/*
-	 * Set 'default' x86 topology, this matches default_topology() in that
-	 * it has NUMA nodes as a topology level. See also
-	 * native_smp_cpus_done().
-	 *
-	 * Must be done before set_cpus_sibling_map() is ran.
-	 */
-	set_sched_topology(x86_topology);
-
 	set_cpu_sibling_map(0);
 }
 
@@ -1490,13 +1488,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 	pr_debug("Boot done\n");
 
 	calculate_max_logical_packages();
-
-	/* XXX for now assume numa-in-package and hybrid don't overlap */
-	if (x86_has_numa_in_package)
-		set_sched_topology(x86_numa_in_package_topology);
-	if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
-		set_sched_topology(x86_hybrid_topology);
-
+	build_sched_topology();
 	nmi_selftest();
 	impress_friends();
 	cache_aps_init();
Powered by blists - more mailing lists
 
