lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230830122614.73067-1-srikar@linux.vnet.ibm.com>
Date:   Wed, 30 Aug 2023 17:56:14 +0530
From:   Srikar Dronamraju <srikar@...ux.vnet.ibm.com>
To:     Michael Ellerman <mpe@...erman.id.au>
Cc:     linuxppc-dev <linuxppc-dev@...ts.ozlabs.org>,
        Srikar Dronamraju <srikar@...ux.vnet.ibm.com>,
        Nicholas Piggin <npiggin@...il.com>,
        Christophe Leroy <christophe.leroy@...roup.eu>,
        Peter Zijlstra <peterz@...radead.org>, ndesaulniers@...gle.com,
        Nathan Lynch <nathanl@...ux.ibm.com>,
        Josh Poimboeuf <jpoimboe@...nel.org>,
        Mark Rutland <mark.rutland@....com>,
        linux-kernel@...r.kernel.org
Subject: [PATCH] powerpc/smp: Dynamically build powerpc topology

Currently there are four powerpc specific sched topologies.  These are
all statically defined.  However not all these topologies are used by
all powerpc systems.

To avoid unnecessary degenerations by the scheduler , masks and flags
are compared. However if the sched topologies are build dynamically then
the code is simpler and there are greater chances of avoiding
degenerations.

Even x86 builds its sched topologies dynamically and new changes are
very similar to the way x86 is building its topologies.

System Configuration
type=Shared mode=Uncapped smt=8 lcpu=128 mem=1063126592 kB cpus=96 ent=40.00

$ lscpu
Architecture:                    ppc64le
Byte Order:                      Little Endian
CPU(s):                          1024
On-line CPU(s) list:             0-1023
Model name:                      POWER10 (architected), altivec supported
Model:                           2.0 (pvr 0080 0200)
Thread(s) per core:              8
Core(s) per socket:              32
Socket(s):                       4
Hypervisor vendor:               pHyp
Virtualization type:             para
L1d cache:                       8 MiB (256 instances)
L1i cache:                       12 MiB (256 instances)
NUMA node(s):                    4

>From dmesg of v6.5
[    0.174444] smp: Bringing up secondary CPUs ...
[    3.918535] smp: Brought up 4 nodes, 1024 CPUs
[   38.001402] sysrq: Changing Loglevel
[   38.001446] sysrq: Loglevel set to 9

>From dmesg of v6.5 + patch
[    0.174462] smp: Bringing up secondary CPUs ...
[    3.421462] smp: Brought up 4 nodes, 1024 CPUs
[   35.417917] sysrq: Changing Loglevel
[   35.417959] sysrq: Loglevel set to 9

5 runs of ppc64_cpu --smt=1 (time measured: lesser is better)
Kernel  N  Min     Max     Median  Avg      Stddev     %Change
v6.5    5  518.08  574.27  528.61  535.388  22.341542
+patch  5  481.73  495.47  484.21  486.402  5.7997     -9.14963

5 runs of ppc64_cpu --smt=8 (time measured: lesser is better)
Kernel  N  Min      Max      Median   Avg       Stddev     %Change
v6.5    5  1094.12  1117.1   1108.97  1106.3    8.606361
+patch  5  1067.5   1090.03  1073.89  1076.574  9.4189347  -2.68697

Signed-off-by: Srikar Dronamraju <srikar@...ux.vnet.ibm.com>
---
 arch/powerpc/kernel/smp.c | 78 ++++++++++++++-------------------------
 1 file changed, 28 insertions(+), 50 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 48b8161179a8..c16443a04c26 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -92,15 +92,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 EXPORT_SYMBOL_GPL(has_big_cores);
 
-enum {
-#ifdef CONFIG_SCHED_SMT
-	smt_idx,
-#endif
-	cache_idx,
-	mc_idx,
-	die_idx,
-};
-
 #define MAX_THREAD_LIST_SIZE	8
 #define THREAD_GROUP_SHARE_L1   1
 #define THREAD_GROUP_SHARE_L2_L3 2
@@ -1048,16 +1039,6 @@ static const struct cpumask *cpu_mc_mask(int cpu)
 	return cpu_coregroup_mask(cpu);
 }
 
-static struct sched_domain_topology_level powerpc_topology[] = {
-#ifdef CONFIG_SCHED_SMT
-	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
-#endif
-	{ shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
-	{ cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC) },
-	{ cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(DIE) },
-	{ NULL, },
-};
-
 static int __init init_big_cores(void)
 {
 	int cpu;
@@ -1676,9 +1657,11 @@ void start_secondary(void *unused)
 	BUG();
 }
 
-static void __init fixup_topology(void)
+static struct sched_domain_topology_level powerpc_topology[6];
+
+static void __init build_sched_topology(void)
 {
-	int i;
+	int i = 0;
 
 	if (is_shared_processor()) {
 		asym_pack_flag = SD_ASYM_PACKING;
@@ -1690,36 +1673,33 @@ static void __init fixup_topology(void)
 #ifdef CONFIG_SCHED_SMT
 	if (has_big_cores) {
 		pr_info("Big cores detected but using small core scheduling\n");
-		powerpc_topology[smt_idx].mask = smallcore_smt_mask;
+		powerpc_topology[i++] = (struct sched_domain_topology_level){
+			smallcore_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
+		};
+	} else {
+		powerpc_topology[i++] = (struct sched_domain_topology_level){
+			cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
+		};
 	}
 #endif
+	if (shared_caches) {
+		powerpc_topology[i++] = (struct sched_domain_topology_level){
+			shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE)
+		};
+	}
+	if (has_coregroup_support()) {
+		powerpc_topology[i++] = (struct sched_domain_topology_level){
+			cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC)
+		};
+	}
+	powerpc_topology[i++] = (struct sched_domain_topology_level){
+		cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(DIE)
+	};
 
-	if (!has_coregroup_support())
-		powerpc_topology[mc_idx].mask = powerpc_topology[cache_idx].mask;
-
-	/*
-	 * Try to consolidate topology levels here instead of
-	 * allowing scheduler to degenerate.
-	 * - Dont consolidate if masks are different.
-	 * - Dont consolidate if sd_flags exists and are different.
-	 */
-	for (i = 1; i <= die_idx; i++) {
-		if (powerpc_topology[i].mask != powerpc_topology[i - 1].mask)
-			continue;
-
-		if (powerpc_topology[i].sd_flags && powerpc_topology[i - 1].sd_flags &&
-				powerpc_topology[i].sd_flags != powerpc_topology[i - 1].sd_flags)
-			continue;
-
-		if (!powerpc_topology[i - 1].sd_flags)
-			powerpc_topology[i - 1].sd_flags = powerpc_topology[i].sd_flags;
+	/* There must be one trailing NULL entry left.  */
+	BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1);
 
-		powerpc_topology[i].mask = powerpc_topology[i + 1].mask;
-		powerpc_topology[i].sd_flags = powerpc_topology[i + 1].sd_flags;
-#ifdef CONFIG_SCHED_DEBUG
-		powerpc_topology[i].name = powerpc_topology[i + 1].name;
-#endif
-	}
+	set_sched_topology(powerpc_topology);
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
@@ -1734,9 +1714,7 @@ void __init smp_cpus_done(unsigned int max_cpus)
 		smp_ops->bringup_done();
 
 	dump_numa_cpu_topology();
-
-	fixup_topology();
-	set_sched_topology(powerpc_topology);
+	build_sched_topology();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-- 
2.41.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ