[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1650552960-60165-3-git-send-email-wangqing@vivo.com>
Date: Thu, 21 Apr 2022 07:55:58 -0700
From: Qing Wang <wangqing@...o.com>
To: Catalin Marinas <catalin.marinas@....com>,
Will Deacon <will@...nel.org>,
Sudeep Holla <sudeep.holla@....com>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
"Rafael J. Wysocki" <rafael@...nel.org>,
linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org
Cc: Wang Qing <wangqing@...o.com>
Subject: [PATCH 2/2] arm64: Add complex scheduler level for arm64
From: Wang Qing <wangqing@...o.com>
The DSU-110 DynamIQ™ cluster supports blocks that are called complexes
which contain up to two cores of the same type and some shared logic.
Sharing some logic between the cores can make a complex area efficient.
This patch adds a complex level for complexes and automatically enables
load balancing among complexes. It will directly benefit many
workloads that need more resources such as memory bandwidth and caches.
Testing has been done in qcom sm8450 with Stream benchmark:
8threads stream (2 little cores * 2(complex) + 3 middle cores + 1 big core)
stream stream
w/o patch w/ patch
MB/sec copy 37579.2 ( 0.00%) 39127.3 ( 4.12%)
MB/sec scale 38261.1 ( 0.00%) 39195.4 ( 2.44%)
MB/sec add 39497.0 ( 0.00%) 41101.5 ( 4.06%)
MB/sec triad 39885.6 ( 0.00%) 40772.7 ( 2.22%)
Signed-off-by: Wang Qing <wangqing@...o.com>
---
arch/arm64/Kconfig | 13 +++++++++++
arch/arm64/kernel/smp.c | 48 ++++++++++++++++++++++++++++++++++++++++-
2 files changed, 60 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index edbe035cb0e3..4063de8c6153 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1207,6 +1207,19 @@ config SCHED_CLUSTER
by sharing mid-level caches, last-level cache tags or internal
busses.
+config SCHED_COMPLEX
+ bool "Complex scheduler support"
+ help
+ DSU supports blocks that are called complexes which contain up to
+ two cores of the same type and some shared logic. Sharing some logic
+ between the cores can make a complex area efficient.
+
+ A complex can also be considered a shared cache group smaller
+ than a cluster.
+
+ Complex scheduler support improves the CPU scheduler's decision
+ making when dealing with machines that have complexes of CPUs.
+
config SCHED_SMT
bool "SMT scheduler support"
help
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 3b46041f2b97..526765112146 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -14,6 +14,7 @@
#include <linux/sched/mm.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/task_stack.h>
+#include <linux/sched/topology.h>
#include <linux/interrupt.h>
#include <linux/cache.h>
#include <linux/profile.h>
@@ -57,6 +58,10 @@
DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);
+#ifdef CONFIG_SCHED_COMPLEX
+/* Per-CPU cpumask that arm64_complex_mask() fills in and returns. */
+DEFINE_PER_CPU_READ_MOSTLY(cpumask_t, cpu_complex_map);
+#endif
+
/*
* as from 2.5, kernels no longer have an init_tasks structure
* so we need some other way of telling a new secondary core
@@ -715,6 +720,47 @@ void __init smp_init_cpus(void)
}
}
+#ifdef CONFIG_SCHED_COMPLEX
+/* Scheduler-domain flags used for the complex (CPL) topology level. */
+static int arm64_complex_flags(void)
+{
+ return SD_SHARE_PKG_RESOURCES;
+}
+
+/*
+ * Return the complex cpumask for @cpu.
+ *
+ * The starting mask is cpu_cpu_mask(), replaced by the coregroup mask
+ * when CONFIG_SCHED_MC is set and then by the clustergroup mask when
+ * CONFIG_SCHED_CLUSTER is set (the last enabled option wins). That mask
+ * and the per-CPU cpu_complex_map are handed to find_max_sub_sc(); the
+ * per-CPU map is what gets returned.
+ *
+ * NOTE(review): find_max_sub_sc() is not defined in this patch; it is
+ * presumably added by patch 1/2 and expected to write the result into
+ * its third argument - confirm.
+ */
+const struct cpumask *arm64_complex_mask(int cpu)
+{
+ const struct cpumask *core_mask = cpu_cpu_mask(cpu);
+
+ /* Look for a shared cache level smaller than the cluster group and core group. */
+#ifdef CONFIG_SCHED_MC
+ core_mask = cpu_coregroup_mask(cpu);
+#endif
+#ifdef CONFIG_SCHED_CLUSTER
+ core_mask = cpu_clustergroup_mask(cpu);
+#endif
+
+ find_max_sub_sc(core_mask, cpu, &per_cpu(cpu_complex_map, cpu));
+
+ return &per_cpu(cpu_complex_map, cpu);
+}
+#endif
+
+/*
+ * Scheduler domain topology levels for arm64, installed with
+ * set_sched_topology() from smp_prepare_cpus().
+ *
+ * The SMT, CPL (complex), CLS (cluster) and MC levels are each present
+ * only when the corresponding CONFIG_SCHED_* option is enabled; the DIE
+ * level and the NULL terminator are always present.
+ *
+ * NOTE(review): entries appear to be ordered from the smallest CPU
+ * grouping to the largest - keep that order when adding levels.
+ */
+static struct sched_domain_topology_level arm64_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+ { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_COMPLEX
+ { arm64_complex_mask, arm64_complex_flags, SD_INIT_NAME(CPL) },
+#endif
+#ifdef CONFIG_SCHED_CLUSTER
+ { cpu_clustergroup_mask, cpu_cluster_flags, SD_INIT_NAME(CLS) },
+#endif
+#ifdef CONFIG_SCHED_MC
+ { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
const struct cpu_operations *ops;
@@ -723,9 +769,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
unsigned int this_cpu;
init_cpu_topology();
-
this_cpu = smp_processor_id();
store_cpu_topology(this_cpu);
+ set_sched_topology(arm64_topology);
numa_store_cpu_info(this_cpu);
numa_add_cpu(this_cpu);
--
2.27.0.windows.1
Powered by blists - more mailing lists