linux-kernel - [PATCH V2] sched: topology: make cache topology separate from cpu topology

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1646969135-26647-1-git-send-email-wangqing@vivo.com>
Date:   Thu, 10 Mar 2022 19:25:33 -0800
From:   Qing Wang <wangqing@...o.com>
To:     Catalin Marinas <catalin.marinas@....com>,
        Will Deacon <will@...nel.org>,
        Sudeep Holla <sudeep.holla@....com>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        "Rafael J. Wysocki" <rafael@...nel.org>,
        Ingo Molnar <mingo@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Juri Lelli <juri.lelli@...hat.com>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Dietmar Eggemann <dietmar.eggemann@....com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Ben Segall <bsegall@...gle.com>, Mel Gorman <mgorman@...e.de>,
        Daniel Bristot de Oliveira <bristot@...hat.com>,
        linux-arm-kernel@...ts.infradead.org, linux-kernel@...r.kernel.org
Cc:     Wang Qing <wangqing@...o.com>
Subject: [PATCH V2] sched: topology: make cache topology separate from cpu topology

From: Wang Qing <wangqing@...o.com>

Some architectures(e.g. ARM64), caches are implemented like below:
SD(Level 1):          ************ DIE ************
SD(Level 0):          **** MC ****    **** MC *****
cluster:              **cluster 0**   **cluster 1**
cores:                0   1   2   3   4   5   6   7
cache(Level 1):       C   C   C   C   C   C   C   C
cache(Level 2):  	  **C**   **C**   **C**   **C**
cache(Level 3):       *******shared Level 3********
sd_llc_id(current):   0   0   0   0   4   4   4   4
sd_llc_id(should be): 0   0   2   2   4   4   6   6

Caches and cpus have different topology, this causes cpus_share_cache()
return the wrong value in sd, which will affect the CPU load balance.

The cost of migration in core[0-1] is different to core[2-3] within sd,
because core[0-1] shared L2 cache, but not shared with core[2-3].

Cache topology should be separated with CPU topology, it can be obtained
from "next-level-cache" in DTS preferentially.

V2:
move fix_cpu_llc() to arch_topology.c

Signed-off-by: Wang Qing <wangqing@...o.com>
---
 arch/arm64/kernel/smp.c       |  1 +
 drivers/base/arch_topology.c  | 56 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/arch_topology.h |  2 ++
 kernel/sched/topology.c       |  3 +++
 4 files changed, 62 insertions(+)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 27df5c1..94cf649
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -723,6 +723,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	unsigned int this_cpu;
 
 	init_cpu_topology();
+	init_cpu_cache_topology();
 
 	this_cpu = smp_processor_id();
 	store_cpu_topology(this_cpu);
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 9761541..d6e59b8
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -609,6 +609,62 @@ static int __init parse_dt_topology(void)
 #endif
 
 /*
+ * cpu cache topology table
+ */
+#define MAX_CACHE_LEVEL 7
+struct device_node *cache_topology[NR_CPUS][MAX_CACHE_LEVEL];
+
+void init_cpu_cache_topology(void)
+{
+	struct device_node *node_cpu, *node_cache;
+	int cpu, level;
+
+	for_each_possible_cpu(cpu) {
+		node_cpu = of_get_cpu_node(cpu, NULL);
+		if (!node_cpu)
+			continue;
+
+		level = 0;
+		node_cache = node_cpu;
+		while (level < MAX_CACHE_LEVEL) {
+			node_cache = of_parse_phandle(node_cache, "next-level-cache", 0);
+			if (!node_cache)
+				break;
+
+			cache_topology[cpu][level++] = node_cache;
+		}
+		of_node_put(node_cpu);
+	}
+}
+
+void fix_cpu_llc(int cpu, int *first_cpu, int *cpu_num)
+{
+	int cache_level, cpu_id;
+	int first, last;
+	int id = *first_cpu;
+	int size = *cpu_num;
+
+	for (cache_level = 0; cache_level < MAX_CACHE_LEVEL; cache_level++) {
+		if (!cache_topology[cpu][cache_level])
+			break;
+
+		first = -1;
+		last = id;
+		for (cpu_id = 0; cpu_id < NR_CPUS; cpu_id++) {
+			if (cache_topology[cpu][cache_level] == cache_topology[cpu_id][cache_level]) {
+				if (cpu_id < id || cpu_id >= id + size)
+					return;
+
+				first = (first == -1)?cpu_id:first;
+				last = cpu_id;
+			}
+		}
+		*first_cpu = first;
+		*cpu_num = last - first + 1;
+	}
+}
+
+/*
  * cpu topology table
  */
 struct cpu_topology cpu_topology[NR_CPUS];
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index cce6136b..3048fa6
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -82,6 +82,8 @@ extern struct cpu_topology cpu_topology[NR_CPUS];
 #define topology_cluster_cpumask(cpu)	(&cpu_topology[cpu].cluster_sibling)
 #define topology_llc_cpumask(cpu)	(&cpu_topology[cpu].llc_sibling)
 void init_cpu_topology(void);
+void init_cpu_cache_topology(void);
+void fix_cpu_llc(int cpu, int *first_cpu, int *cpu_num);
 void store_cpu_topology(unsigned int cpuid);
 const struct cpumask *cpu_coregroup_mask(int cpu);
 const struct cpumask *cpu_clustergroup_mask(int cpu);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index d201a70..d894ced
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -661,6 +661,9 @@ static void update_top_cache_domain(int cpu)
 	if (sd) {
 		id = cpumask_first(sched_domain_span(sd));
 		size = cpumask_weight(sched_domain_span(sd));
+#ifdef CONFIG_GENERIC_ARCH_TOPOLOGY
+		fix_cpu_llc(cpu, &id, &size);
+#endif
 		sds = sd->shared;
 	}
 
-- 
2.7.4