From 6047e7700eb47a01f7ddb794677a72513fd2ff2f Mon Sep 17 00:00:00 2001
From: Pierre Gondois <pierre.gondois@arm.com>
Date: Thu, 29 Aug 2024 11:19:08 +0200
Subject: [PATCH] [RFC] arm64: topology: Enable CONFIG_SMT_NUM_THREADS_DYNAMIC

- On arm64 ACPI systems, change the thread_id assignment to have
  increasing values starting from 0. This is already the case for
  DT based systems. Doing so allows to uniformly identify the n-th
  thread of a given CPU.
- Check that all CPUs have the same number of threads (for DT/ACPI)
- Enable CONFIG_SMT_NUM_THREADS_DYNAMIC

On a Tx2, with 256 CPUs, threads siblings being 0,32,64,96 for
socket0 and 128 + (0,32,64,96) for socket1:
$ cd /sys/devices/system/cpu/smt/
$ cat ../online
0-255
$ echo 2 > control
$ cat ../online
0-63,128-191
$ echo 3 > control
$ cat ../online
0-95,128-223
$ echo on > control
$ cat ../online
0-255
---
 arch/arm64/Kconfig                |  1 +
 arch/arm64/include/asm/topology.h |  8 ++++++++
 arch/arm64/kernel/topology.c      | 16 +++++++++++++++-
 drivers/base/arch_topology.c      |  8 +++++---
 include/linux/arch_topology.h     |  1 +
 5 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index bd3bc2f5e0ec..1d8521483065 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -239,6 +239,7 @@ config ARM64
 	select HAVE_GENERIC_VDSO
 	select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU
 	select HOTPLUG_SMT if (SMP && HOTPLUG_CPU)
+	select SMT_NUM_THREADS_DYNAMIC if HOTPLUG_SMT
 	select IRQ_DOMAIN
 	select IRQ_FORCED_THREADING
 	select KASAN_VMALLOC if KASAN
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index 0f6ef432fb84..7dd211f81687 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -39,6 +39,14 @@ void update_freq_counters_refs(void);
 #define arch_scale_hw_pressure	topology_get_hw_pressure
 #define arch_update_hw_pressure	topology_update_hw_pressure
 
+#ifdef CONFIG_SMT_NUM_THREADS_DYNAMIC
+#include <linux/cpu_smt.h>
+static inline bool topology_smt_thread_allowed(unsigned int cpu)
+{
+	return topology_thread_id(cpu) < cpu_smt_num_threads;
+}
+#endif
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index f72e1e55b05e..a83babe19972 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -47,7 +47,9 @@ int __init parse_acpi_topology(void)
 {
 	int thread_num, max_smt_thread_num = 1;
 	struct xarray core_threads;
+	bool have_thread = false;
 	int cpu, topology_id;
+	unsigned long i;
 	void *entry;
 
 	if (acpi_disabled)
@@ -61,6 +63,8 @@ int __init parse_acpi_topology(void)
 			return topology_id;
 
 		if (acpi_cpu_is_threaded(cpu)) {
+			have_thread = true;
+
 			cpu_topology[cpu].thread_id = topology_id;
 			topology_id = find_acpi_cpu_topology(cpu, 1);
 			cpu_topology[cpu].core_id = topology_id;
@@ -69,9 +73,10 @@ int __init parse_acpi_topology(void)
 			if (!entry) {
 				xa_store(&core_threads, topology_id,
 					 xa_mk_value(1), GFP_KERNEL);
+				cpu_topology[cpu].thread_id = 0;
 			} else {
 				thread_num = xa_to_value(entry);
-				thread_num++;
+				cpu_topology[cpu].thread_id = thread_num++;
 				xa_store(&core_threads, topology_id,
 					 xa_mk_value(thread_num), GFP_KERNEL);
 
@@ -86,8 +91,17 @@ int __init parse_acpi_topology(void)
 		cpu_topology[cpu].cluster_id = topology_id;
 		topology_id = find_acpi_cpu_topology_package(cpu);
 		cpu_topology[cpu].package_id = topology_id;
+
+		pr_debug("CPU%u: package=0x%x cluster=0x%x core=0x%x thread=0x%x\n",
+			 cpu, cpu_topology[cpu].package_id, cpu_topology[cpu].cluster_id,
+			 cpu_topology[cpu].core_id, cpu_topology[cpu].thread_id);
 	}
 
+	if (have_thread)
+		xa_for_each(&core_threads, i, entry)
+			if (xa_to_value(entry) != max_smt_thread_num)
+				pr_warn("Heterogeneous SMT topology not handled\n");
+
 	cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num);
 
 	xa_destroy(&core_threads);
diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 95513abd664f..20d7f5b72ddd 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -532,13 +532,15 @@ static int __init get_cpu_for_node(struct device_node *node)
 	return cpu;
 }
 
-static void __init update_smt_num_threads(unsigned int num_threads)
+static void __init update_smt_num_threads(int num_threads)
 {
-	static unsigned int max_smt_thread_num = 1;
+	static int max_smt_thread_num = -1;
 
-	if (num_threads > max_smt_thread_num) {
+	if (max_smt_thread_num < 0) {
 		max_smt_thread_num = num_threads;
 		cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num);
+	} else if (num_threads != max_smt_thread_num) {
+		pr_warn("Heterogeneous SMT topology not handled\n");
 	}
 }
 
diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h
index b721f360d759..afdfdc64a0a1 100644
--- a/include/linux/arch_topology.h
+++ b/include/linux/arch_topology.h
@@ -87,6 +87,7 @@ extern struct cpu_topology cpu_topology[NR_CPUS];
 #define topology_physical_package_id(cpu)	(cpu_topology[cpu].package_id)
 #define topology_cluster_id(cpu)	(cpu_topology[cpu].cluster_id)
 #define topology_core_id(cpu)		(cpu_topology[cpu].core_id)
+#define topology_thread_id(cpu)		(cpu_topology[cpu].thread_id)
 #define topology_core_cpumask(cpu)	(&cpu_topology[cpu].core_sibling)
 #define topology_sibling_cpumask(cpu)	(&cpu_topology[cpu].thread_sibling)
 #define topology_cluster_cpumask(cpu)	(&cpu_topology[cpu].cluster_sibling)
-- 
2.25.1