lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Message-Id: <1505420820-7274-3-git-send-email-prarit@redhat.com> Date: Thu, 14 Sep 2017 16:26:59 -0400 From: Prarit Bhargava <prarit@...hat.com> To: linux-kernel@...r.kernel.org Cc: kan.liang@...el.com, x86@...nel.org, andi@...stfloor.org, peterz@...radead.org, Andi Kleen <ak@...ux.intel.com> Subject: [PATCH 2/3 v2] x86/topology: Avoid wasting 128k for package id array From: Andi Kleen <ak@...ux.intel.com> I was looking at large early boot allocations and noticed that since (1f12e32f x86/topology: Create logical package id) every 64bit system allocates a 128k array to convert logical package ids. This happens because the array is sized for MAX_LOCAL_APIC and that is always 32k on 64bit systems, and it needs 4 bytes for each entry. This is fairly wasteful, especially for the common case of having only one socket, where we need 128K just to store a single 4 byte value. The max logical APIC value is not known at this point, so it's hard to size it correctly. The previous patch converted the only performance critical user to cache the value, and all others are fairly slow path, so we can just convert the O(1) array lookup to a linear search in cpu_data(). This can also avoid the need for an extra bitmap structure to know if the logical package ID is already allocated. We can also save this information in cpu_data and look it up during the search. This patch removes the explicit arrays and replaces the lookups with explicit searches. Overall the new code is somewhat simpler, and needs a lot less run time memory. The naming of the variables in cpu_data is still not great (_proc sometimes means package and sometimes means logical processor), but I followed the existing (messy) conventions when possible. At some point it would probably be good to clean this up too. Tested on a 2S system, but it would be good to test on more obscure systems which may have problems with package IDs. I'm copying Prarit who had problematic systems before. 
[v2]: Decrease logical_packages when the last thread in a socket is removed. --- arch/x86/include/asm/processor.h | 6 +++- arch/x86/kernel/smpboot.c | 62 +++++++++++++++++----------------------- 2 files changed, 32 insertions(+), 36 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3fa26a61eabc..d369d2a82d8f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -124,13 +124,17 @@ struct cpuinfo_x86 { u16 booted_cores; /* Physical processor id: */ u16 phys_proc_id; - /* Logical processor id: */ + /* Logical processor (package) id: */ u16 logical_proc_id; + /* Physical package ID */ + u16 phys_pkg_id; /* Core id: */ u16 cpu_core_id; /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; + /* Flags */ + unsigned logical_proc_set : 1; } __randomize_layout; struct cpuid_regs { diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0854ff169274..a6e9bd971b91 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -100,9 +100,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); /* Logical package management. We might want to allocate that dynamically */ -static int *physical_to_logical_pkg __read_mostly; -static unsigned long *physical_package_map __read_mostly;; -static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); static unsigned int logical_packages __read_mostly; @@ -284,17 +281,11 @@ static void notrace start_secondary(void *unused) */ int topology_update_package_map(unsigned int pkg, unsigned int cpu) { - unsigned int new; + int new; - /* Called from early boot ? */ - if (!physical_package_map) - return 0; - - if (pkg >= max_physical_pkg_id) - return -EINVAL; - - /* Set the logical package id */ - if (test_and_set_bit(pkg, physical_package_map)) + /* Already available somewhere? 
*/ + new = topology_phys_to_logical_pkg(pkg); + if (new >= 0) goto found; if (logical_packages >= __max_logical_packages) { @@ -304,14 +295,14 @@ int topology_update_package_map(unsigned int pkg, unsigned int cpu) } new = logical_packages++; - if (new != pkg) { + if (new != pkg) pr_info("CPU %u Converting physical %u to logical package %u\n", cpu, pkg, new); - } - physical_to_logical_pkg[pkg] = new; found: - cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg]; + cpu_data(cpu).phys_pkg_id = pkg; + cpu_data(cpu).logical_proc_id = new; + cpu_data(cpu).logical_proc_set = 1; return 0; } @@ -322,16 +313,21 @@ int topology_update_package_map(unsigned int pkg, unsigned int cpu) */ int topology_phys_to_logical_pkg(unsigned int phys_pkg) { - if (phys_pkg >= max_physical_pkg_id) - return -1; - return physical_to_logical_pkg[phys_pkg]; + int cpu; + + for_each_possible_cpu (cpu) { + if (cpu_data(cpu).phys_pkg_id == phys_pkg && + cpu_data(cpu).logical_proc_set) { + return cpu_data(cpu).logical_proc_id; + } + } + return -1; } EXPORT_SYMBOL(topology_phys_to_logical_pkg); static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu) { unsigned int ncpus; - size_t size; /* * Today neither Intel nor AMD support heterogenous systems. That @@ -362,21 +358,10 @@ static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu) } __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); - logical_packages = 0; - - /* - * Possibly larger than what we need as the number of apic ids per - * package can be smaller than the actual used apic ids. 
- */ - max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus); - size = max_physical_pkg_id * sizeof(unsigned int); - physical_to_logical_pkg = kmalloc(size, GFP_KERNEL); - memset(physical_to_logical_pkg, 0xff, size); - size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); - physical_package_map = kzalloc(size, GFP_KERNEL); - pr_info("Max logical packages: %u\n", __max_logical_packages); + logical_packages = 0; + topology_update_package_map(c->phys_proc_id, cpu); } @@ -1507,7 +1492,7 @@ static void recompute_smt_state(void) static void remove_siblinginfo(int cpu) { - int sibling; + int phys_pkg_id, sibling; struct cpuinfo_x86 *c = &cpu_data(cpu); for_each_cpu(sibling, topology_core_cpumask(cpu)) { @@ -1528,6 +1513,13 @@ static void remove_siblinginfo(int cpu) cpumask_clear(topology_core_cpumask(cpu)); c->phys_proc_id = 0; c->cpu_core_id = 0; + + /* last core in socket going down? */ + phys_pkg_id = c->phys_pkg_id; + c->phys_pkg_id = U16_MAX; + if (topology_phys_to_logical_pkg(phys_pkg_id) < 0) + logical_packages--; + cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); recompute_smt_state(); } -- 1.8.5.5
Powered by blists - more mailing lists