[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <BYAPR21MB16882172252C0C013A0E3B5ED704A@BYAPR21MB1688.namprd21.prod.outlook.com>
Date: Sun, 30 Jul 2023 05:20:07 +0000
From: "Michael Kelley (LINUX)" <mikelley@...rosoft.com>
To: Thomas Gleixner <tglx@...utronix.de>,
LKML <linux-kernel@...r.kernel.org>
CC: "x86@...nel.org" <x86@...nel.org>,
Tom Lendacky <thomas.lendacky@....com>,
Andrew Cooper <andrew.cooper3@...rix.com>,
Arjan van de Ven <arjan@...ux.intel.com>,
"James E.J. Bottomley" <jejb@...ux.ibm.com>,
Dick Kennedy <dick.kennedy@...adcom.com>,
James Smart <james.smart@...adcom.com>,
"Martin K. Petersen" <martin.petersen@...cle.com>,
"linux-scsi@...r.kernel.org" <linux-scsi@...r.kernel.org>,
Guenter Roeck <linux@...ck-us.net>,
"linux-hwmon@...r.kernel.org" <linux-hwmon@...r.kernel.org>,
Jean Delvare <jdelvare@...e.com>,
Huang Rui <ray.huang@....com>, Juergen Gross <jgross@...e.com>,
Steve Wahl <steve.wahl@....com>,
Mike Travis <mike.travis@....com>,
Dimitri Sivanich <dimitri.sivanich@....com>,
Russ Anderson <russ.anderson@....com>
Subject: RE: [patch v2 28/38] x86/cpu: Provide an AMD/HYGON specific topology
parser
From: Thomas Gleixner <tglx@...utronix.de> Sent: Friday, July 28, 2023 5:13 AM
>
> AMD/HYGON uses various methods for topology evaluation:
>
> - Leaf 0x80000008 and 0x8000001e based with an optional leaf 0xb,
> which is the preferred variant for modern CPUs.
>
> Leaf 0xb will be superseeded by leaf 0x80000026 soon, which is just
> another variant of the Intel 0x1f leaf for whatever reasons.
>
> - Subleaf 0x80000008 and NODEID_MSR base
>
> - Legacy fallback
>
> That code is following the principle of random bits and pieces all over the
> place which results in multiple evaluations and impenetrable code flows in
> the same way as the Intel parsing did.
>
> Provide a sane implementation by clearly separating the three variants and
> bringing them in the proper preference order in one place.
>
> This provides the parsing for both AMD and HYGON because there is no point
> in having a separate HYGON parser which only differs by 3 lines of
> code. Any further divergence between AMD and HYGON can be handled in
> different functions, while still sharing the existing parsers.
>
> Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
> ---
> arch/x86/include/asm/topology.h | 2
> arch/x86/kernel/cpu/Makefile | 2
> arch/x86/kernel/cpu/amd.c | 2
> arch/x86/kernel/cpu/cacheinfo.c | 4
> arch/x86/kernel/cpu/cpu.h | 2
> arch/x86/kernel/cpu/debugfs.c | 2
> arch/x86/kernel/cpu/topology.h | 6 +
> arch/x86/kernel/cpu/topology_amd.c | 179
> ++++++++++++++++++++++++++++++++++
> arch/x86/kernel/cpu/topology_common.c | 19 +++
> 9 files changed, 211 insertions(+), 7 deletions(-)
>
> --- a/arch/x86/include/asm/topology.h
> +++ b/arch/x86/include/asm/topology.h
> @@ -162,6 +162,8 @@ int topology_update_die_map(unsigned int
> int topology_phys_to_logical_pkg(unsigned int pkg);
> bool topology_smt_supported(void);
>
> +extern unsigned int __amd_nodes_per_pkg;
> +
> static inline unsigned int topology_amd_nodes_per_pkg(void)
> {
> return __max_die_per_package;
> --- a/arch/x86/kernel/cpu/Makefile
> +++ b/arch/x86/kernel/cpu/Makefile
> @@ -18,7 +18,7 @@ KMSAN_SANITIZE_common.o := n
> KCSAN_SANITIZE_common.o := n
>
> obj-y := cacheinfo.o scattered.o
> -obj-y += topology_common.o topology_ext.o topology.o
> +obj-y += topology_common.o topology_ext.o topology_amd.o topology.o
> obj-y += common.o
> obj-y += rdrand.o
> obj-y += match.o
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -356,7 +356,7 @@ static void amd_get_topology(struct cpui
> if (!err)
> c->x86_coreid_bits = get_count_order(c->x86_max_cores);
>
> - cacheinfo_amd_init_llc_id(c);
> + cacheinfo_amd_init_llc_id(c, c->topo.die_id);
>
> } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
> u64 value;
> --- a/arch/x86/kernel/cpu/cacheinfo.c
> +++ b/arch/x86/kernel/cpu/cacheinfo.c
> @@ -661,7 +661,7 @@ static int find_num_cache_leaves(struct
> return i;
> }
>
> -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c)
> +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
> {
> /*
> * We may have multiple LLCs if L3 caches exist, so check if we
> @@ -672,7 +672,7 @@ void cacheinfo_amd_init_llc_id(struct cp
>
> if (c->x86 < 0x17) {
> /* LLC is at the node level. */
> - c->topo.llc_id = c->topo.die_id;
> + c->topo.llc_id = die_id;
> } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
> /*
> * LLC is at the core complex level.
> --- a/arch/x86/kernel/cpu/cpu.h
> +++ b/arch/x86/kernel/cpu/cpu.h
> @@ -79,7 +79,7 @@ extern void init_hygon_cacheinfo(struct
> extern int detect_extended_topology(struct cpuinfo_x86 *c);
> extern void check_null_seg_clears_base(struct cpuinfo_x86 *c);
>
> -void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c);
> +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id);
> void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c);
>
> unsigned int aperfmperf_get_khz(int cpu);
> --- a/arch/x86/kernel/cpu/debugfs.c
> +++ b/arch/x86/kernel/cpu/debugfs.c
> @@ -27,6 +27,8 @@ static int cpu_debug_show(struct seq_fil
> seq_printf(m, "logical_die_id: %u\n", c->topo.logical_die_id);
> seq_printf(m, "llc_id: %u\n", c->topo.llc_id);
> seq_printf(m, "l2c_id: %u\n", c->topo.l2c_id);
> + seq_printf(m, "amd_node_id: %u\n", c->topo.amd_node_id);
> + seq_printf(m, "amd_nodes_per_pkg: %u\n", topology_amd_nodes_per_pkg());
> seq_printf(m, "max_cores: %u\n", c->x86_max_cores);
> seq_printf(m, "max_die_per_pkg: %u\n", __max_die_per_package);
> seq_printf(m, "smp_num_siblings: %u\n", smp_num_siblings);
> --- a/arch/x86/kernel/cpu/topology.h
> +++ b/arch/x86/kernel/cpu/topology.h
> @@ -9,6 +9,10 @@ struct topo_scan {
>
> // Legacy CPUID[1]:EBX[23:16] number of logical processors
> unsigned int ebx1_nproc_shift;
> +
> + // AMD specific node ID which cannot be mapped into APIC space.
> + u16 amd_nodes_per_pkg;
> + u16 amd_node_id;
> };
>
> bool topo_is_converted(struct cpuinfo_x86 *c);
> @@ -17,6 +21,8 @@ void cpu_parse_topology(struct cpuinfo_x
> void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
> unsigned int shift, unsigned int ncpus);
> bool cpu_parse_topology_ext(struct topo_scan *tscan);
> +void cpu_parse_topology_amd(struct topo_scan *tscan);
> +void cpu_topology_fixup_amd(struct topo_scan *tscan);
>
> static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom)
> {
> --- /dev/null
> +++ b/arch/x86/kernel/cpu/topology_amd.c
> @@ -0,0 +1,179 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/cpu.h>
> +
> +#include <asm/apic.h>
> +#include <asm/memtype.h>
> +#include <asm/processor.h>
> +
> +#include "cpu.h"
> +
> +static bool parse_8000_0008(struct topo_scan *tscan)
> +{
> + struct {
> + u32 ncores : 8,
> + __rsvd0 : 4,
> + apicidsize : 4,
> + perftscsize : 2,
> + __rsvd1 : 14;
> + } ecx;
> + unsigned int sft;
> +
> + if (tscan->c->extended_cpuid_level < 0x80000008)
> + return false;
> +
> + cpuid_leaf_reg(0x80000008, CPUID_ECX, &ecx);
> +
> + /* If the APIC ID size is 0, then get the shift value from ecx.ncores */
> + sft = ecx.apicidsize;
> + if (!sft)
> + sft = get_count_order(ecx.ncores + 1);
> +
> + topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.ncores + 1);
> + return true;
> +}
> +
> +static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id)
> +{
> + /*
> + * Starting with Fam 17h the DIE domain could probably be used to
> + * retrieve the node info on AMD/HYGON. Analysis of CPUID dumps
> + * suggests its the topmost bit(s) of the CPU cores area, but
s/its/it's/
> + * that's guess work and neither enumerated nor documented.
> + *
> + * Up to Fam 16h this does not work at all and the legacy node ID
> + * has to be used.
> + */
> + tscan->amd_nodes_per_pkg = nr_nodes;
> + tscan->amd_node_id = node_id;
> +}
> +
> +static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb)
> +{
> + struct {
> + // eax
> + u32 x2apic_id : 32;
> + // ebx
> + u32 cuid : 8,
> + threads_per_cu : 8,
> + __rsvd0 : 16;
> + // ecx
> + u32 nodeid : 8,
> + nodes_per_pkg : 3,
> + __rsvd1 : 21;
> + // edx
> + u32 __rsvd2 : 32;
> + } leaf;
> +
> + if (!boot_cpu_has(X86_FEATURE_TOPOEXT))
> + return false;
> +
> + cpuid_leaf(0x8000001e, &leaf);
> +
> + tscan->c->topo.initial_apicid = leaf.x2apic_id;
> +
> + /*
> + * If leaf 0xb is available, then SMT shift is set already. If not
> + * take it from ecx.threads_per_cpu and use topo_update_dom() as
"take it from ebx.threads_per_cu and use topology_update_dom() as"
> + * topology_set_dom() would propagate and overwrite the already
> + * propagated CORE level.
> + */
> + if (!has_0xb) {
> + topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(leaf.threads_per_cu),
> + leaf.threads_per_cu);
leaf.threads_per_cu needs to be (leaf.threads_per_cu + 1) above. If
the core has two hyper-threads, the value of the threads_per_cu
field returned by CPUID is "1".
All my Hyper-V VMs on AMD processors were coming up with only
one thread per core. The change fixes the problem.
Michael
> + }
> +
> + store_node(tscan, leaf.nodes_per_pkg + 1, leaf.nodeid);
> +
> + if (tscan->c->x86_vendor == X86_VENDOR_AMD) {
> + if (tscan->c->x86 == 0x15)
> + tscan->c->topo.cu_id = leaf.cuid;
> +
> + cacheinfo_amd_init_llc_id(tscan->c, leaf.nodeid);
> + } else {
> + /*
> + * Pacakge ID is ApicId[6..] on Hygon CPUs. See commit
> + * e0ceeae708ce for explanation. The topology info is
> + * screwed up: The package shift is always 6 and the node
> + * ID is bit [4:5]. Don't touch the latter without
> + * confirmation from the Hygon developers.
> + */
> + topology_set_dom(tscan, TOPO_CORE_DOMAIN, 6, tscan->dom_ncpus[TOPO_CORE_DOMAIN]);
> + cacheinfo_hygon_init_llc_id(tscan->c);
> + }
> + return true;
> +}
> +
> +static bool parse_fam10h_node_id(struct topo_scan *tscan)
> +{
> + struct {
> + union {
> + u64 node_id : 3,
> + nodes_per_pkg : 3,
> + unused : 58;
> + u64 msr;
> + };
> + } nid;
> +
> + if (!boot_cpu_has(X86_FEATURE_NODEID_MSR))
> + return false;
> +
> + rdmsrl(MSR_FAM10H_NODE_ID, nid.msr);
> + store_node(tscan, nid.nodes_per_pkg + 1, nid.node_id);
> + tscan->c->topo.llc_id = nid.node_id;
> + return true;
> +}
> +
> +static void legacy_set_llc(struct topo_scan *tscan)
> +{
> + unsigned int apicid = tscan->c->topo.initial_apicid;
> +
> + /* parse_8000_0008() set everything up except llc_id */
> + tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN];
> +}
> +
> +static void parse_topology_amd(struct topo_scan *tscan)
> +{
> + bool has_0xb = false;
> +
> + /*
> + * If the extended topology leaf 0x8000_001e is available
> + * try to get SMT and CORE shift from leaf 0xb first, then
> + * try to get the CORE shift from leaf 0x8000_0008.
> + */
> + if (boot_cpu_has(X86_FEATURE_TOPOEXT))
> + has_0xb = cpu_parse_topology_ext(tscan);
> +
> + if (!has_0xb && !parse_8000_0008(tscan))
> + return;
> +
> + /* Prefer leaf 0x8000001e if available */
> + if (parse_8000_001e(tscan, has_0xb))
> + return;
> +
> + /* Try the NODEID MSR */
> + if (parse_fam10h_node_id(tscan))
> + return;
> +
> + legacy_set_llc(tscan);
> +}
> +
> +void cpu_parse_topology_amd(struct topo_scan *tscan)
> +{
> + tscan->amd_nodes_per_pkg = 1;
> + parse_topology_amd(tscan);
> +
> + if (tscan->amd_nodes_per_pkg > 1)
> + set_cpu_cap(tscan->c, X86_FEATURE_AMD_DCM);
> +}
> +
> +void cpu_topology_fixup_amd(struct topo_scan *tscan)
> +{
> + struct cpuinfo_x86 *c = tscan->c;
> +
> + /*
> + * Adjust the core_id relative to the node when there is more than
> + * one node.
> + */
> + if (tscan->c->x86 < 0x17 && tscan->amd_nodes_per_pkg > 1)
> + c->topo.core_id %= tscan->dom_ncpus[TOPO_CORE_DOMAIN] / tscan->amd_nodes_per_pkg;
> +}
> --- a/arch/x86/kernel/cpu/topology_common.c
> +++ b/arch/x86/kernel/cpu/topology_common.c
> @@ -11,11 +11,13 @@
>
> struct x86_topology_system x86_topo_system __ro_after_init;
>
> +unsigned int __amd_nodes_per_pkg __ro_after_init;
> +EXPORT_SYMBOL_GPL(__amd_nodes_per_pkg);
> +
> void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
> unsigned int shift, unsigned int ncpus)
> {
> - tscan->dom_shifts[dom] = shift;
> - tscan->dom_ncpus[dom] = ncpus;
> + topology_update_dom(tscan, dom, shift, ncpus);
>
> /* Propagate to the upper levels */
> for (dom++; dom < TOPO_MAX_DOMAIN; dom++) {
> @@ -145,6 +147,13 @@ static void topo_set_ids(struct topo_sca
>
> /* Relative core ID */
> c->topo.core_id = topo_relative_domain_id(apicid, TOPO_CORE_DOMAIN);
> +
> + /* Temporary workaround */
> + if (tscan->amd_nodes_per_pkg)
> + c->topo.amd_node_id = c->topo.die_id = tscan->amd_node_id;
> +
> + if (c->x86_vendor == X86_VENDOR_AMD)
> + cpu_topology_fixup_amd(tscan);
> }
>
> static void topo_set_max_cores(struct topo_scan *tscan)
> @@ -229,4 +238,10 @@ void __init cpu_init_topology(struct cpu
> */
> __max_die_per_package = tscan.dom_ncpus[TOPO_DIE_DOMAIN] /
> tscan.dom_ncpus[TOPO_DIE_DOMAIN - 1];
> + /*
> + * AMD systems have Nodes per package which cannot be mapped to
> + * APIC ID (yet).
> + */
> + if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON)
> + __amd_nodes_per_pkg = __max_die_per_package = tscan.amd_nodes_per_pkg;
> }
Powered by blists - more mailing lists