lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 12 Sep 2016 09:59:34 +0200
From:   Borislav Petkov <bp@...en8.de>
To:     X86 ML <x86@...nel.org>
Cc:     LKML <linux-kernel@...r.kernel.org>
Subject: [PATCH 08/15] x86/mce/AMD, EDAC/mce_amd: Define and use tables for known SMCA IP types

From: Yazen Ghannam <Yazen.Ghannam@....com>

Scalable MCA defines a number of IP types. An MCA bank on an SMCA
system is defined as one of these IP types. A bank's type is uniquely
identified by the combination of the HWID and MCATYPE values read from
its MCA_IPID register.

Add the required tables in order to be able to lookup error descriptions
based on a bank's type and the error's extended error code.

Cc: Aravind Gopalakrishnan <aravindksg.lkml@...il.com>
Cc: Tony Luck <tony.luck@...el.com>
Cc: linux-edac <linux-edac@...r.kernel.org>
Cc: x86-ml <x86@...nel.org>
Link: http://lkml.kernel.org/r/1472741832-1690-1-git-send-email-Yazen.Ghannam@amd.com
Signed-off-by: Yazen Ghannam <Yazen.Ghannam@....com>
[ Align comments, simplify a bit. ]
Signed-off-by: Borislav Petkov <bp@...e.de>
---
 arch/x86/include/asm/mce.h           |  61 +++++------
 arch/x86/kernel/cpu/mcheck/mce_amd.c | 104 ++++++++++++++-----
 drivers/edac/mce_amd.c               | 194 +++++++----------------------------
 3 files changed, 147 insertions(+), 212 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 21bc5a3a4c89..9bd7ff5ffbcc 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -337,44 +337,47 @@ extern void apei_mce_report_mem_error(int corrected,
  * Scalable MCA.
  */
 #ifdef CONFIG_X86_MCE_AMD
-enum amd_ip_types {
-	SMCA_F17H_CORE = 0,	/* Core errors */
-	SMCA_DF,		/* Data Fabric */
-	SMCA_UMC,		/* Unified Memory Controller */
-	SMCA_PB,		/* Parameter Block */
-	SMCA_PSP,		/* Platform Security Processor */
-	SMCA_SMU,		/* System Management Unit */
-	N_AMD_IP_TYPES
-};
-
-struct amd_hwid {
-	const char *name;
-	unsigned int hwid;
-};
 
-extern struct amd_hwid amd_hwids[N_AMD_IP_TYPES];
-
-enum amd_core_mca_blocks {
+/* These may be used by multiple smca_hwid_mcatypes */
+enum smca_bank_types {
 	SMCA_LS = 0,	/* Load Store */
 	SMCA_IF,	/* Instruction Fetch */
-	SMCA_L2_CACHE,	/* L2 cache */
-	SMCA_DE,	/* Decoder unit */
-	RES,		/* Reserved */
-	SMCA_EX,	/* Execution unit */
+	SMCA_L2_CACHE,	/* L2 Cache */
+	SMCA_DE,	/* Decoder Unit */
+	SMCA_EX,	/* Execution Unit */
 	SMCA_FP,	/* Floating Point */
-	SMCA_L3_CACHE,	/* L3 cache */
-	N_CORE_MCA_BLOCKS
+	SMCA_L3_CACHE,	/* L3 Cache */
+	SMCA_CS,	/* Coherent Slave */
+	SMCA_PIE,	/* Power, Interrupts, etc. */
+	SMCA_UMC,	/* Unified Memory Controller */
+	SMCA_PB,	/* Parameter Block */
+	SMCA_PSP,	/* Platform Security Processor */
+	SMCA_SMU,	/* System Management Unit */
+	N_SMCA_BANK_TYPES
+};
+
+struct smca_bank_name {
+	const char *name;	/* Short name for sysfs */
+	const char *long_name;	/* Long name for pretty-printing */
 };
 
-extern const char * const amd_core_mcablock_names[N_CORE_MCA_BLOCKS];
+extern struct smca_bank_name smca_bank_names[N_SMCA_BANK_TYPES];
+
+#define HWID_MCATYPE(hwid, mcatype) ((hwid << 16) | mcatype)
 
-enum amd_df_mca_blocks {
-	SMCA_CS = 0,	/* Coherent Slave */
-	SMCA_PIE,	/* Power management, Interrupts, etc */
-	N_DF_BLOCKS
+struct smca_hwid_mcatype {
+	unsigned int bank_type;	/* Use with smca_bank_types for easy indexing. */
+	u32 hwid_mcatype;	/* (hwid,mcatype) tuple */
+	u32 xec_bitmap;		/* Bitmap of valid ExtErrorCodes; current max is 21. */
 };
 
-extern const char * const amd_df_mcablock_names[N_DF_BLOCKS];
+struct smca_bank_info {
+	struct smca_hwid_mcatype *type;
+	u32 type_instance;
+};
+
+extern struct smca_bank_info smca_banks[MAX_NR_BANKS];
+
 #endif
 
 #endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9da92fb2e073..3b74b62d0808 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -63,34 +63,55 @@ static const char * const th_names[] = {
 	"execution_unit",
 };
 
-/* Define HWID to IP type mappings for Scalable MCA */
-struct amd_hwid amd_hwids[] = {
-	[SMCA_F17H_CORE]	= { "f17h_core",	0xB0 },
-	[SMCA_DF]		= { "data_fabric",	0x2E },
-	[SMCA_UMC]		= { "umc",		0x96 },
-	[SMCA_PB]		= { "param_block",	0x5 },
-	[SMCA_PSP]		= { "psp",		0xFF },
-	[SMCA_SMU]		= { "smu",		0x1 },
+struct smca_bank_name smca_bank_names[] = {
+	[SMCA_LS]	= { "load_store",	"Load Store Unit" },
+	[SMCA_IF]	= { "insn_fetch",	"Instruction Fetch Unit" },
+	[SMCA_L2_CACHE]	= { "l2_cache",		"L2 Cache" },
+	[SMCA_DE]	= { "decode_unit",	"Decode Unit" },
+	[SMCA_EX]	= { "execution_unit",	"Execution Unit" },
+	[SMCA_FP]	= { "floating_point",	"Floating Point Unit" },
+	[SMCA_L3_CACHE]	= { "l3_cache",		"L3 Cache" },
+	[SMCA_CS]	= { "coherent_slave",	"Coherent Slave" },
+	[SMCA_PIE]	= { "pie",		"Power, Interrupts, etc." },
+	[SMCA_UMC]	= { "umc",		"Unified Memory Controller" },
+	[SMCA_PB]	= { "param_block",	"Parameter Block" },
+	[SMCA_PSP]	= { "psp",		"Platform Security Processor" },
+	[SMCA_SMU]	= { "smu",		"System Management Unit" },
 };
-EXPORT_SYMBOL_GPL(amd_hwids);
-
-const char * const amd_core_mcablock_names[] = {
-	[SMCA_LS]		= "load_store",
-	[SMCA_IF]		= "insn_fetch",
-	[SMCA_L2_CACHE]		= "l2_cache",
-	[SMCA_DE]		= "decode_unit",
-	[RES]			= "",
-	[SMCA_EX]		= "execution_unit",
-	[SMCA_FP]		= "floating_point",
-	[SMCA_L3_CACHE]		= "l3_cache",
-};
-EXPORT_SYMBOL_GPL(amd_core_mcablock_names);
+EXPORT_SYMBOL_GPL(smca_bank_names);
+
+static struct smca_hwid_mcatype smca_hwid_mcatypes[] = {
+	/* { bank_type, hwid_mcatype, xec_bitmap } */
+
+	/* ZN Core (HWID=0xB0) MCA types */
+	{ SMCA_LS,	 HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
+	{ SMCA_IF,	 HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
+	{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
+	{ SMCA_DE,	 HWID_MCATYPE(0xB0, 0x3), 0x1FF },
+	/* HWID 0xB0 MCATYPE 0x4 is Reserved */
+	{ SMCA_EX,	 HWID_MCATYPE(0xB0, 0x5), 0x7FF },
+	{ SMCA_FP,	 HWID_MCATYPE(0xB0, 0x6), 0x7F },
+	{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
+
+	/* Data Fabric MCA types */
+	{ SMCA_CS,	 HWID_MCATYPE(0x2E, 0x0), 0x1FF },
+	{ SMCA_PIE,	 HWID_MCATYPE(0x2E, 0x1), 0xF },
+
+	/* Unified Memory Controller MCA type */
+	{ SMCA_UMC,	 HWID_MCATYPE(0x96, 0x0), 0x3F },
+
+	/* Parameter Block MCA type */
+	{ SMCA_PB,	 HWID_MCATYPE(0x05, 0x0), 0x1 },
 
-const char * const amd_df_mcablock_names[] = {
-	[SMCA_CS]		= "coherent_slave",
-	[SMCA_PIE]		= "pie",
+	/* Platform Security Processor MCA type */
+	{ SMCA_PSP,	 HWID_MCATYPE(0xFF, 0x0), 0x1 },
+
+	/* System Management Unit MCA type */
+	{ SMCA_SMU,	 HWID_MCATYPE(0x01, 0x0), 0x1 },
 };
-EXPORT_SYMBOL_GPL(amd_df_mcablock_names);
+
+struct smca_bank_info smca_banks[MAX_NR_BANKS];
+EXPORT_SYMBOL_GPL(smca_banks);
 
 static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks);
 static DEFINE_PER_CPU(unsigned int, bank_map);	/* see which banks are on */
@@ -108,6 +129,36 @@ void (*deferred_error_int_vector)(void) = default_deferred_error_interrupt;
  * CPU Initialization
  */
 
+static void get_smca_bank_info(unsigned int bank)
+{
+	unsigned int i, hwid_mcatype, cpu = smp_processor_id();
+	struct smca_hwid_mcatype *type;
+	u32 high, instanceId;
+	u16 hwid, mcatype;
+
+	/* Collect bank_info using CPU 0 for now. */
+	if (cpu)
+		return;
+
+	if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &instanceId, &high)) {
+		pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
+		return;
+	}
+
+	hwid = high & MCI_IPID_HWID;
+	mcatype = (high & MCI_IPID_MCATYPE) >> 16;
+	hwid_mcatype = HWID_MCATYPE(hwid, mcatype);
+
+	for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
+		type = &smca_hwid_mcatypes[i];
+		if (hwid_mcatype == type->hwid_mcatype) {
+			smca_banks[bank].type = type;
+			smca_banks[bank].type_instance = instanceId;
+			break;
+		}
+	}
+}
+
 struct thresh_restart {
 	struct threshold_block	*b;
 	int			reset;
@@ -425,6 +476,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 	int offset = -1;
 
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
+		if (mce_flags.smca)
+			get_smca_bank_info(bank);
+
 		for (block = 0; block < NR_BLOCKS; ++block) {
 			address = get_block_address(cpu, address, low, high, bank, block);
 			if (!address)
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index ea549a94361b..99b3bf3f4182 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -283,6 +283,27 @@ static const char * const smca_smu_mce_desc[] = {
 	"SMU RAM ECC or parity error",
 };
 
+struct smca_mce_desc {
+	const char * const *descs;
+	unsigned int num_descs;
+};
+
+static struct smca_mce_desc smca_mce_descs[] = {
+	[SMCA_LS]	= { smca_ls_mce_desc,	ARRAY_SIZE(smca_ls_mce_desc)	},
+	[SMCA_IF]	= { smca_if_mce_desc,	ARRAY_SIZE(smca_if_mce_desc)	},
+	[SMCA_L2_CACHE]	= { smca_l2_mce_desc,	ARRAY_SIZE(smca_l2_mce_desc)	},
+	[SMCA_DE]	= { smca_de_mce_desc,	ARRAY_SIZE(smca_de_mce_desc)	},
+	[SMCA_EX]	= { smca_ex_mce_desc,	ARRAY_SIZE(smca_ex_mce_desc)	},
+	[SMCA_FP]	= { smca_fp_mce_desc,	ARRAY_SIZE(smca_fp_mce_desc)	},
+	[SMCA_L3_CACHE]	= { smca_l3_mce_desc,	ARRAY_SIZE(smca_l3_mce_desc)	},
+	[SMCA_CS]	= { smca_cs_mce_desc,	ARRAY_SIZE(smca_cs_mce_desc)	},
+	[SMCA_PIE]	= { smca_pie_mce_desc,	ARRAY_SIZE(smca_pie_mce_desc)	},
+	[SMCA_UMC]	= { smca_umc_mce_desc,	ARRAY_SIZE(smca_umc_mce_desc)	},
+	[SMCA_PB]	= { smca_pb_mce_desc,	ARRAY_SIZE(smca_pb_mce_desc)	},
+	[SMCA_PSP]	= { smca_psp_mce_desc,	ARRAY_SIZE(smca_psp_mce_desc)	},
+	[SMCA_SMU]	= { smca_smu_mce_desc,	ARRAY_SIZE(smca_smu_mce_desc)	},
+};
+
 static bool f12h_mc0_mce(u16 ec, u8 xec)
 {
 	bool ret = false;
@@ -827,175 +848,32 @@ static void decode_mc6_mce(struct mce *m)
 	pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n");
 }
 
-static void decode_f17h_core_errors(const char *ip_name, u8 xec,
-				   unsigned int mca_type)
-{
-	const char * const *error_desc_array;
-	size_t len;
-
-	pr_emerg(HW_ERR "%s Error: ", ip_name);
-
-	switch (mca_type) {
-	case SMCA_LS:
-		error_desc_array = smca_ls_mce_desc;
-		len = ARRAY_SIZE(smca_ls_mce_desc) - 1;
-
-		if (xec == 0x4) {
-			pr_cont("Unrecognized LS MCA error code.\n");
-			return;
-		}
-		break;
-
-	case SMCA_IF:
-		error_desc_array = smca_if_mce_desc;
-		len = ARRAY_SIZE(smca_if_mce_desc) - 1;
-		break;
-
-	case SMCA_L2_CACHE:
-		error_desc_array = smca_l2_mce_desc;
-		len = ARRAY_SIZE(smca_l2_mce_desc) - 1;
-		break;
-
-	case SMCA_DE:
-		error_desc_array = smca_de_mce_desc;
-		len = ARRAY_SIZE(smca_de_mce_desc) - 1;
-		break;
-
-	case SMCA_EX:
-		error_desc_array = smca_ex_mce_desc;
-		len = ARRAY_SIZE(smca_ex_mce_desc) - 1;
-		break;
-
-	case SMCA_FP:
-		error_desc_array = smca_fp_mce_desc;
-		len = ARRAY_SIZE(smca_fp_mce_desc) - 1;
-		break;
-
-	case SMCA_L3_CACHE:
-		error_desc_array = smca_l3_mce_desc;
-		len = ARRAY_SIZE(smca_l3_mce_desc) - 1;
-		break;
-
-	default:
-		pr_cont("Corrupted MCA core error info.\n");
-		return;
-	}
-
-	if (xec > len) {
-		pr_cont("Unrecognized %s MCA bank error code.\n",
-			 amd_core_mcablock_names[mca_type]);
-		return;
-	}
-
-	pr_cont("%s.\n", error_desc_array[xec]);
-}
-
-static void decode_df_errors(u8 xec, unsigned int mca_type)
-{
-	const char * const *error_desc_array;
-	size_t len;
-
-	pr_emerg(HW_ERR "Data Fabric Error: ");
-
-	switch (mca_type) {
-	case  SMCA_CS:
-		error_desc_array = smca_cs_mce_desc;
-		len = ARRAY_SIZE(smca_cs_mce_desc) - 1;
-		break;
-
-	case SMCA_PIE:
-		error_desc_array = smca_pie_mce_desc;
-		len = ARRAY_SIZE(smca_pie_mce_desc) - 1;
-		break;
-
-	default:
-		pr_cont("Corrupted MCA Data Fabric info.\n");
-		return;
-	}
-
-	if (xec > len) {
-		pr_cont("Unrecognized %s MCA bank error code.\n",
-			 amd_df_mcablock_names[mca_type]);
-		return;
-	}
-
-	pr_cont("%s.\n", error_desc_array[xec]);
-}
-
 /* Decode errors according to Scalable MCA specification */
 static void decode_smca_errors(struct mce *m)
 {
-	u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank);
-	unsigned int hwid, mca_type, i;
-	u8 xec = XEC(m->status, xec_mask);
-	const char * const *error_desc_array;
+	struct smca_hwid_mcatype *type;
+	unsigned int bank_type;
 	const char *ip_name;
-	u32 low, high;
-	size_t len;
+	u8 xec = XEC(m->status, xec_mask);
 
-	if (rdmsr_safe(addr, &low, &high)) {
-		pr_emerg(HW_ERR "Invalid IP block specified.\n");
+	if (m->bank >= ARRAY_SIZE(smca_banks))
 		return;
-	}
-
-	hwid = high & MCI_IPID_HWID;
-	mca_type = (high & MCI_IPID_MCATYPE) >> 16;
 
-	pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low);
-
-	/*
-	 * Based on hwid and mca_type values, decode errors from respective IPs.
-	 * Note: mca_type values make sense only in the context of an hwid.
-	 */
-	for (i = 0; i < ARRAY_SIZE(amd_hwids); i++)
-		if (amd_hwids[i].hwid == hwid)
-			break;
-
-	switch (i) {
-	case SMCA_F17H_CORE:
-		ip_name = (mca_type == SMCA_L3_CACHE) ?
-			  "L3 Cache" : "F17h Core";
-		return decode_f17h_core_errors(ip_name, xec, mca_type);
-		break;
-
-	case SMCA_DF:
-		return decode_df_errors(xec, mca_type);
-		break;
-
-	case SMCA_UMC:
-		error_desc_array = smca_umc_mce_desc;
-		len = ARRAY_SIZE(smca_umc_mce_desc) - 1;
-		break;
-
-	case SMCA_PB:
-		error_desc_array = smca_pb_mce_desc;
-		len = ARRAY_SIZE(smca_pb_mce_desc) - 1;
-		break;
-
-	case SMCA_PSP:
-		error_desc_array = smca_psp_mce_desc;
-		len = ARRAY_SIZE(smca_psp_mce_desc) - 1;
-		break;
-
-	case SMCA_SMU:
-		error_desc_array = smca_smu_mce_desc;
-		len = ARRAY_SIZE(smca_smu_mce_desc) - 1;
-		break;
-
-	default:
-		pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid);
+	type = smca_banks[m->bank].type;
+	if (!type)
 		return;
-	}
 
-	ip_name = amd_hwids[i].name;
-	pr_emerg(HW_ERR "%s Error: ", ip_name);
+	bank_type = type->bank_type;
+	ip_name = smca_bank_names[bank_type].long_name;
 
-	if (xec > len) {
-		pr_cont("Unrecognized %s MCA bank error code.\n", ip_name);
-		return;
-	}
+	pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
 
-	pr_cont("%s.\n", error_desc_array[xec]);
+	/* Only print the decode of valid error codes */
+	if (xec < smca_mce_descs[bank_type].num_descs &&
+			(type->xec_bitmap & BIT_ULL(xec))) {
+		pr_emerg(HW_ERR "%s Error: ", ip_name);
+		pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
+	}
 }
 
 static inline void amd_decode_err_code(u16 ec)
-- 
2.10.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ