lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 27 May 2015 14:03:38 -0500
From:	Aravind Gopalakrishnan <Aravind.Gopalakrishnan@....com>
To:	<bp@...en8.de>, <dougthompson@...ssion.com>,
	<mchehab@....samsung.com>
CC:	<linux-edac@...r.kernel.org>, <linux-kernel@...r.kernel.org>
Subject: [PATCH 6/6] edac, mce_amd_inj: Inject errors on NBC for bank 4 errors

For bank 4 errors, MCE logging and reporting is done only on
node base cores. Refer to the D18F3x44[NbMcaToMstCpuEn] field in
the Fam10h and later BKDGs.

This patch ensures that we inject the error on the node base core
for bank 4 errors. Otherwise, triggering #MC or APIC interrupts on
a non-node-base core would not have any effect on the system,
i.e., we would not see any relevant output in the kernel logs for
the error we just injected.

Signed-off-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@....com>
---
 drivers/edac/mce_amd_inj.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c
index ca5b29f..bd44ba4 100644
--- a/drivers/edac/mce_amd_inj.c
+++ b/drivers/edac/mce_amd_inj.c
@@ -17,9 +17,12 @@
 #include <linux/cpu.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
+#include <linux/pci.h>
 #include <asm/mce.h>
+#include <asm/amd_nb.h>
 
 #include "mce_amd.h"
+#include "amd64_edac.h"
 
 /*
  * Collect all the MCi_XXX settings
@@ -205,6 +208,65 @@ static void trigger_thr_int(void *info)
 	asm volatile("int $249");
 }
 
+static u32 amd_get_num_nodes(void)
+{
+	u32 nodes = 1;
+
+	if (cpu_has_topoext) {
+		u32 ecx;
+
+		ecx = cpuid_ecx(0x8000001e);
+		nodes = ((ecx >> 8) & 7) + 1;
+	} else if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
+		u64 value;
+
+		rdmsrl(MSR_FAM10H_NODE_ID, value);
+		nodes = ((value >> 3) & 7) + 1;
+	}
+
+	return nodes;
+}
+
+static u32 amd_get_nbc_for_node(int node_id)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+	u32 nodes, cores_per_node;
+
+	nodes = amd_get_num_nodes();
+	cores_per_node = c->x86_max_cores / nodes;
+
+	return cores_per_node * node_id;
+}
+
+static void toggle_nb_mca_mst_cpu(u16 nid)
+{
+	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
+	u32 val;
+	int err;
+
+	if (!F3)
+		return;
+
+	err = pci_read_config_dword(F3, NBCFG, &val);
+	if (err) {
+		pr_err("%s: Error reading F%dx%03x.\n", __func__,
+			PCI_FUNC(F3->devfn),
+			NBCFG);
+		return;
+	}
+
+	if (!(val & BIT(27))) {
+		pr_err("%s: BIOS not setting D18F3x44[NbMcaToMstCpuEn]."
+		       "Doing that here\n", __func__);
+		val |= BIT(27);
+		err = pci_write_config_dword(F3, NBCFG, val);
+		if (err)
+			pr_err("%s: Error writing F%dx%03x.\n", __func__,
+				PCI_FUNC(F3->devfn),
+				NBCFG);
+	}
+}
+
 static void do_inject(void)
 {
 	u64 mcg_status = 0;
@@ -240,6 +302,20 @@ static void do_inject(void)
 	if (!(i_mce.status & MCI_STATUS_PCC))
 		mcg_status |= MCG_STATUS_RIPV;
 
+	/*
+	 * For multi node cpus, logging and reporting of bank == 4 errors
+	 * happen only on the node base core. Refer D18F3x44[NbMcaToMstCpuEn]
+	 * for Fam10h and later BKDGs
+	 */
+	if (static_cpu_has(X86_FEATURE_AMD_DCM) && b == 4) {
+		/*
+		 * BIOS sets D18F3x44[NbMcaToMstCpuEn] by default.
+		 * But make sure of it here just in case..
+		 */
+		toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
+		cpu = amd_get_nbc_for_node(amd_get_nb_id(cpu));
+	}
+
 	toggle_hw_mce_inject(cpu, true);
 
 	wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
-- 
2.4.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ