lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1433277362-10911-10-git-send-email-Aravind.Gopalakrishnan@amd.com>
Date:	Tue, 2 Jun 2015 15:36:02 -0500
From:	Aravind Gopalakrishnan <Aravind.Gopalakrishnan@....com>
To:	<bp@...en8.de>, <dougthompson@...ssion.com>,
	<mchehab@....samsung.com>
CC:	<linux-edac@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
	<x86@...nel.org>
Subject: [PATCH V2 9/9] edac, mce_amd_inj: Inject errors on NBC for bank 4 errors

For bank 4 errors, MCE is logged and reported only on
node base cores. Refer D18F3x44[NbMcaToMstCpuEn] field in
Fam10h and later BKDGs.

This patch ensures that we inject the error on the node base core
for bank 4 errors. Otherwise, triggering #MC or apic interrupts on
a non node base core would not have any effect on the system.
(i.e), we would not see any relevant output on kernel logs for
the error we just injected.

Signed-off-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@....com>
---
 drivers/edac/mce_amd_inj.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c
index b7e108c..45aac4f 100644
--- a/drivers/edac/mce_amd_inj.c
+++ b/drivers/edac/mce_amd_inj.c
@@ -17,9 +17,12 @@
 #include <linux/cpu.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
+#include <linux/pci.h>
 #include <asm/mce.h>
+#include <asm/amd_nb.h>
 
 #include "mce_amd.h"
+#include "amd64_edac.h"
 
 /*
  * Collect all the MCi_XXX settings
@@ -200,6 +203,44 @@ static void trigger_thr_int(void *info)
 	asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
 }
 
+static u32 amd_get_nbc_for_node(int node_id)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+	u32 cores_per_node;
+
+	cores_per_node = c->x86_max_cores / amd_get_nodes_cnt();
+
+	return cores_per_node * node_id;
+}
+
+static void toggle_nb_mca_mst_cpu(u16 nid)
+{
+	struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
+	u32 val;
+	int err;
+
+	if (!F3)
+		return;
+
+	err = pci_read_config_dword(F3, NBCFG, &val);
+	if (err) {
+		pr_err("%s: Error reading F%dx%03x.\n", __func__,
+			PCI_FUNC(F3->devfn),
+			NBCFG);
+		return;
+	}
+
+	if (!(val & BIT(27))) {
+		pr_err("%s: BIOS not setting D18F3x44[NbMcaToMstCpuEn]. Doing that here\n", __func__);
+		val |= BIT(27);
+		err = pci_write_config_dword(F3, NBCFG, val);
+		if (err)
+			pr_err("%s: Error writing F%dx%03x.\n", __func__,
+				PCI_FUNC(F3->devfn),
+				NBCFG);
+	}
+}
+
 static void do_inject(void)
 {
 	u64 mcg_status = 0;
@@ -235,6 +276,20 @@ static void do_inject(void)
 	if (!(i_mce.status & MCI_STATUS_PCC))
 		mcg_status |= MCG_STATUS_RIPV;
 
+	/*
+	 * For multi node cpus, logging and reporting of bank == 4 errors
+	 * happen only on the node base core. Refer D18F3x44[NbMcaToMstCpuEn]
+	 * for Fam10h and later BKDGs
+	 */
+	if (static_cpu_has(X86_FEATURE_AMD_DCM) && b == 4) {
+		/*
+		 * BIOS sets D18F3x44[NbMcaToMstCpuEn] by default.
+		 * But make sure of it here just in case..
+		 */
+		toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
+		cpu = amd_get_nbc_for_node(amd_get_nb_id(cpu));
+	}
+
 	toggle_hw_mce_inject(cpu, true);
 
 	wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS,
-- 
2.4.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ