linux-kernel - [PATCH 5/5] mce: recover from "action required" errors reported in data path in usermode

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <4e5eb50d2106324a55@agluck-desktop.sc.intel.com>
Date:	Wed, 31 Aug 2011 15:26:21 -0700
From:	"Luck, Tony" <tony.luck@...el.com>
To:	linux-kernel@...r.kernel.org
Cc:	"Ingo Molnar" <mingo@...e.hu>, "Borislav Petkov" <bp@...64.org>,
	"Hidetoshi Seto" <seto.hidetoshi@...fujitsu.com>
Subject: [PATCH 5/5] mce: recover from "action required" errors reported in data path in usermode

From:	"Luck, Tony" <tony.luck@...el.com>

From: Tony Luck <tony.luck@...el.com>

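Two new entries in the mce severity table. The first notes that data errors
observed by innocent bystanders (who happen to share a machine check
bank with the cpu experiencing the error) should be left alone by using
the "KEEP" severity. The second marks a data load error that was hit while
executing user code with the "AR" (action required) severity.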

Then, inline in the do_machine_check() handler, we process the user-mode
data error that was marked as MCE_AR_SEVERITY.  Even though we are in
"machine check context" it is almost safe to do so. We have already
released all the other cpus from rendezvous and we know that the cpu
with the error was executing user code - so it cannot have interrupts
locked out, or hold any locks. I.e. this is almost equivalent to a
page fault. The only difference (and risk) is that on x86_64 we are still
on the machine check stack - so if another machine check arrives, we
are toast (we haven't cleared MCG_STATUS yet, so the cpu will reset rather
than taking a nested machine check on the same stack).

Signed-off-by: Tony Luck <tony.luck@...el.com>
---

Using the "KEEP" state avoids the complexity of my earlier solution
that sorted the cpus by severity and ran the more serious ones first.

 arch/x86/kernel/cpu/mcheck/mce-severity.c |   14 ++++++++++-
 arch/x86/kernel/cpu/mcheck/mce.c          |   35 ++++++++++++++++++++--------
 2 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 7395d5f..c4d8b24 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -54,6 +54,7 @@ static struct severity {
 #define  MASK(x, y)	.mask = x, .result = y
 #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
 #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
+#define	MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
 #define MCACOD 0xffff
 
 	MCESEV(
@@ -102,11 +103,22 @@ static struct severity {
 		SER, BITCLR(MCI_STATUS_S)
 		),
 
-	/* AR add known MCACODs here */
 	MCESEV(
 		PANIC, "Action required with lost events",
 		SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
 		),
+
+	/* known AR MCACODs: */
+	MCESEV(
+		KEEP, "HT thread notices Action required: data load error",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|0x0134),
+		MCGMASK(MCG_STATUS_EIPV, 0)
+		),
+	MCESEV(
+		AR, "Action required: data load error",
+		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|0x0134),
+		USER
+		),
 	MCESEV(
 		PANIC, "Action required: unknown MCACOD",
 		SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 135e12d..2c59a34 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -996,12 +996,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 			continue;
 		}
 
-		/*
-		 * Kill on action required.
-		 */
-		if (severity == MCE_AR_SEVERITY)
-			kill_it = 1;
-
 		mce_read_aux(&m, i);
 
 		/*
@@ -1022,6 +1016,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		}
 	}
 
+	m = *final;
+
 	if (!no_way_out)
 		mce_clear_state(toclear);
 
@@ -1040,7 +1036,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 * support MCE broadcasting or it has been disabled.
 	 */
 	if (no_way_out && tolerant < 3)
-		mce_panic("Fatal machine check on current CPU", final, msg);
+		mce_panic("Fatal machine check on current CPU", &m, msg);
 
 	/*
 	 * If the error seems to be unrecoverable, something should be
@@ -1049,11 +1045,24 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 	 * high, don't try to do anything at all.
 	 */
 
-	if (kill_it && tolerant < 3)
+	if (worst != MCE_AR_SEVERITY && kill_it && tolerant < 3)
 		force_sig(SIGBUS, current);
 
 	if (worst > 0)
 		mce_report_event(regs);
+
+	if (worst == MCE_AR_SEVERITY) {
+		unsigned long pfn = m.addr >> PAGE_SHIFT;
+
+		pr_err("Uncorrected hardware memory error in user-access at %llx",
+			m.addr);
+		if (__memory_failure(pfn, MCE_VECTOR, 0) < 0) {
+			pr_err("Memory error not recovered");
+			force_sig(SIGBUS, current);
+		} else
+			pr_err("Memory error recovered");
+	}
+
 	mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
 out:
 	atomic_dec(&mce_entry);
@@ -1061,12 +1070,18 @@ out:
 }
 EXPORT_SYMBOL_GPL(do_machine_check);
 
-/* dummy to break dependency. actual code is in mm/memory-failure.c */
-void __attribute__((weak)) memory_failure(unsigned long pfn, int vector)
+#ifndef CONFIG_MEMORY_FAILURE
+void memory_failure(unsigned long pfn, int vector)
 {
 	pr_err("Action optional memory failure at %lx ignored\n", pfn);
 }
 
+int __memory_failure(unsigned long pfn, int trapno, int flags)
+{
+	return -ENXIO;
+}
+#endif
+
 static void mce_process_work(struct work_struct *dummy)
 {
 	unsigned long pfn;
-- 
1.7.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/