[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1441931198-3209-1-git-send-email-ashok.raj@intel.com>
Date: Thu, 10 Sep 2015 20:26:38 -0400
From: Ashok Raj <ashok.raj@...el.com>
To: linux-kernel@...r.kernel.org
Cc: Ashok Raj <ashok.raj@...el.com>, linux-edac@...r.kernel.org,
Boris Petkov <bp@...e.de>, Tony Luck <tony.luck@...el.com>
Subject: [Patch V1] x86, mce: CPU synchronization for broadcast MCE's is surprised by offline CPUs
Linux has logical CPU offline, supported as shown below.
#echo 0 > /sys/devices/system/cpu/cpuX/online
Hardware doesn't know about OS offlining, hence hardware will always
broadcast any MCE to all CPUs in the system, even if it's parked in
cpu_dead.
mce_start() and mce_end() should use cpu_present_map to count CPUs in
rendezvous. An offline CPU is still in the MCE domain, so it is also going
to execute do_machine_check(), which increments mce_callin. As a result,
the number of CPUs checking in will always exceed num_online_cpus() by the
number of CPUs offlined.
This patch does the following.
- Allow MCE logging from CPUs logically offlined.
- Ensure the offline CPU will not be chosen as the rendezvous master CPU
- Collect logs from the offline cpu and report them via rendezvous master.
Signed-off-by: Ashok Raj <ashok.raj@...el.com>
Reviewed-by: Tony Luck <tony.luck@...el.com>
---
arch/x86/kernel/cpu/mcheck/mce.c | 101 +++++++++++++++++++++++++++++++++++++--
1 file changed, 96 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 69c7e3c..7c6b8b2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -769,6 +769,63 @@ out:
}
/*
+ * We can't call mce_log() for offline CPUs because it uses RCU for
+ * synchronization. (and may call arbitrary driver code via
+ * x86_mce_decoder_chain that may also be surprised at being called
+ * from an offline CPU). Provide enough buffer space to hold a few
+ * errors that can be picked up later. We don't care about overflow
+ * here, since this is supposed to be really rare, so not doing any
+ * tracking for overflow.
+ */
+
+#define OFFLINE_CPU_LOG_LEN 16
+
+struct offline_cpu_mce {
+ unsigned short head;
+ unsigned short tail;
+ struct mce mce_log[OFFLINE_CPU_LOG_LEN];
+};
+
+static struct offline_cpu_mce offline_mce;
+static unsigned int offline_mce_overflow = 0;
+
+/*
+ * Add mce's discovered in offline cpu which will be logged by the
+ * MCE rendezvous master. There is no lock required, since MCE's are
+ * processed one cpu at a time, sequenced by the rendezvous master CPU
+ * Safe to be called only from MCE handler.
+ */
+static int offline_mce_add(struct mce *m)
+{
+ unsigned next;
+
+ next = (offline_mce.tail + 1) % OFFLINE_CPU_LOG_LEN;
+ if (next == offline_mce.head) {
+ offline_mce_overflow++;
+ return -1;
+ }
+
+ offline_mce.mce_log[offline_mce.tail] = *m;
+ offline_mce.tail = next;
+ return 0;
+}
+
+static int offline_mce_get(struct mce *m)
+{
+ int ret = 0;
+
+ if (offline_mce.head == offline_mce.tail)
+ goto out;
+
+ *m = offline_mce.mce_log[offline_mce.head];
+ offline_mce.head = (offline_mce.head + 1) % OFFLINE_CPU_LOG_LEN;
+
+ ret = 1;
+out:
+ return ret;
+}
+
+/*
* The Monarch's reign. The Monarch is the CPU who entered
* the machine check handler first. It waits for the others to
* raise the exception too and then grades them. When any
@@ -799,13 +856,31 @@ static void mce_reign(void)
int global_worst = 0;
char *msg = NULL;
char *nmsg = NULL;
+ struct mce offline_mce;
+
+
+ /*
+ * If there are any MCE's logged by offline CPU's, lets
+ * gather and report them via mce_log
+ */
+ while (offline_mce_get(&offline_mce))
+ mce_log(&offline_mce);
+
+ if (offline_mce_overflow) {
+ pr_info (HW_ERR "Lost %d errors logged by offline CPUs\n",
+ offline_mce_overflow);
+ offline_mce_overflow = 0;
+ }
/*
* This CPU is the Monarch and the other CPUs have run
* through their handlers.
* Grade the severity of the errors of all the CPUs.
+ * Intel CPUs broadcast MCE's to all cpus booted.
+ * Even if they are merely parked in the OS for logical offline
+ * they also should process MCE.
*/
- for_each_possible_cpu(cpu) {
+ for_each_present_cpu(cpu) {
int severity = mce_severity(&per_cpu(mces_seen, cpu),
mca_cfg.tolerant,
&nmsg, true);
@@ -841,7 +916,7 @@ static void mce_reign(void)
* Now clear all the mces_seen so that they don't reappear on
* the next mce.
*/
- for_each_possible_cpu(cpu)
+ for_each_present_cpu(cpu)
memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce));
}
@@ -857,8 +932,9 @@ static atomic_t global_nwo;
static int mce_start(int *no_way_out)
{
int order;
- int cpus = num_online_cpus();
+ int cpus = num_present_cpus();
u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC;
+ unsigned int this_cpu = smp_processor_id();
if (!timeout)
return -1;
@@ -868,6 +944,16 @@ static int mce_start(int *no_way_out)
* global_nwo should be updated before mce_callin
*/
smp_wmb();
+
+ /*
+ * If this cpu is offline, make sure it won't be elected as
+ * rendezvous master
+ */
+ if (cpu_is_offline(this_cpu)) {
+ while (!atomic_read(&mce_callin))
+ ndelay(SPINUNIT);
+ }
+
order = atomic_inc_return(&mce_callin);
/*
@@ -938,7 +1024,7 @@ static int mce_end(int order)
if (order == 1) {
/* CHECKME: Can this race with a parallel hotplug? */
- int cpus = num_online_cpus();
+ int cpus = num_present_cpus();
/*
* Monarch: Wait for everyone to go through their scanning
@@ -1033,6 +1119,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
int i;
int worst = 0;
int severity;
+ unsigned int cpu = smp_processor_id();
+
/*
* Establish sequential order between the CPUs entering the machine
* check handler.
@@ -1153,7 +1241,10 @@ void do_machine_check(struct pt_regs *regs, long error_code)
if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
mce_ring_add(m.addr >> PAGE_SHIFT);
- mce_log(&m);
+ if (cpu_is_offline(cpu))
+ offline_mce_add(&m);
+ else
+ mce_log(&m);
if (severity > worst) {
*final = m;
--
2.4.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists