lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20211108082832.142436-1-zhangzl2013@126.com>
Date:   Mon,  8 Nov 2021 16:28:32 +0800
From:   Zhaolong Zhang <zhangzl2013@....com>
To:     Tony Luck <tony.luck@...el.com>, Borislav Petkov <bp@...en8.de>,
        Zhaolong Zhang <zhangzl2013@....com>
Cc:     x86@...nel.org, linux-edac@...r.kernel.org,
        linux-kernel@...r.kernel.org,
        "Paul E . McKenney" <paulmck@...nel.org>
Subject: [PATCH] x86/mce: drop cpu_missing since we have more capable mce_missing_cpus

Drop cpu_missing, since mce_missing_cpus carries the same information in more
detail. Also move the mce_missing_cpus check into mce_panic(), so that the
list of missing CPUs is not lost when mca_cfg.tolerant > 1 and there is
no_way_out.

Signed-off-by: Zhaolong Zhang <zhangzl2013@....com>
---
 arch/x86/kernel/cpu/mce/core.c | 38 ++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 50a3e455cded..0bb59e68a457 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -99,7 +99,6 @@ struct mca_config mca_cfg __read_mostly = {
 
 static DEFINE_PER_CPU(struct mce, mces_seen);
 static unsigned long mce_need_notify;
-static int cpu_missing;
 
 /*
  * MCA banks polled by the period polling timer for corrected events.
@@ -253,6 +252,12 @@ static atomic_t mce_panicked;
 static int fake_panic;
 static atomic_t mce_fake_panicked;
 
+/*
+ * Track which CPUs entered the MCA broadcast synchronization and which not in
+ * order to print holdouts.
+ */
+static cpumask_t mce_missing_cpus = CPU_MASK_ALL;
+
 /* Panic in progress. Enable interrupts and wait for final IPI */
 static void wait_for_panic(void)
 {
@@ -314,8 +319,13 @@ static void mce_panic(const char *msg, struct mce *final, char *exp)
 		if (!apei_err)
 			apei_err = apei_write_mce(final);
 	}
-	if (cpu_missing)
-		pr_emerg(HW_ERR "Some CPUs didn't answer in synchronization\n");
+	/*
+	 * mce_missing_cpus equal to cpu_online_mask means it was reset and no timeout happened.
+	 */
+	if (!cpumask_equal(cpu_online_mask, &mce_missing_cpus) &&
+	    cpumask_and(&mce_missing_cpus, cpu_online_mask, &mce_missing_cpus))
+		pr_emerg(HW_ERR "CPUs not responding to MCE broadcast (may include false positives): %*pbl\n",
+			 cpumask_pr_args(&mce_missing_cpus));
 	if (exp)
 		pr_emerg(HW_ERR "Machine check: %s\n", exp);
 	if (!fake_panic) {
@@ -880,12 +890,6 @@ static atomic_t mce_executing;
  */
 static atomic_t mce_callin;
 
-/*
- * Track which CPUs entered the MCA broadcast synchronization and which not in
- * order to print holdouts.
- */
-static cpumask_t mce_missing_cpus = CPU_MASK_ALL;
-
 /*
  * Check if a timeout waiting for other CPUs happened.
  */
@@ -904,12 +908,8 @@ static int mce_timed_out(u64 *t, const char *msg)
 		goto out;
 	if ((s64)*t < SPINUNIT) {
 		if (mca_cfg.tolerant <= 1) {
-			if (cpumask_and(&mce_missing_cpus, cpu_online_mask, &mce_missing_cpus))
-				pr_emerg("CPUs not responding to MCE broadcast (may include false positives): %*pbl\n",
-					 cpumask_pr_args(&mce_missing_cpus));
 			mce_panic(msg, NULL, NULL);
 		}
-		cpu_missing = 1;
 		return 1;
 	}
 	*t -= SPINUNIT;
@@ -1079,8 +1079,10 @@ static int mce_end(int order)
 
 	if (!timeout)
 		goto reset;
-	if (order < 0)
+	if (order < 0) {
+		timeout = 0;
 		goto reset;
+	}
 
 	/*
 	 * Allow others to run.
@@ -1128,7 +1130,12 @@ static int mce_end(int order)
 reset:
 	atomic_set(&global_nwo, 0);
 	atomic_set(&mce_callin, 0);
-	cpumask_setall(&mce_missing_cpus);
+	/*
+	 * Don't reset mce_missing_cpus after a timeout, so that mce_panic() can
+	 * still report which CPUs are missing.
+	 */
+	if (!((s64)timeout < SPINUNIT))
+		cpumask_setall(&mce_missing_cpus);
 	barrier();
 
 	/*
@@ -2720,7 +2727,6 @@ struct dentry *mce_get_debugfs_dir(void)
 
 static void mce_reset(void)
 {
-	cpu_missing = 0;
 	atomic_set(&mce_fake_panicked, 0);
 	atomic_set(&mce_executing, 0);
 	atomic_set(&mce_callin, 0);
-- 
2.27.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ