lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1318346686-12349-3-git-send-email-dzickus@redhat.com>
Date:	Tue, 11 Oct 2011 11:24:45 -0400
From:	Don Zickus <dzickus@...hat.com>
To:	Ingo Molnar <mingo@...e.hu>, Andi Kleen <andi@...stfloor.org>,
	<x86@...nel.org>
Cc:	LKML <linux-kernel@...r.kernel.org>,
	Peter Zijlstra <peterz@...radead.org>,
	Robert Richter <robert.richter@....com>,
	Andrew Morton <akpm@...ux-foundation.org>,
	seiji.aguchi@....com, vgoyal@...hat.com, mjg@...hat.com,
	tony.luck@...el.com, gong.chen@...el.com, satoru.moriya@....com,
	avi@...hat.com, Don Zickus <dzickus@...hat.com>
Subject: [PATCH 2/3] x86, NMI: Add NMI IPI selftest

The previous patch modified the stop cpus path to use NMI instead of IRQ
as the way to communicate to the other cpus to shutdown.  There were some
concerns that various machines may have problems with using an NMI IPI.

This patch creates a selftest to check if NMI IPI is working at boot.
The idea is to help catch any issues before the machine panics and we
learn the hard way. :-)

If the selftest fails, the code tries to fall back to the original IRQ
method.

Signed-off-by: Don Zickus <dzickus@...hat.com>
---
 arch/x86/Kconfig.debug     |   12 +++++++++
 arch/x86/include/asm/smp.h |    1 +
 arch/x86/kernel/smp.c      |    5 ++++
 arch/x86/kernel/smpboot.c  |   57 ++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 75 insertions(+), 0 deletions(-)

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index c0f8a5c..18261c2 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -284,4 +284,16 @@ config DEBUG_STRICT_USER_COPY_CHECKS
 
 	  If unsure, or if you run an older (pre 4.4) gcc, say N.
 
+config DEBUG_NMI_IPI_SELFTEST
+	bool "NMI IPI Selftest"
+	depends on DEBUG_KERNEL
+	---help---
+	  Enabling this option turns on a quick NMI IPI selftest to
+	  verify that sending an NMI this way works for the panic and
+	  kdump paths.
+
+	  This might help diagnose strange hangs in those paths.
+
+	  If unsure, say N.
+
 endmenu
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 73b11bc..916f89d 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -149,6 +149,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 }
 
 void cpu_disable_common(void);
+void native_smp_disable_nmi_ipi(void);
 void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void native_smp_cpus_done(unsigned int max_cpus);
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 7bdbf6a..f54e07a 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -247,6 +247,11 @@ static void native_irq_stop_other_cpus(int wait)
 	local_irq_restore(flags);
 }
 
+void native_smp_disable_nmi_ipi(void)
+{
+	smp_ops.stop_other_cpus = native_irq_stop_other_cpus;
+}
+
 /*
  * Reschedule call back.
  */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 9f548cb..dcb889f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -71,6 +71,7 @@
 
 #include <asm/smpboot_hooks.h>
 #include <asm/i8259.h>
+#include <asm/nmi.h>
 
 /* State of each CPU */
 DEFINE_PER_CPU(int, cpu_state) = { 0 };
@@ -415,6 +416,57 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 		return cpu_llc_shared_mask(cpu);
 }
 
+#ifdef CONFIG_DEBUG_NMI_IPI_SELFTEST
+/* check to see if NMI IPIs work on this machine */
+static DECLARE_BITMAP(nmi_ipi_mask, NR_CPUS) __read_mostly;
+
+static int check_nmi_ipi_callback(unsigned int val, struct pt_regs *regs)
+{
+	int cpu = raw_smp_processor_id();
+
+	if (cpumask_test_cpu(cpu, to_cpumask(nmi_ipi_mask))) {
+		cpumask_clear_cpu(cpu, to_cpumask(nmi_ipi_mask));
+		return NMI_HANDLED;
+	}
+
+	return NMI_DONE;
+}
+
+static bool check_nmi_ipi(void)
+{
+	unsigned long timeout;
+
+	if (num_online_cpus() > 1) {
+		cpumask_copy(to_cpumask(nmi_ipi_mask), cpu_online_mask);
+		cpumask_clear_cpu(smp_processor_id(), to_cpumask(nmi_ipi_mask));
+
+		if (register_nmi_handler(NMI_LOCAL, check_nmi_ipi_callback,
+					 NMI_FLAG_FIRST, "check_nmi_ipi"))
+			return false;
+
+		/* sync above data before sending NMI */
+		wmb();
+
+		apic->send_IPI_allbutself(NMI_VECTOR);
+
+		/* Don't wait longer than a second */
+		timeout = USEC_PER_SEC;
+		while (!cpumask_empty(to_cpumask(nmi_ipi_mask)) && timeout--)
+			udelay(1);
+
+		/* What happens if we timeout, do we still unregister?? */
+		unregister_nmi_handler(NMI_LOCAL, "check_nmi_ipi");
+
+		if (!timeout) {
+			pr_warn("CPU does not support NMI IPIs\n");
+			return false;
+		}
+	}
+	pr_info("NMI IPIs selftest passed\n");
+	return true;
+}
+#endif
+
 static void impress_friends(void)
 {
 	int cpu;
@@ -1142,6 +1194,11 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 {
 	pr_debug("Boot done.\n");
 
+#ifdef CONFIG_DEBUG_NMI_IPI_SELFTEST
+	/* if NMI IPI doesn't work, fallback to old irq method for panic */
+	if (check_nmi_ipi())
+		native_smp_disable_nmi_ipi();
+#endif
 	impress_friends();
 #ifdef CONFIG_X86_IO_APIC
 	setup_ioapic_dest();
-- 
1.7.6.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ