linux-kernel - [PATCH 2/6] x86, nmi: Add new nmi type 'external'

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1400181949-157170-3-git-send-email-dzickus@redhat.com>
Date:	Thu, 15 May 2014 15:25:45 -0400
From:	Don Zickus <dzickus@...hat.com>
To:	x86@...nel.org
Cc:	Peter Zijlstra <peterz@...radead.org>,
	Andi Kleen <andi@...stfloor.org>, gong.chen@...ux.intel.com,
	LKML <linux-kernel@...r.kernel.org>, Elliott@...com,
	Don Zickus <dzickus@...hat.com>
Subject: [PATCH 2/6] x86, nmi:  Add new nmi type 'external'

I noticed when debugging a perf problem on a machine with GHES enabled,
perf seemed slow.  I then realized that the GHES NMI routine was taking
a global lock all the time to inspect the hardware.  This contended
with all the local perf counters which did not need a lock.  So each cpu
accidentally was synchronizing with itself when using perf.

This is because the way the nmi handler works.  It executes all the handlers
registered to a particular subtype (to deal with nmi sharing).  As a result
the GHES handler was executed on every PMI.

Fix this by creating a new nmi type called NMI_EXT, which is used by
handlers that need to probe external hardware and require a global lock
to do so.

Now the main NMI handler can check the internal NMI handlers first and
then the external ones if nothing is found.

This makes perf a little faster again on those machines with GHES enabled.

Signed-off-by: Don Zickus <dzickus@...hat.com>
---
 arch/x86/include/asm/nmi.h |    1 +
 arch/x86/kernel/nmi.c      |   41 ++++++++++++++++++++++++++---------------
 drivers/acpi/apei/ghes.c   |    4 ++--
 drivers/watchdog/hpwdt.c   |   10 ++--------
 4 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 5f2fc44..8631a49 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -25,6 +25,7 @@ extern int unknown_nmi_panic;
 
 enum {
 	NMI_LOCAL=0,
+	NMI_EXT,
 	NMI_UNKNOWN,
 	NMI_SERR,
 	NMI_IO_CHECK,
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 7b17864..b7c6f6b 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -34,29 +34,32 @@
 #include <trace/events/nmi.h>
 
 struct nmi_desc {
-	spinlock_t lock;
-	struct list_head head;
+	spinlock_t		lock;
+	struct list_head	head;
 };
 
 static struct nmi_desc nmi_desc[NMI_MAX] = 
 {
-	{
-		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
-		.head = LIST_HEAD_INIT(nmi_desc[0].head),
+	[NMI_LOCAL] = {
+		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[NMI_LOCAL].lock),
+		.head = LIST_HEAD_INIT(nmi_desc[NMI_LOCAL].head),
 	},
-	{
-		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
-		.head = LIST_HEAD_INIT(nmi_desc[1].head),
+	[NMI_EXT] = {
+		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[NMI_EXT].lock),
+		.head = LIST_HEAD_INIT(nmi_desc[NMI_EXT].head),
 	},
-	{
-		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
-		.head = LIST_HEAD_INIT(nmi_desc[2].head),
+	[NMI_UNKNOWN] = {
+		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[NMI_UNKNOWN].lock),
+		.head = LIST_HEAD_INIT(nmi_desc[NMI_UNKNOWN].head),
 	},
-	{
-		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
-		.head = LIST_HEAD_INIT(nmi_desc[3].head),
+	[NMI_SERR] = {
+		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[NMI_SERR].lock),
+		.head = LIST_HEAD_INIT(nmi_desc[NMI_SERR].head),
+	},
+	[NMI_IO_CHECK] = {
+		.lock = __SPIN_LOCK_UNLOCKED(&nmi_desc[NMI_IO_CHECK].lock),
+		.head = LIST_HEAD_INIT(nmi_desc[NMI_IO_CHECK].head),
 	},
-
 };
 
 struct nmi_stats {
@@ -428,8 +431,16 @@ static __kprobes void default_do_nmi(struct pt_regs *regs)
 
 	__this_cpu_write(last_nmi_rip, regs->ip);
 
+	/* try local NMIs first */
 	handled = nmi_handle(NMI_LOCAL, regs, b2b);
 	__this_cpu_add(nmi_stats.normal, handled);
+
+	/* if unclaimed, try external NMIs next */
+	if (!handled) {
+		handled = nmi_handle(NMI_EXT, regs, b2b);
+		__this_cpu_add(nmi_stats.external, handled);
+	}
+
 	if (handled) {
 		/*
 		 * There are cases when a NMI handler handles multiple
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index dab7cb7..8b78bf5 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -976,7 +976,7 @@ static int ghes_probe(struct platform_device *ghes_dev)
 		ghes_estatus_pool_expand(len);
 		mutex_lock(&ghes_list_mutex);
 		if (list_empty(&ghes_nmi))
-			register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0,
+			register_nmi_handler(NMI_EXT, ghes_notify_nmi, 0,
 						"ghes");
 		list_add_rcu(&ghes->list, &ghes_nmi);
 		mutex_unlock(&ghes_list_mutex);
@@ -1025,7 +1025,7 @@ static int ghes_remove(struct platform_device *ghes_dev)
 		mutex_lock(&ghes_list_mutex);
 		list_del_rcu(&ghes->list);
 		if (list_empty(&ghes_nmi))
-			unregister_nmi_handler(NMI_LOCAL, "ghes");
+			unregister_nmi_handler(NMI_EXT, "ghes");
 		mutex_unlock(&ghes_list_mutex);
 		/*
 		 * To synchronize with NMI handler, ghes can only be
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index 75d2243..dd70ee7 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -736,12 +736,9 @@ static int hpwdt_init_nmi_decoding(struct pci_dev *dev)
 	retval = register_nmi_handler(NMI_UNKNOWN, hpwdt_pretimeout, 0, "hpwdt");
 	if (retval)
 		goto error;
-	retval = register_nmi_handler(NMI_SERR, hpwdt_pretimeout, 0, "hpwdt");
+	retval = register_nmi_handler(NMI_EXT, hpwdt_pretimeout, 0, "hpwdt");
 	if (retval)
 		goto error1;
-	retval = register_nmi_handler(NMI_IO_CHECK, hpwdt_pretimeout, 0, "hpwdt");
-	if (retval)
-		goto error2;
 
 	dev_info(&dev->dev,
 			"HP Watchdog Timer Driver: NMI decoding initialized"
@@ -749,8 +746,6 @@ static int hpwdt_init_nmi_decoding(struct pci_dev *dev)
 			(allow_kdump == 0) ? "OFF" : "ON");
 	return 0;
 
-error2:
-	unregister_nmi_handler(NMI_SERR, "hpwdt");
 error1:
 	unregister_nmi_handler(NMI_UNKNOWN, "hpwdt");
 error:
@@ -765,8 +760,7 @@ error:
 static void hpwdt_exit_nmi_decoding(void)
 {
 	unregister_nmi_handler(NMI_UNKNOWN, "hpwdt");
-	unregister_nmi_handler(NMI_SERR, "hpwdt");
-	unregister_nmi_handler(NMI_IO_CHECK, "hpwdt");
+	unregister_nmi_handler(NMI_EXT, "hpwdt");
 	if (cru_rom_addr)
 		iounmap(cru_rom_addr);
 }
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/