linux-kernel - Re: [PATCH 0/3 v2] nmi perf fixes

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1284126770.402.60.camel@laptop>
Date:	Fri, 10 Sep 2010 15:52:50 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	Don Zickus <dzickus@...hat.com>
Cc:	mingo@...e.hu, robert.richter@....com, gorcunov@...il.com,
	fweisbec@...il.com, linux-kernel@...r.kernel.org,
	ying.huang@...el.com, ming.m.lin@...el.com, yinghai@...nel.org,
	andi@...stfloor.org, eranian@...gle.com
Subject: Re: [PATCH 0/3 v2] nmi perf fixes

On Fri, 2010-09-10 at 15:34 +0200, Peter Zijlstra wrote:
> > I'll look at getting a trace of the thing, but if any of you has a
> > bright idea...
> 
           <...>-3164  [000]    51.658621: perf_event_nmi_handler: NMI: 14146 6078 1
           <...>-3164  [000]    51.658622: x86_pmu_handle_irq: OVERFLOW: 1
           <...>-3164  [000]    51.658622: x86_pmu_handle_irq: HANDLED: 1
           <...>-3164  [000]    51.658624: perf_event_nmi_handler: NMI-handled(1): 14146 6078 1
           <...>-3164  [000]    51.658625: perf_event_nmi_handler: NMI-stop: 14146 6078 1
           <...>-3164  [000]    51.658627: perf_event_nmi_handler: NMI: 14147 6078 1
           <...>-3164  [000]    51.658627: x86_pmu_handle_irq: OVERFLOW: 1
           <...>-3164  [000]    51.658628: x86_pmu_handle_irq: HANDLED: 1
           <...>-3164  [000]    51.658631: perf_event_nmi_handler: NMI-handled(1): 14147 6078 1
           <...>-3164  [000]    51.658631: perf_event_nmi_handler: NMI-stop: 14147 6078 1
           <...>-3164  [000]    51.658633: perf_event_nmi_handler: NMI: 14148 6078 1
           <...>-3164  [000]    51.658634: perf_event_nmi_handler: NMI-handled(0): 14148 6078 1
           <...>-3164  [000]    51.658635: perf_event_nmi_handler: NMI: 14148 6078 1
           <...>-3164  [000]    51.658636: perf_event_nmi_handler: NMI-handled(0): 14148 6078 1
           <...>-3164  [000]    51.658637: perf_event_nmi_handler: NMI: 14148 6078 1
           <...>-3164  [000]    51.658637: perf_event_nmi_handler: NMI-fail

That seems to be clear enough... no idea where that extra NMI comes from.
Robert, any clue?

---
 arch/x86/kernel/cpu/perf_event.c |   39 +++++++++++++++++++++++++++++++++++++-
 1 files changed, 38 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index de6569c..5e9921c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1127,6 +1127,20 @@ static void x86_pmu_disable(struct perf_event *event)
 	perf_event_update_userpage(event);
 }
 
+static int pmc_overflow(int idx)
+{
+	u64 val;
+
+	rdmsrl(x86_pmu.eventsel + idx, val);
+	if ((val & (ARCH_PERFMON_EVENTSEL_ENABLE | ARCH_PERFMON_EVENTSEL_INT)) ==
+		  (ARCH_PERFMON_EVENTSEL_ENABLE | ARCH_PERFMON_EVENTSEL_INT)) {
+		rdmsrl(x86_pmu.perfctr + idx, val);
+		return !(val & (1ULL << (x86_pmu.cntval_bits - 1)));
+	}
+
+	return 0;
+}
+
 static int x86_pmu_handle_irq(struct pt_regs *regs)
 {
 	struct perf_sample_data data;
@@ -1141,6 +1155,8 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		if (pmc_overflow(idx))
+			trace_printk("OVERFLOW: %d\n", idx);
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
@@ -1154,6 +1170,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 		/*
 		 * event overflow
 		 */
+		trace_printk("HANDLED: %d\n", idx);
 		handled++;
 		data.period	= event->hw.last_period;
 
@@ -1215,6 +1232,11 @@ perf_event_nmi_handler(struct notifier_block *self,
 	unsigned int this_nmi;
 	int handled;
 
+	trace_printk("NMI: %d %d %d\n",
+			percpu_read(irq_stat.__nmi_count),
+			__get_cpu_var(pmu_nmi).marked,
+			__get_cpu_var(pmu_nmi).handled);
+
 	if (!atomic_read(&active_events))
 		return NOTIFY_DONE;
 
@@ -1224,9 +1246,12 @@ perf_event_nmi_handler(struct notifier_block *self,
 		break;
 	case DIE_NMIUNKNOWN:
 		this_nmi = percpu_read(irq_stat.__nmi_count);
-		if (this_nmi != __get_cpu_var(pmu_nmi).marked)
+		if (this_nmi != __get_cpu_var(pmu_nmi).marked) {
+			trace_printk("NMI-fail\n");
 			/* let the kernel handle the unknown nmi */
 			return NOTIFY_DONE;
+		}
+		trace_printk("NMI-consume\n");
 		/*
 		 * This one is a PMU back-to-back nmi. Two events
 		 * trigger 'simultaneously' raising two back-to-back
@@ -1242,6 +1267,13 @@ perf_event_nmi_handler(struct notifier_block *self,
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
 
 	handled = x86_pmu.handle_irq(args->regs);
+
+	trace_printk("NMI-handled(%d): %d %d %d\n",
+			handled,
+			percpu_read(irq_stat.__nmi_count),
+			__get_cpu_var(pmu_nmi).marked,
+			__get_cpu_var(pmu_nmi).handled);
+
 	if (!handled)
 		return NOTIFY_DONE;
 
@@ -1264,6 +1296,11 @@ perf_event_nmi_handler(struct notifier_block *self,
 		__get_cpu_var(pmu_nmi).handled	= handled;
 	}
 
+	trace_printk("NMI-stop: %d %d %d\n",
+			percpu_read(irq_stat.__nmi_count),
+			__get_cpu_var(pmu_nmi).marked,
+			__get_cpu_var(pmu_nmi).handled);
+
 	return NOTIFY_STOP;
 }
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/