Message-ID: <155232292270.21417.18139649076000959940.stgit@tlendack-t1.amdoffice.net>
Date: Mon, 11 Mar 2019 16:48:44 +0000
From: "Lendacky, Thomas" <Thomas.Lendacky@....com>
To: "x86@...nel.org" <x86@...nel.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
CC: Peter Zijlstra <peterz@...radead.org>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Namhyung Kim <namhyung@...nel.org>,
Thomas Gleixner <tglx@...utronix.de>,
Jiri Olsa <jolsa@...hat.com>
Subject: [RFC PATCH 1/2] x86/perf/amd: Resolve race condition when disabling PMC

On AMD processors, the detection of an overflowed counter in the NMI
handler relies on the current value of the counter. So, for example, to
check for overflow on a 48-bit counter, bit 47 is checked to see if it
is 1 (not overflowed) or 0 (overflowed).
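
For illustration only, a sketch of that per-counter test (this is not code
from the patch; idx stands for the counter index, and rdmsrl(),
x86_pmu_event_addr() and x86_pmu.cntval_bits are the same interfaces used
in the patch below):

  u64 count;

  /* Read the current (48-bit) counter value */
  rdmsrl(x86_pmu_event_addr(idx), count);

  if (count & (1ULL << (x86_pmu.cntval_bits - 1))) {
  	/* Bit 47 still set: the counter has not overflowed */
  } else {
  	/* Bit 47 clear: the counter has overflowed; this NMI is ours */
  }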

There is currently a race condition when disabling and then updating the
PMC. Increased NMI latency in newer AMD processors makes this race
condition more pronounced. If the counter value has overflowed, it is
possible for the PMC value to be updated before the NMI handler can run.
The updated PMC value is not an overflowed value, so when the perf NMI
handler does run, it will not find an overflowed counter. This may appear
as an unknown NMI, resulting in either a panic or a series of messages,
depending on how the kernel is configured.
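
Roughly, the problematic interleaving looks like this (an illustrative
sequence, not a captured trace):

  perf on the counting CPU                   delayed NMI handler
  ---------------------------------------    ---------------------------
  counter overflows (bit 47 becomes 0),
  NMI is raised but not yet delivered
  x86_pmu_disable_all() clears the enable
  bit for the counter
  the PMC is updated with a new,
  non-overflowed value (bit 47 is 1 again)
                                             reads the counter, sees
                                             bit 47 set, finds no
                                             overflowed counter
                                             -> unknown NMI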

To eliminate this race condition, the PMC value must be checked after
disabling the counter in x86_pmu_disable_all() and, if it has overflowed,
the code must wait for the NMI handler to reset the value before
continuing. Add a new, optional x86_pmu callback that can be used to test
for and resolve this condition.

Cc: <stable@...r.kernel.org> # 4.14.x-
Signed-off-by: Tom Lendacky <thomas.lendacky@....com>
---
 arch/x86/events/amd/core.c   | 39 +++++++++++++++++++++++++++++++++++++++
 arch/x86/events/core.c       |  3 +++
 arch/x86/events/perf_event.h |  1 +
 3 files changed, 43 insertions(+)

diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 7d2d7c801dba..d989640fa87d 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -3,6 +3,7 @@
 #include <linux/types.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <linux/delay.h>
 #include <asm/apicdef.h>
 
 #include "../perf_event.h"
@@ -429,6 +430,43 @@ static void amd_pmu_cpu_dead(int cpu)
 	}
 }
 
+#define OVERFLOW_WAIT_COUNT	50
+
+static void amd_pmu_wait_on_overflow(int idx, u64 config)
+{
+	unsigned int i;
+	u64 counter;
+
+	/*
+	 * We shouldn't be calling this from NMI context, but add a safeguard
+	 * here to return, since if we're in NMI context we can't wait for an
+	 * NMI to reset an overflowed counter value.
+	 */
+	if (in_nmi())
+		return;
+
+	/*
+	 * If the interrupt isn't enabled then we won't get the NMI that will
+	 * reset the overflow condition, so return.
+	 */
+	if (!(config & ARCH_PERFMON_EVENTSEL_INT))
+		return;
+
+	/*
+	 * Wait for the counter to be reset if it has overflowed. This loop
+	 * should exit very, very quickly, but just in case, don't wait
+	 * forever...
+	 */
+	for (i = 0; i < OVERFLOW_WAIT_COUNT; i++) {
+		rdmsrl(x86_pmu_event_addr(idx), counter);
+		if (counter & (1ULL << (x86_pmu.cntval_bits - 1)))
+			break;
+
+		/* Might be in IRQ context, so can't sleep */
+		udelay(1);
+	}
+}
+
 static struct event_constraint *
 amd_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
 			  struct perf_event *event)
@@ -650,6 +688,7 @@ static __initconst const struct x86_pmu amd_pmu = {
 	.cpu_dead		= amd_pmu_cpu_dead,
 
 	.amd_nb_constraints	= 1,
+	.wait_on_overflow	= amd_pmu_wait_on_overflow,
 };
 
 static int __init amd_core_pmu_init(void)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index b684f0294f35..f1d2f70000cd 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -606,6 +606,9 @@ void x86_pmu_disable_all(void)
 			continue;
 		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
 		wrmsrl(x86_pmu_config_addr(idx), val);
+
+		if (x86_pmu.wait_on_overflow)
+			x86_pmu.wait_on_overflow(idx, val);
 	}
 }
 
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 7e75f474b076..a37490a26a09 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -636,6 +636,7 @@ struct x86_pmu {
 	 * AMD bits
 	 */
 	unsigned int	amd_nb_constraints : 1;
+	void		(*wait_on_overflow)(int idx, u64 config);
 
 	/*
 	 * Extra registers for events