lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 21 Jun 2016 11:31:11 -0700
From:	David Carrillo-Cisneros <davidcc@...gle.com>
To:	linux-kernel@...r.kernel.org
Cc:	"x86@...nel.org" <x86@...nel.org>, Ingo Molnar <mingo@...hat.com>,
	Andi Kleen <ak@...ux.intel.com>,
	Kan Liang <kan.liang@...el.com>,
	Peter Zijlstra <peterz@...radead.org>,
	David Carrillo-Cisneros <davidcc@...gle.com>
Subject: [PATCH v02 2/5] perf/x86/intel: fix for MSR_LAST_BRANCH_FROM_x bug when no TSX

Intel's SDM states that bits 61:62 in MSR_LAST_BRANCH_FROM_x are the
TSX flags for formats with LBR_TSX flags (i.e. LBR_FORMAT_EIP_EFLAGS2).

However, when the CPU has TSX support deactivated, bits 61:62 actually
behave as follows:
  - For wrmsr, bits 61:62 are considered part of the sign extension.
  - When capturing branches, the LBR hw will always clear bits 61:62.
  regardless of the sign extension.

Therefore, if:
  1) LBR has TSX format.
  2) CPU has no TSX support enabled.
then any value passed to wrmsr must be sign extended to 63 bits and any
value from rdmsr must be converted to have a sign extension of 61 bits,
ignoring the values at TSX flags.

This bug was masked by the work-around to the Intel's CPU bug:
BJ94. "LBR May Contain Incorrect Information When Using FREEZE_LBRS_ON_PMI"
in Document Number: 324643-037US.

The aforementioned work-around uses hw flags to filter out all kernel
branches, limiting LBR callstack to user level execution only.
Since user addresses are not sign extended, they do not trigger the wrmsr
bug in MSR_LAST_BRANCH_FROM_x when saved/restored at context switch.

To verify the hw bug:

  $ perf record -b -e cycles sleep 1
  $ rdmsr -p 0 0x680
  0x1fffffffb0b9b0cc
  $ wrmsr -p 0 0x680 0x1fffffffb0b9b0cc
  write(): Input/output error

The quirk for LBR_FROM_ msrs is required before calls to wrmsrl and
after rdmsrl.

This patch introduces it for wrmsrl's done for testing LBR support.
Future patch in series adds the quirk for context switch, that would
be required if LBR callstack is to be enabled for ring 0.

Signed-off-by: David Carrillo-Cisneros <davidcc@...gle.com>
Reviewed-by: Stephane Eranian <eranian@...gle.com>
---
 arch/x86/events/intel/core.c | 18 ++++++++++++++++
 arch/x86/events/intel/lbr.c  | 49 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/events/perf_event.h |  2 ++
 3 files changed, 69 insertions(+)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index a5e52ad4..eb3a26a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3302,6 +3302,13 @@ static void intel_snb_check_microcode(void)
 	}
 }
 
+static bool is_lbr_from(unsigned long msr)
+{
+	unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr;
+
+	return x86_pmu.lbr_from <= msr && msr < lbr_from_nr;
+}
+
 /*
  * Under certain circumstances, access certain MSR may cause #GP.
  * The function tests if the input MSR can be safely accessed.
@@ -3322,13 +3329,24 @@ static bool check_msr(unsigned long msr, u64 mask)
 	 * Only change the bits which can be updated by wrmsrl.
 	 */
 	val_tmp = val_old ^ mask;
+
+	if (is_lbr_from(msr))
+		val_tmp = lbr_from_signext_quirk_wr(val_tmp);
+
 	if (wrmsrl_safe(msr, val_tmp) ||
 	    rdmsrl_safe(msr, &val_new))
 		return false;
 
+	/*
+	 * quirk only affects validation in wrmsr, so wrmsrl's value
+	 * should equal rdmsrl's even with the quirk.
+	 */
 	if (val_new != val_tmp)
 		return false;
 
+	if (is_lbr_from(msr))
+		val_old = lbr_from_signext_quirk_wr(val_old);
+
 	/* Here it's sure that the MSR can be safely accessed.
 	 * Restore the old value and return.
 	 */
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 2dca66c..3a2c67d 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -81,6 +81,8 @@ static enum {
 #define LBR_FROM_FLAG_IN_TX    (1ULL << 62)
 #define LBR_FROM_FLAG_ABORT    (1ULL << 61)
 
+#define LBR_FROM_SIGNEXT_2MSB	(BIT_ULL(60) | BIT_ULL(59))
+
 /*
  * x86control flow change classification
  * x86control flow changes include branches, interrupts, traps, faults
@@ -235,6 +237,50 @@ enum {
 	LBR_VALID,
 };
 
+/*
+ * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
+ * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
+ * TSX is not supported they have no consistent behavior:
+ *   - For wrmsr, bits 61:62 are considered part of the sign extension.
+ *   - For HW updates (branch captures) bits 61:62 are always OFF and are not
+ *  part of the sign extension.
+ *
+ * Therefore, if:
+ *   1) LBR has TSX format
+ *   2) CPU has no TSX support enabled
+ * then any value passed to wrmsr must be sign extended to 63 bits and any
+ * value from rdmsr must be converted to have a 61 bits sign extension,
+ * ignoring the TSX flags.
+ */
+
+static inline bool lbr_from_signext_quirk_needed(void)
+{
+	int lbr_format = x86_pmu.intel_cap.lbr_format;
+	bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
+			   boot_cpu_has(X86_FEATURE_RTM);
+
+	return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
+}
+
+DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
+
+/*If quirk is enabled, ensure sign extension is 63 bits. */
+inline u64 lbr_from_signext_quirk_wr(u64 val)
+{
+	if (static_branch_unlikely(&lbr_from_quirk_key))
+		/*
+		 * Sign extend into bits 61:62 while preserving bit 63.
+		 *
+		 * Quirk is enabled when TSX is disabled. Therefore TSX bits
+		 * in val are always OFF and must be changed to be sign
+		 * extension bits. Since bits 59:60 are guaranteed to be
+		 * part of the sign extension bits, we can just copy them
+		 * to 61:62.
+		 */
+		val |= (LBR_FROM_SIGNEXT_2MSB & val) << 2;
+	return val;
+}
+
 static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
 {
 	int i;
@@ -1007,6 +1053,9 @@ void intel_pmu_lbr_init_hsw(void)
 
 	x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
 	x86_pmu.lbr_sel_map  = hsw_lbr_sel_map;
+
+	if (lbr_from_signext_quirk_needed())
+		static_branch_enable(&lbr_from_quirk_key);
 }
 
 /* skylake */
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 8bd764d..c934931 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -892,6 +892,8 @@ void intel_ds_init(void);
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
 
+u64 lbr_from_signext_quirk_wr(u64 val);
+
 void intel_pmu_lbr_reset(void);
 
 void intel_pmu_lbr_enable(struct perf_event *event);
-- 
2.8.0.rc3.226.g39d4020

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ