lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1445366797-30894-2-git-send-email-andi@firstfloor.org>
Date:	Tue, 20 Oct 2015 11:46:34 -0700
From:	Andi Kleen <andi@...stfloor.org>
To:	peterz@...radead.org
Cc:	acme@...nel.org, jolsa@...nel.org, linux-kernel@...r.kernel.org,
	Andi Kleen <ak@...ux.intel.com>
Subject: [PATCH 2/5] x86, perf: Add option to disable reading branch flags/cycles

From: Andi Kleen <ak@...ux.intel.com>

With LBRv5 reading the extra LBR flags like mispredict, TSX, cycles
is not free anymore, as it has moved to a separate MSR.

For callstack mode we don't need any of this information; so we can
avoid the unnecessary MSR read. Add flags to the perf interface
where perf record can request not collecting this information.

I added branch_sample_type flags for CYCLES and FLAGS. It's a bit unusual for
branch_sample_types to be negative (disable), not positive (enable), but
since the legacy ABI reported the flags we need some form of explicit
disabling to avoid breaking the ABI.

Once we have the flags, the x86 perf code can keep track of whether any
users need the flags. If no one needs them, the information is not
collected.

This cuts down the cost of LBR callstack on Skylake significantly.
When profiling a kernel build with LBR call stack, the average run time of
the PMI handler drops by 43%.

v2: Rework based on Peter's feedback.
Moved the NO_FLAGS/NO_CYCLES bits to branch_sample_type.
Remove extra checks for no info, so that the flags are still filled in if
there is no lbr_info or if only one is set; this reduces the number of branches.
Instead of counters, the information about skipping LBR_INFO is now
kept as an extra register bit that is filtered out before writing
the register to hardware.

Signed-off-by: Andi Kleen <ak@...ux.intel.com>
---
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 19 +++++++++++++++++--
 include/uapi/linux/perf_event.h            |  6 ++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 0e4ea00..60e71b7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -42,6 +42,13 @@ static enum {
 #define LBR_FAR_BIT		8 /* do not capture far branches */
 #define LBR_CALL_STACK_BIT	9 /* enable call stack */
 
+/*
+ * Following bit only exists in Linux; we mask it out before writing it to
+ * the actual MSR. But it helps the constraint perf code to understand
+ * that this is a separate configuration.
+ */
+#define LBR_NO_INFO_BIT	       63 /* don't read LBR_INFO. */
+
 #define LBR_KERNEL	(1 << LBR_KERNEL_BIT)
 #define LBR_USER	(1 << LBR_USER_BIT)
 #define LBR_JCC		(1 << LBR_JCC_BIT)
@@ -52,6 +59,7 @@ static enum {
 #define LBR_IND_JMP	(1 << LBR_IND_JMP_BIT)
 #define LBR_FAR		(1 << LBR_FAR_BIT)
 #define LBR_CALL_STACK	(1 << LBR_CALL_STACK_BIT)
+#define LBR_NO_INFO	(1ULL << LBR_NO_INFO_BIT)
 
 #define LBR_PLM (LBR_KERNEL | LBR_USER)
 
@@ -152,7 +160,7 @@ static void __intel_pmu_lbr_enable(bool pmi)
 	 * did not change.
 	 */
 	if (cpuc->lbr_sel)
-		lbr_select = cpuc->lbr_sel->config;
+		lbr_select = cpuc->lbr_sel->config & x86_pmu.lbr_sel_mask;
 	if (!pmi)
 		wrmsrl(MSR_LBR_SELECT, lbr_select);
 
@@ -422,6 +430,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
  */
 static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 {
+	bool need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO);
 	unsigned long mask = x86_pmu.lbr_nr - 1;
 	int lbr_format = x86_pmu.intel_cap.lbr_format;
 	u64 tos = intel_pmu_lbr_tos();
@@ -442,7 +451,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
 		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
 
-		if (lbr_format == LBR_FORMAT_INFO) {
+		if (lbr_format == LBR_FORMAT_INFO && need_info) {
 			u64 info;
 
 			rdmsrl(MSR_LBR_INFO_0 + lbr_idx, info);
@@ -588,6 +597,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 		if (v != LBR_IGN)
 			mask |= v;
 	}
+
 	reg = &event->hw.branch_reg;
 	reg->idx = EXTRA_REG_LBR;
 
@@ -598,6 +608,11 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 	 */
 	reg->config = mask ^ x86_pmu.lbr_sel_mask;
 
+	if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
+	    (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
+	    (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
+		reg->config |= LBR_NO_INFO;
+
 	return 0;
 }
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 2881145..a8ffc76 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -169,6 +169,9 @@ enum perf_branch_sample_type_shift {
 	PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT	= 11, /* call/ret stack */
 	PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT	= 12, /* indirect jumps */
 
+	PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT	= 13, /* no flags */
+	PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT	= 14, /* no cycles */
+
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };
 
@@ -189,6 +192,9 @@ enum perf_branch_sample_type {
 	PERF_SAMPLE_BRANCH_CALL_STACK	= 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
 	PERF_SAMPLE_BRANCH_IND_JUMP	= 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
 
+	PERF_SAMPLE_BRANCH_NO_FLAGS	= 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
+	PERF_SAMPLE_BRANCH_NO_CYCLES	= 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
+
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
 
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ