lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Wed, 13 Nov 2019 10:56:43 -0000
From:   "tip-bot2 for Alexander Shishkin" <tip-bot2@...utronix.de>
To:     linux-tip-commits@...r.kernel.org
Cc:     Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        "Peter Zijlstra (Intel)" <peterz@...radead.org>,
        Arnaldo Carvalho de Melo <acme@...hat.com>,
        David Ahern <dsahern@...il.com>, Jiri Olsa <jolsa@...nel.org>,
        Jiri Olsa <jolsa@...hat.com>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Mark Rutland <mark.rutland@....com>,
        Namhyung Kim <namhyung@...nel.org>,
        Stephane Eranian <eranian@...gle.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Vince Weaver <vincent.weaver@...ne.edu>,
        Ingo Molnar <mingo@...nel.org>, Borislav Petkov <bp@...en8.de>,
        linux-kernel@...r.kernel.org
Subject: [tip: perf/core] perf/x86/intel/pt: Prevent redundant WRMSRs

The following commit has been merged into the perf/core branch of tip:

Commit-ID:     295c52ee1485e4dee660fc1a0e6ceed6c803c9d3
Gitweb:        https://git.kernel.org/tip/295c52ee1485e4dee660fc1a0e6ceed6c803c9d3
Author:        Alexander Shishkin <alexander.shishkin@...ux.intel.com>
AuthorDate:    Tue, 05 Nov 2019 10:27:01 +02:00
Committer:     Ingo Molnar <mingo@...nel.org>
CommitterDate: Wed, 13 Nov 2019 11:06:18 +01:00

perf/x86/intel/pt: Prevent redundant WRMSRs

With recent optimizations to the AUX and PT buffer management code
(high-order AUX allocations, opportunistic Single Range Output), it is now
far more likely that the output MSRs won't need reprogramming on every
sched-in.

To avoid needless WRMSRs to those registers, cache their values in the
per-cpu PT context and only write an MSR when the new value differs from
the cached one.

Signed-off-by: Alexander Shishkin <alexander.shishkin@...ux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
Cc: Arnaldo Carvalho de Melo <acme@...hat.com>
Cc: David Ahern <dsahern@...il.com>
Cc: Jiri Olsa <jolsa@...nel.org>
Cc: Jiri Olsa <jolsa@...hat.com>
Cc: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: Mark Rutland <mark.rutland@....com>
Cc: Namhyung Kim <namhyung@...nel.org>
Cc: Stephane Eranian <eranian@...gle.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Vince Weaver <vincent.weaver@...ne.edu>
Link: https://lkml.kernel.org/r/20191105082701.78442-3-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@...nel.org>
---
 arch/x86/events/intel/pt.c | 25 ++++++++++++++++---------
 arch/x86/events/intel/pt.h | 10 +++++++---
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index c87d163..1db7a51 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -606,6 +606,7 @@ static inline phys_addr_t topa_pfn(struct topa *topa)
 
 static void pt_config_buffer(struct pt_buffer *buf)
 {
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	u64 reg, mask;
 	void *base;
 
@@ -617,11 +618,17 @@ static void pt_config_buffer(struct pt_buffer *buf)
 		mask = (u64)buf->cur_idx;
 	}
 
-	wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(base));
+	reg = virt_to_phys(base);
+	if (pt->output_base != reg) {
+		pt->output_base = reg;
+		wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg);
+	}
 
 	reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32);
-
-	wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+	if (pt->output_mask != reg) {
+		pt->output_mask = reg;
+		wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+	}
 }
 
 /**
@@ -930,21 +937,21 @@ static void pt_handle_status(struct pt *pt)
  */
 static void pt_read_offset(struct pt_buffer *buf)
 {
-	u64 offset, base;
+	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct topa_page *tp;
 
 	if (!buf->single) {
-		rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base);
-		tp = phys_to_virt(base);
+		rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base);
+		tp = phys_to_virt(pt->output_base);
 		buf->cur = &tp->topa;
 	}
 
-	rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
+	rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask);
 	/* offset within current output region */
-	buf->output_off = offset >> 32;
+	buf->output_off = pt->output_mask >> 32;
 	/* index of current output region within this table */
 	if (!buf->single)
-		buf->cur_idx = (offset & 0xffffff80) >> 7;
+		buf->cur_idx = (pt->output_mask & 0xffffff80) >> 7;
 }
 
 static struct topa_entry *
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 3f78182..96906a6 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -113,16 +113,20 @@ struct pt_filters {
 
 /**
  * struct pt - per-cpu pt context
- * @handle:	perf output handle
+ * @handle:		perf output handle
  * @filters:		last configured filters
- * @handle_nmi:	do handle PT PMI on this cpu, there's an active event
- * @vmx_on:	1 if VMX is ON on this cpu
+ * @handle_nmi:		do handle PT PMI on this cpu, there's an active event
+ * @vmx_on:		1 if VMX is ON on this cpu
+ * @output_base:	cached RTIT_OUTPUT_BASE MSR value
+ * @output_mask:	cached RTIT_OUTPUT_MASK MSR value
  */
 struct pt {
 	struct perf_output_handle handle;
 	struct pt_filters	filters;
 	int			handle_nmi;
 	int			vmx_on;
+	u64			output_base;
+	u64			output_mask;
 };
 
 #endif /* __INTEL_PT_H__ */

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ