lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed,  7 Jun 2017 16:22:24 -0700
From:   Andi Kleen <andi@...stfloor.org>
To:     peterz@...radead.org, acme@...nel.org
Cc:     linux-kernel@...r.kernel.org, jolsa@...nel.org, eranian@...gle.com,
        Andi Kleen <ak@...ux.intel.com>
Subject: [PATCH v2 2/4] perf/x86: Fix data source decoding for Skylake

From: Andi Kleen <ak@...ux.intel.com>

Skylake changed the encoding of the PEBS data source field.
Some combinations are no longer available, but some new cases,
e.g. for an L4 cache hit, have been added.

Fix up the conversion table for Skylake, similar to what was done
for Nehalem.

On Skylake server the encoding for L4 actually means persistent
memory. Handle this case too.

To properly describe these cases in the abstracted perf format I had
to add some new bits. Unfortunately the existing fields were full, so
this required adding extension fields for mem_lvl and mem_snoop
(mem_lvlx and mem_snoopx) in the existing reserved space.

v2: Merge with the persistent memory patch.
Add an explicit bit for each case instead of using a generic modifier.
Signed-off-by: Andi Kleen <ak@...ux.intel.com>
---
 arch/x86/events/intel/core.c    |  2 ++
 arch/x86/events/intel/ds.c      | 13 +++++++++++++
 arch/x86/events/perf_event.h    |  2 ++
 include/uapi/linux/perf_event.h | 22 ++++++++++++++++++++--
 4 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index dec9b4bf0752..08e53f36d697 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4052,6 +4052,8 @@ __init int intel_pmu_init(void)
 						  skl_format_attr);
 		WARN_ON(!x86_pmu.format_attrs);
 		x86_pmu.cpu_events = hsw_events_attrs;
+		intel_pmu_pebs_data_source_skl(
+			boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X);
 		pr_cont("Skylake events, ");
 		break;
 
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 7732999f5e2a..cd28c4babd36 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -79,6 +79,19 @@ void __init intel_pmu_pebs_data_source_nhm(void)
 	pebs_data_source[0x07] = OP_LH | P(LVL, L3)  | P(SNOOP, HITM);
 }
 
+void __init intel_pmu_pebs_data_source_skl(bool pmem)
+{
+	u64 pmem_or_l4;
+
+	pmem_or_l4 = pmem ? P(LVLX, PMEM) : P(LVLX, L4);
+	pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
+	pmem_or_l4 = pmem ? P(LVLX, REM_PMEM) : P(LVLX, REM_L4);
+	pebs_data_source[0x09] = OP_LH | pmem_or_l4 | P(SNOOP, HIT);
+	pebs_data_source[0x0b] = OP_LH | P(LVLX, REM_RAM) | P(SNOOP, NONE);
+	pebs_data_source[0x0c] = OP_LH | P(LVLX, REM_NA) | P(SNOOPX, FWD);
+	pebs_data_source[0x0d] = OP_LH | P(LVLX, REM_NA) | P(SNOOP, HITM);
+}
+
 static u64 precise_store_data(u64 status)
 {
 	union intel_x86_pebs_dse dse;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index a6d9d6570957..d7571f248652 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -946,6 +946,8 @@ void intel_pmu_lbr_init_knl(void);
 
 void intel_pmu_pebs_data_source_nhm(void);
 
+void intel_pmu_pebs_data_source_skl(bool pmem);
+
 int intel_pmu_setup_lbr_filter(struct perf_event *event);
 
 void intel_pt_interrupt(void);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index b1c0b187acfe..95daade294d7 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -931,14 +931,18 @@ union perf_mem_data_src {
 			mem_snoop:5,	/* snoop mode */
 			mem_lock:2,	/* lock instr */
 			mem_dtlb:7,	/* tlb access */
-			mem_rsvd:31;
+			mem_lvlx:8,	/* memory hierarchy level, ext */
+			mem_snoopx:2,	/* snoop mode, ext */
+			mem_rsvd:21;
 	};
 };
 #elif defined(__BIG_ENDIAN_BITFIELD)
 union perf_mem_data_src {
 	__u64 val;
 	struct {
-		__u64	mem_rsvd:31,
+		__u64	mem_rsvd:21,
+			mem_snoopx:2,	/* snoop mode, ext */
+			mem_lvlx:8,	/* memory hierarchy level, ext */
 			mem_dtlb:7,	/* tlb access */
 			mem_lock:2,	/* lock instr */
 			mem_snoop:5,	/* snoop mode */
@@ -975,6 +979,16 @@ union perf_mem_data_src {
 #define PERF_MEM_LVL_UNC	0x2000 /* Uncached memory */
 #define PERF_MEM_LVL_SHIFT	5
 
+#define PERF_MEM_LVLX_L4	0x01 /* L4 */
+#define PERF_MEM_LVLX_REM_L4    0x02 /* Remote L4 */
+#define PERF_MEM_LVLX_REM_RAM	0x04 /* Remote Ram, unknown hops */
+#define PERF_MEM_LVLX_PMEM	0x08 /* Persistent Memory */
+#define PERF_MEM_LVLX_REM_PMEM	0x10 /* Remote Persistent Memory */
+#define PERF_MEM_LVLX_REM_NA	0x20 /* Remote N/A level */
+/* 2 free */
+
+#define PERF_MEM_LVLX_SHIFT	33
+
 /* snoop mode */
 #define PERF_MEM_SNOOP_NA	0x01 /* not available */
 #define PERF_MEM_SNOOP_NONE	0x02 /* no snoop */
@@ -983,6 +997,10 @@ union perf_mem_data_src {
 #define PERF_MEM_SNOOP_HITM	0x10 /* snoop hit modified */
 #define PERF_MEM_SNOOP_SHIFT	19
 
+#define PERF_MEM_SNOOPX_FWD	0x01 /* forward */
+/* 1 free */
+#define PERF_MEM_SNOOPX_SHIFT	41
+
 /* locked instruction */
 #define PERF_MEM_LOCK_NA	0x01 /* not available */
 #define PERF_MEM_LOCK_LOCKED	0x02 /* locked transaction */
-- 
2.9.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ