lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 25 May 2022 15:09:28 +0530
From:   Ravi Bangoria <ravi.bangoria@....com>
To:     <peterz@...radead.org>, <acme@...nel.org>
CC:     <ravi.bangoria@....com>, <jolsa@...nel.org>, <namhyung@...nel.org>,
        <eranian@...gle.com>, <irogers@...gle.com>, <jmario@...hat.com>,
        <leo.yan@...aro.org>, <alisaidi@...zon.com>, <ak@...ux.intel.com>,
        <kan.liang@...ux.intel.com>, <dave.hansen@...ux.intel.com>,
        <hpa@...or.com>, <mingo@...hat.com>, <mark.rutland@....com>,
        <alexander.shishkin@...ux.intel.com>, <tglx@...utronix.de>,
        <bp@...en8.de>, <x86@...nel.org>,
        <linux-perf-users@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
        <sandipan.das@....com>, <ananth.narayan@....com>,
        <kim.phillips@....com>, <santosh.shukla@....com>
Subject: [PATCH 03/13] perf/x86/amd: Support PERF_SAMPLE_DATA_SRC based on IBS_OP_DATA*

struct perf_mem_data_src is used to pass arch-specific memory access
details in a generic form. These details get consumed by tools like
perf mem and c2c. Each IBS-tagged load/store sample provides most of
the information needed by these tools. Add logic to convert the
IBS-specific raw data into perf_mem_data_src.

Signed-off-by: Ravi Bangoria <ravi.bangoria@....com>
---
 arch/x86/events/amd/ibs.c | 297 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 291 insertions(+), 6 deletions(-)

diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c
index c251bc44c088..6626caeed6a1 100644
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -688,6 +688,289 @@ static struct perf_ibs perf_ibs_op = {
 	.get_count		= get_ibs_op_count,
 };
 
+static void perf_ibs_get_mem_op(u64 op_data3, struct perf_sample_data *data)
+{
+	union perf_mem_data_src *data_src = &data->data_src;
+
+	data_src->mem_op = PERF_MEM_OP_NA;	/* stays NA if neither bit below is set */
+
+	if (op_data3 & IBS_LD_OP_MASK)		/* load bit is tested before store bit */
+		data_src->mem_op = PERF_MEM_OP_LOAD;
+	else if (op_data3 & IBS_ST_OP_MASK)
+		data_src->mem_op = PERF_MEM_OP_STORE;
+}
+
+/*
+ * Processors with CPUID_Fn8000001B_EAX[11] (aka IBS_CAPS_ZEN4) set have
+ * finer-grained DataSrc encodings (HI and LO fields); others use LO only.
+ */
+static u8 perf_ibs_data_src(u64 op_data2)
+{
+	if (ibs_caps & IBS_CAPS_ZEN4) {	/* HI field is shifted down to bit 3 and up, then OR-ed with LO */
+		return ((op_data2 & IBS_DATA_SRC_HI_MASK) >> (IBS_DATA_SRC_HI_SHIFT - 3)) |
+		       ((op_data2 & IBS_DATA_SRC_LO_MASK) >> IBS_DATA_SRC_LO_SHIFT);
+	}
+
+	return (op_data2 & IBS_DATA_SRC_LO_MASK) >> IBS_DATA_SRC_LO_SHIFT;	/* coarse: LO only */
+}
+
+static void perf_ibs_get_mem_lvl(struct perf_event *event, u64 op_data2,
+				 u64 op_data3, struct perf_sample_data *data)
+{
+	union perf_mem_data_src *data_src = &data->data_src;
+	u8 ibs_data_src = perf_ibs_data_src(op_data2);
+
+	data_src->mem_lvl = 0;	/* checks below run in priority order; first match returns */
+
+	/*
+	 * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached
+	 * memory accesses, so check DcUcMemAcc early (unless DataSrc says IO).
+	 */
+	if (op_data3 & IBS_DC_UC_MEM_ACC_MASK &&
+	    ibs_data_src != IBS_DATA_SRC_EXT_IO) {
+		data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT;
+		return;
+	}
+
+	/* L1 Hit: the DcMiss bit is clear */
+	if ((op_data3 & IBS_DC_MISS_MASK) == 0) {
+		data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+		return;
+	}
+
+	/* L2 Hit: the L2Miss bit is clear */
+	if ((op_data3 & IBS_L2_MISS_MASK) == 0) {
+		/* Erratum #1293: fam 0x19, model <= 0xF: skip if SwPf/NoMabAlloc set */
+		if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF ||
+		    !(op_data3 & IBS_SW_PF_MASK || op_data3 & IBS_DC_MISS_NO_MAB_ALLOC_MASK)) {
+			data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+			return;
+		}
+	}
+
+	/* L3 Hit, loads only (the non-Zen4 encoding also sets REM_CCE1) */
+	if (ibs_caps & IBS_CAPS_ZEN4) {
+		if (data_src->mem_op == PERF_MEM_OP_LOAD &&
+		    ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) {
+			data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+			return;
+		}
+	} else {
+		if (data_src->mem_op == PERF_MEM_OP_LOAD &&
+		    ibs_data_src == IBS_DATA_SRC_LOC_CACHE) {
+			data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 |
+					    PERF_MEM_LVL_HIT;
+			return;
+		}
+	}
+
+	/* A peer cache in a near CCX (Zen4 encoding only, loads only). */
+	if (ibs_caps & IBS_CAPS_ZEN4 && data_src->mem_op == PERF_MEM_OP_LOAD &&
+	    ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) {
+		data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT;
+		return;
+	}
+
+	/* A peer cache in a far CCX (loads only). */
+	if (ibs_caps & IBS_CAPS_ZEN4) {
+		if (data_src->mem_op == PERF_MEM_OP_LOAD &&
+		    ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) {
+			data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
+			return;
+		}
+	} else {
+		if (data_src->mem_op == PERF_MEM_OP_LOAD &&
+		    ibs_data_src == IBS_DATA_SRC_REM_CACHE) {
+			data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT;
+			return;
+		}
+	}
+
+	/* DRAM (loads only): RmtNode selects local vs. remote RAM */
+	if (data_src->mem_op == PERF_MEM_OP_LOAD &&
+	    ibs_data_src == IBS_DATA_SRC_EXT_DRAM) {
+		if ((op_data2 & IBS_RMT_NODE_MASK) == 0)
+			data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT;
+		else
+			data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT;
+		return;
+	}
+
+	/* PMEM: reported through mem_lvl_num; mem_lvl is left at 0 */
+	if (ibs_caps & IBS_CAPS_ZEN4 && data_src->mem_op == PERF_MEM_OP_LOAD &&
+	    ibs_data_src == IBS_DATA_SRC_EXT_PMEM) {
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
+		if (op_data2 & IBS_RMT_NODE_MASK) {
+			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+			/* IBS doesn't provide Remote socket detail */
+			data_src->mem_hops = PERF_MEM_HOPS_1;
+		}
+		return;
+	}
+
+	/* Extension Memory: reported through mem_lvl_num; mem_lvl is left at 0 */
+	if (ibs_caps & IBS_CAPS_ZEN4 && data_src->mem_op == PERF_MEM_OP_LOAD &&
+	    ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) {
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_EXTN_MEM;
+		if (op_data2 & IBS_RMT_NODE_MASK) {
+			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+			/* IBS doesn't provide Remote socket detail */
+			data_src->mem_hops = PERF_MEM_HOPS_1;
+		}
+		return;
+	}
+
+	/* IO: reported through mem_lvl_num; mem_lvl is left at 0 */
+	if (data_src->mem_op == PERF_MEM_OP_LOAD &&
+	    ibs_data_src == IBS_DATA_SRC_EXT_IO) {
+		data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO;
+		if (op_data2 & IBS_RMT_NODE_MASK) {
+			data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+			/* IBS doesn't provide Remote socket detail */
+			data_src->mem_hops = PERF_MEM_HOPS_1;
+		}
+		return;
+	}
+
+	/*
+	 * MAB (Miss Address Buffer) Hit. MAB keeps track of outstanding
+	 * DC misses. However such data may come from any level in mem
+	 * hierarchy. IBS provides detail about both MAB as well as actual
+	 * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set
+	 * MAB only when IBS fails to provide DataSrc.
+	 */
+	if (op_data3 & IBS_DC_MISS_NO_MAB_ALLOC_MASK) {
+		data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
+		return;
+	}
+
+	data_src->mem_lvl = PERF_MEM_LVL_NA;	/* nothing above matched */
+}
+
+static bool perf_ibs_cache_hit_st_valid(void)
+{
+	/* 0: Uninitialized, 1: Valid, -1: Invalid (decided on first call, then reused) */
+	static int cache_hist_st_valid;
+
+	if (unlikely(!cache_hist_st_valid)) {
+		if (boot_cpu_data.x86 == 0x19 &&	/* family 0x19 ... */
+		    (boot_cpu_data.x86_model <= 0xF ||	/* ... models up to 0xF ... */
+		    (boot_cpu_data.x86_model >= 0x20 &&	/* ... or models 0x20-0x5F */
+		     boot_cpu_data.x86_model <= 0x5F))) {
+			cache_hist_st_valid = -1;
+		} else {
+			cache_hist_st_valid = 1;
+		}
+	}
+
+	return cache_hist_st_valid == 1;
+}
+
+static void perf_ibs_get_mem_snoop(u64 op_data2, struct perf_sample_data *data)
+{
+	union perf_mem_data_src *data_src = &data->data_src;
+	u8 ibs_data_src;
+
+	data_src->mem_snoop = PERF_MEM_SNOOP_NA;	/* result for every early return */
+
+	if (!perf_ibs_cache_hit_st_valid() ||
+	    data_src->mem_op != PERF_MEM_OP_LOAD ||	/* reads mem_op/mem_lvl set earlier */
+	    data_src->mem_lvl & PERF_MEM_LVL_L1 ||
+	    data_src->mem_lvl & PERF_MEM_LVL_L2 ||
+	    op_data2 & IBS_CACHE_HIT_ST_MASK)		/* CacheHitSt bit set: keep NA */
+		return;
+
+	ibs_data_src = perf_ibs_data_src(op_data2);
+
+	if ((ibs_data_src == IBS_DATA_SRC_LOC_CACHE) ||	/* tested regardless of IBS_CAPS_ZEN4 */
+	    (ibs_caps & IBS_CAPS_ZEN4 && (
+	     ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE ||
+	     ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE ||
+	     ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE))) {
+		data_src->mem_snoop = PERF_MEM_SNOOP_HITM;	/* cache DataSrc, CacheHitSt clear */
+	}
+}
+
+static void perf_ibs_get_tlb_lvl(u64 op_data3, struct perf_sample_data *data)
+{
+	union perf_mem_data_src *data_src = &data->data_src;
+	u64 l1_tlb_miss = op_data3 & IBS_DC_L1_TLB_MISS_MASK;
+	u64 lin_addr_valid = op_data3 & IBS_DC_LIN_ADDR_VALID_MASK;
+	u64 l2_tlb_miss = op_data3 & IBS_DC_L2_TLB_MISS_MASK;
+
+	data_src->mem_dtlb = PERF_MEM_TLB_NA;
+
+	if (!lin_addr_valid)	/* leave NA unless the linear-address-valid bit is set */
+		return;
+
+	if (!l1_tlb_miss) {	/* no L1 TLB miss: report an L1 hit */
+		data_src->mem_dtlb = PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT;
+		return;
+	}
+
+	if (!l2_tlb_miss) {	/* missed L1 but not L2: report an L2 hit */
+		data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT;
+		return;
+	}
+
+	data_src->mem_dtlb = PERF_MEM_TLB_L2 | PERF_MEM_TLB_MISS;	/* both miss bits set */
+}
+
+static void perf_ibs_get_mem_lock(u64 op_data3, struct perf_sample_data *data)
+{
+	union perf_mem_data_src *data_src = &data->data_src;
+
+	data_src->mem_lock = PERF_MEM_LOCK_NA;	/* stays NA when the locked-op bit is clear */
+
+	if (op_data3 & IBS_DC_LOCKED_OP_MASK)
+		data_src->mem_lock = PERF_MEM_LOCK_LOCKED;
+}
+
+#define ibs_op_msr_idx(msr)	((msr) - MSR_AMD64_IBSOPCTL)	/* MSR offset from IBSOPCTL */
+
+static void perf_ibs_get_data_src(struct perf_event *event,
+				  struct perf_ibs_data *ibs_data,
+				  struct perf_sample_data *data)
+{
+	union perf_mem_data_src *data_src = &data->data_src;
+	u64 op_data2 = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA2)];
+	u64 op_data3 = ibs_data->regs[ibs_op_msr_idx(MSR_AMD64_IBSOPDATA3)];
+
+	perf_ibs_get_mem_op(op_data3, data);
+	if (data_src->mem_op != PERF_MEM_OP_LOAD &&
+	    data_src->mem_op != PERF_MEM_OP_STORE)
+		return;	/* neither load nor store: the other data_src fields are not filled in */
+
+	/* Erratum #1293: applies to family 0x19, models <= 0xF, with SwPf or NoMabAlloc set */
+	if (boot_cpu_data.x86 == 0x19 && boot_cpu_data.x86_model <= 0xF &&
+	    (op_data3 & IBS_SW_PF_MASK ||
+	     op_data3 & IBS_DC_MISS_NO_MAB_ALLOC_MASK)) {
+		/*
+		 * OP_DATA2 has only two fields on Zen3: DataSrc and RmtNode.
+		 * DataSrc=0 means "no valid status", and RmtNode is invalid
+		 * when DataSrc=0, so clearing the local copy drops both.
+		 */
+		op_data2 = 0;
+	}
+
+	perf_ibs_get_mem_lvl(event, op_data2, op_data3, data);
+	perf_ibs_get_mem_snoop(op_data2, data);	/* reads mem_lvl, so must follow get_mem_lvl */
+	perf_ibs_get_tlb_lvl(op_data3, data);
+	perf_ibs_get_mem_lock(op_data3, data);
+}
+
+static int perf_ibs_get_offset_max(struct perf_ibs *perf_ibs, u64 sample_type,
+				   int check_rip)
+{
+	if (sample_type & PERF_SAMPLE_RAW ||	/* RAW always gets perf_ibs->offset_max */
+	    (perf_ibs == &perf_ibs_op &&	/* DATA_SRC gets it only for perf_ibs_op */
+	     sample_type & PERF_SAMPLE_DATA_SRC))
+		return perf_ibs->offset_max;
+	else if (check_rip)
+		return 3;	/* NOTE(review): presumably covers the register the RIP check reads; confirm */
+	return 1;	/* neither RAW/DATA_SRC nor check_rip */
+}
+
 static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 {
 	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
@@ -735,12 +1018,9 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 	size = 1;
 	offset = 1;
 	check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
-	if (event->attr.sample_type & PERF_SAMPLE_RAW)
-		offset_max = perf_ibs->offset_max;
-	else if (check_rip)
-		offset_max = 3;
-	else
-		offset_max = 1;
+
+	offset_max = perf_ibs_get_offset_max(perf_ibs, event->attr.sample_type, check_rip);
+
 	do {
 		rdmsrl(msr + offset, *buf++);
 		size++;
@@ -793,6 +1073,11 @@ static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
 		data.raw = &raw;
 	}
 
+	if (perf_ibs == &perf_ibs_op) {
+		if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC)
+			perf_ibs_get_data_src(event, &ibs_data, &data);
+	}
+
 	/*
 	 * rip recorded by IbsOpRip will not be consistent with rsp and rbp
 	 * recorded as part of interrupt regs. Thus we need to use rip from
-- 
2.31.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ