lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1491562066-7472-3-git-send-email-yao.jin@linux.intel.com>
Date:   Fri,  7 Apr 2017 18:47:43 +0800
From:   Jin Yao <yao.jin@...ux.intel.com>
To:     acme@...nel.org, jolsa@...nel.org, peterz@...radead.org,
        mingo@...hat.com, alexander.shishkin@...ux.intel.com
Cc:     Linux-kernel@...r.kernel.org, ak@...ux.intel.com,
        kan.liang@...el.com, yao.jin@...el.com,
        linuxppc-dev@...ts.ozlabs.org, Jin Yao <yao.jin@...ux.intel.com>
Subject: [PATCH v2 2/5] perf/x86/intel: Record branch type

Perf already has support for disassembling the branch instruction
and using the branch type for filtering. The patch just records
the branch type in perf_branch_entry.

Before recording, the patch converts the x86 branch classification
to the common branch classification and checks whether the branch
crosses a 4K or 2MB area. This check is only an approximation of
crossing a 4K page or a 2MB page.

Signed-off-by: Jin Yao <yao.jin@...ux.intel.com>
---
 arch/x86/events/intel/lbr.c | 106 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 105 insertions(+), 1 deletion(-)

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 81b321a..635a0fb 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -109,6 +109,9 @@ enum {
 	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
 	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
 	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */
+
+	X86_BR_TYPE_SAVE	= 1 << 18,/* indicate to save branch type */
+
 };
 
 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -139,6 +142,9 @@ enum {
 	 X86_BR_IRQ		|\
 	 X86_BR_INT)
 
+#define AREA_4K		4096
+#define AREA_2M		(2 * 1024 * 1024)
+
 static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
 
 /*
@@ -670,6 +676,10 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 
 	if (br_type & PERF_SAMPLE_BRANCH_CALL)
 		mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
+
+	if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
+		mask |= X86_BR_TYPE_SAVE;
+
 	/*
 	 * stash actual user request into reg, it may
 	 * be used by fixup code for some CPU
@@ -923,6 +933,84 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 	return ret;
 }
 
+static int
+common_branch_type(int type, u64 from, u64 to)	/* X86_BR_* -> PERF_BR_* */
+{
+	int ret;
+
+	type = type & (~(X86_BR_KERNEL | X86_BR_USER)); /* drop priv bits */
+
+	switch (type) {
+	case X86_BR_CALL:
+	case X86_BR_ZERO_CALL:	/* zero length call is reported as a call */
+		ret = PERF_BR_CALL;
+		break;
+
+	case X86_BR_RET:
+		ret = PERF_BR_RET;
+		break;
+
+	case X86_BR_SYSCALL:
+		ret = PERF_BR_SYSCALL;
+		break;
+
+	case X86_BR_SYSRET:
+		ret = PERF_BR_SYSRET;
+		break;
+
+	case X86_BR_INT:
+		ret = PERF_BR_INT;
+		break;
+
+	case X86_BR_IRET:
+		ret = PERF_BR_IRET;
+		break;
+
+	case X86_BR_IRQ:
+		ret = PERF_BR_IRQ;
+		break;
+
+	case X86_BR_ABORT:	/* aborts are reported as far branches */
+		ret = PERF_BR_FAR_BRANCH;
+		break;
+
+	case X86_BR_JCC:
+		if (to > from)	/* target above source: forward */
+			ret = PERF_BR_JCC_FWD;
+		else		/* to <= from, including to == from */
+			ret = PERF_BR_JCC_BWD;
+		break;
+
+	case X86_BR_JMP:
+		ret = PERF_BR_JMP;
+		break;
+
+	case X86_BR_IND_CALL:
+		ret = PERF_BR_IND_CALL;
+		break;
+
+	case X86_BR_IND_JMP:
+		ret = PERF_BR_IND_JMP;
+		break;
+
+	default:		/* anything not matched above */
+		ret = PERF_BR_NONE;
+	}
+
+	return ret;
+}
+
+static bool	/* true if addr1/addr2 are in different size-aligned areas */
+cross_area(u64 addr1, u64 addr2, int size)
+{
+	u64 align1, align2;
+
+	align1 = addr1 & ~(size - 1);	/* round down; size must be 2^n */
+	align2 = addr2 & ~(size - 1);	/* callers pass AREA_4K / AREA_2M */
+
+	return (align1 != align2) ? true : false;
+}
+
 /*
  * implement actual branch filter based on user demand.
  * Hardware may not exactly satisfy that request, thus
@@ -939,7 +1027,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 	bool compress = false;
 
 	/* if sampling all branches, then nothing to filter */
-	if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
+	if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
+	    ((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
 		return;
 
 	for (i = 0; i < cpuc->lbr_stack.nr; i++) {
@@ -960,6 +1049,21 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 			cpuc->lbr_entries[i].from = 0;
 			compress = true;
 		}
+
+		if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE) {
+			cpuc->lbr_entries[i].type = common_branch_type(type,
+								       from,
+								       to);
+			if (cross_area(from, to, AREA_2M))
+				cpuc->lbr_entries[i].cross = PERF_BR_CROSS_2M;
+			else if (cross_area(from, to, AREA_4K))
+				cpuc->lbr_entries[i].cross = PERF_BR_CROSS_4K;
+			else
+				cpuc->lbr_entries[i].cross = PERF_BR_CROSS_NONE;
+		} else {
+			cpuc->lbr_entries[i].type = PERF_BR_NONE;
+			cpuc->lbr_entries[i].cross = PERF_BR_CROSS_NONE;
+		}
 	}
 
 	if (!compress)
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ