lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Date: Tue, 7 Jul 2020 11:53:37 -0700 From: kan.liang@...ux.intel.com To: peterz@...radead.org, mingo@...hat.com, ak@...ux.intel.com, linux-kernel@...r.kernel.org Cc: alexander.shishkin@...ux.intel.com, jolsa@...hat.com, alexey.budankov@...ux.intel.com, eranian@...gle.com, like.xu@...ux.intel.com, yao.jin@...ux.intel.com, Kan Liang <kan.liang@...ux.intel.com> Subject: [PATCH] perf/x86/intel/lbr: Enable NO_{CYCLES,FLAGS} for all LBR formats From: Kan Liang <kan.liang@...ux.intel.com> An option to disable reading branch flags/cycles was introduced in commit b16a5b52eb90 ("perf/x86: Add option to disable reading branch flags/cycles"). Currently, the option is only supported by the LBR_FORMAT_INFO format. For the other LBR formats, including the legacy LBR, Architectural LBR, and LBR PEBS record, there is no effect with the NO_{CYCLES,FLAGS} flag set. The flags/cycles information is still output, which breaks the ABI. For all LBR formats, avoid outputting flags and cycles if the user explicitly sets the PERF_SAMPLE_BRANCH_NO_{CYCLES,FLAGS} branch type. For Architectural LBR, the branch type information is retrieved from the LBR_INFO field/MSR. With the NO_{CYCLES,FLAGS} flag set, the LBR_INFO will not be read. The branch type information will rely on software decoding, just like the legacy LBR. 
Fixes: b16a5b52eb90 ("perf/x86: Add option to disable reading branch flags/cycles") Signed-off-by: Kan Liang <kan.liang@...ux.intel.com> --- arch/x86/events/intel/lbr.c | 130 ++++++++++++++++++++++++++------------------ 1 file changed, 78 insertions(+), 52 deletions(-) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 63f58bd..944291a 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -799,6 +799,14 @@ void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) cpuc->lbr_stack.hw_idx = tos; } +static inline bool lbr_need_info(struct cpu_hw_events *cpuc) +{ + if (cpuc->lbr_sel) + return !(cpuc->lbr_sel->config & LBR_NO_INFO); + + return false; +} + /* * Due to lack of segmentation in Linux the effective address (offset) * is the same as the linear address, allowing us to merge the LIP and EIP @@ -806,7 +814,8 @@ void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) */ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) { - bool need_info = false, call_stack = false; + bool need_info = lbr_need_info(cpuc); + bool call_stack = false; unsigned long mask = x86_pmu.lbr_nr - 1; int lbr_format = x86_pmu.intel_cap.lbr_format; u64 tos = intel_pmu_lbr_tos(); @@ -814,11 +823,8 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) int out = 0; int num = x86_pmu.lbr_nr; - if (cpuc->lbr_sel) { - need_info = !(cpuc->lbr_sel->config & LBR_NO_INFO); - if (cpuc->lbr_sel->config & LBR_CALL_STACK) - call_stack = true; - } + if (cpuc->lbr_sel && (cpuc->lbr_sel->config & LBR_CALL_STACK)) + call_stack = true; for (i = 0; i < num; i++) { unsigned long lbr_idx = (tos - i) & mask; @@ -849,23 +855,28 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) } if (lbr_format == LBR_FORMAT_TIME) { - mis = !!(from & LBR_FROM_FLAG_MISPRED); - pred = !mis; skip = 1; - cycles = ((to >> 48) & LBR_INFO_CYCLES); - to = (u64)((((s64)to) << 16) >> 16); + if (need_info) { + mis = !!(from & LBR_FROM_FLAG_MISPRED); + pred = !mis; + cycles = ((to >> 
48) & LBR_INFO_CYCLES); + } } if (lbr_flags & LBR_EIP_FLAGS) { - mis = !!(from & LBR_FROM_FLAG_MISPRED); - pred = !mis; skip = 1; + if (need_info) { + mis = !!(from & LBR_FROM_FLAG_MISPRED); + pred = !mis; + } } if (lbr_flags & LBR_TSX) { - in_tx = !!(from & LBR_FROM_FLAG_IN_TX); - abort = !!(from & LBR_FROM_FLAG_ABORT); skip = 3; + if (need_info) { + in_tx = !!(from & LBR_FROM_FLAG_IN_TX); + abort = !!(from & LBR_FROM_FLAG_ABORT); + } } from = (u64)((((s64)from) << skip) >> skip); @@ -928,8 +939,21 @@ static __always_inline bool get_lbr_cycles(u64 info) return info & LBR_INFO_CYCLES; } +enum { + ARCH_LBR_BR_TYPE_JCC = 0, + ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1, + ARCH_LBR_BR_TYPE_NEAR_REL_JMP = 2, + ARCH_LBR_BR_TYPE_NEAR_IND_CALL = 3, + ARCH_LBR_BR_TYPE_NEAR_REL_CALL = 4, + ARCH_LBR_BR_TYPE_NEAR_RET = 5, + ARCH_LBR_BR_TYPE_KNOWN_MAX = ARCH_LBR_BR_TYPE_NEAR_RET, + + ARCH_LBR_BR_TYPE_MAP_MAX = 16, +}; + static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc, - struct lbr_entry *entries) + struct lbr_entry *entries, + bool need_info) { struct perf_branch_entry *e; struct lbr_entry *lbr; @@ -948,16 +972,33 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc, break; to = rdlbr_to(i, lbr); - info = rdlbr_info(i, lbr); e->from = from; e->to = to; - e->mispred = get_lbr_mispred(info); - e->predicted = get_lbr_predicted(info); - e->in_tx = !!(info & LBR_INFO_IN_TX); - e->abort = !!(info & LBR_INFO_ABORT); - e->cycles = get_lbr_cycles(info); - e->type = get_lbr_br_type(info); + if (need_info) { + info = rdlbr_info(i, lbr); + e->mispred = get_lbr_mispred(info); + e->predicted = get_lbr_predicted(info); + e->in_tx = !!(info & LBR_INFO_IN_TX); + e->abort = !!(info & LBR_INFO_ABORT); + e->cycles = get_lbr_cycles(info); + e->type = get_lbr_br_type(info); + } else { + e->mispred = 0; + e->predicted = 0; + e->in_tx = 0; + e->abort = 0; + e->cycles = 0; + /* + * For Architectural LBR, 0 means X86_BR_JCC. 
Assign an + * invalid branch type, which will be ignored in the + * intel_pmu_lbr_filter(). + * + * For the legacy LBR, there is no branch type + * information available. The field is always ignored. + */ + e->type = ARCH_LBR_BR_TYPE_KNOWN_MAX + 1; + } e->reserved = 0; } @@ -966,7 +1007,7 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc, static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc) { - intel_pmu_store_lbr(cpuc, NULL); + intel_pmu_store_lbr(cpuc, NULL, lbr_need_info(cpuc)); } static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc) @@ -974,12 +1015,12 @@ static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc) struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave; if (!xsave) { - intel_pmu_store_lbr(cpuc, NULL); + intel_pmu_store_lbr(cpuc, NULL, lbr_need_info(cpuc)); return; } copy_dynamic_supervisor_to_kernel(&xsave->xsave, XFEATURE_MASK_LBR); - intel_pmu_store_lbr(cpuc, xsave->lbr.entries); + intel_pmu_store_lbr(cpuc, xsave->lbr.entries, lbr_need_info(cpuc)); } void intel_pmu_lbr_read(void) @@ -1096,23 +1137,20 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) reg = &event->hw.branch_reg; reg->idx = EXTRA_REG_LBR; - if (static_cpu_has(X86_FEATURE_ARCH_LBR)) { + if (static_cpu_has(X86_FEATURE_ARCH_LBR)) reg->config = mask; - return 0; + else { + /* + * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate + * in suppress mode. So LBR_SELECT should be set to + * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK) + * But the 10th bit LBR_CALL_STACK does not operate + * in suppress mode. + */ + reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK); } - - /* - * The first 9 bits (LBR_SEL_MASK) in LBR_SELECT operate - * in suppress mode. So LBR_SELECT should be set to - * (~mask & LBR_SEL_MASK) | (mask & ~LBR_SEL_MASK) - * But the 10th bit LBR_CALL_STACK does not operate - * in suppress mode. 
- */ - reg->config = mask ^ (x86_pmu.lbr_sel_mask & ~LBR_CALL_STACK); - if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) && - (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) && - (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)) + (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS)) reg->config |= LBR_NO_INFO; return 0; @@ -1357,18 +1395,6 @@ common_branch_type(int type) return PERF_BR_UNKNOWN; } -enum { - ARCH_LBR_BR_TYPE_JCC = 0, - ARCH_LBR_BR_TYPE_NEAR_IND_JMP = 1, - ARCH_LBR_BR_TYPE_NEAR_REL_JMP = 2, - ARCH_LBR_BR_TYPE_NEAR_IND_CALL = 3, - ARCH_LBR_BR_TYPE_NEAR_REL_CALL = 4, - ARCH_LBR_BR_TYPE_NEAR_RET = 5, - ARCH_LBR_BR_TYPE_KNOWN_MAX = ARCH_LBR_BR_TYPE_NEAR_RET, - - ARCH_LBR_BR_TYPE_MAP_MAX = 16, -}; - static const int arch_lbr_br_type_map[ARCH_LBR_BR_TYPE_MAP_MAX] = { [ARCH_LBR_BR_TYPE_JCC] = X86_BR_JCC, [ARCH_LBR_BR_TYPE_NEAR_IND_JMP] = X86_BR_IND_JMP, @@ -1460,7 +1486,7 @@ void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr) else cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos(); - intel_pmu_store_lbr(cpuc, lbr); + intel_pmu_store_lbr(cpuc, lbr, lbr_need_info(cpuc)); intel_pmu_lbr_filter(cpuc); } -- 2.7.4
Powered by blists - more mailing lists