[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <bf63dee4-d25f-89d8-1893-572d84cfa667@linux.intel.com>
Date: Fri, 3 Jul 2020 16:59:49 -0400
From: "Liang, Kan" <kan.liang@...ux.intel.com>
To: Peter Zijlstra <peterz@...radead.org>
Cc: mingo@...hat.com, acme@...nel.org, tglx@...utronix.de,
bp@...en8.de, x86@...nel.org, linux-kernel@...r.kernel.org,
mark.rutland@....com, alexander.shishkin@...ux.intel.com,
jolsa@...hat.com, namhyung@...nel.org, dave.hansen@...el.com,
yu-cheng.yu@...el.com, bigeasy@...utronix.de, gorcunov@...il.com,
hpa@...or.com, alexey.budankov@...ux.intel.com, eranian@...gle.com,
ak@...ux.intel.com, like.xu@...ux.intel.com,
yao.jin@...ux.intel.com, wei.w.wang@...el.com
Subject: Re: [PATCH V3 13/23] perf/x86/intel/lbr: Factor out
intel_pmu_store_lbr
On 7/3/2020 3:50 PM, Peter Zijlstra wrote:
> On Fri, Jul 03, 2020 at 05:49:19AM -0700, kan.liang@...ux.intel.com wrote:
>> +static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
>> + struct lbr_entry *entries)
>> +{
>> + struct perf_branch_entry *e;
>> + struct lbr_entry *lbr;
>> + u64 from, to, info;
>> + int i;
>> +
>> + for (i = 0; i < x86_pmu.lbr_nr; i++) {
>> + lbr = entries ? &entries[i] : NULL;
>> + e = &cpuc->lbr_entries[i];
>> +
>> + from = rdlbr_from(i, lbr);
>> + /*
>> + * Read LBR entries until invalid entry (0s) is detected.
>> + */
>> + if (!from)
>> + break;
>> +
>> + to = rdlbr_to(i, lbr);
>> + info = rdlbr_info(i, lbr);
>> +
>> + e->from = from;
>> + e->to = to;
>> + e->mispred = !!(info & LBR_INFO_MISPRED);
>> + e->predicted = !(info & LBR_INFO_MISPRED);
>> + e->in_tx = !!(info & LBR_INFO_IN_TX);
>> + e->abort = !!(info & LBR_INFO_ABORT);
>> + e->cycles = info & LBR_INFO_CYCLES;
>> + e->type = 0;
>> + e->reserved = 0;
>> + }
>> +
>> + cpuc->lbr_stack.nr = i;
>> +}
>
> If I'm not mistaken, this correctly deals with LBR_FORMAT_INFO, so can't
> we also use the intel_pmu_arch_lbr_read() function for that case?
But the intel_pmu_arch_lbr_read() doesn't have the optimization
(LBR_NO_INFO) for the LBR_FORMAT_INFO.
https://lkml.kernel.org/r/tip-b16a5b52eb90d92b597257778e51e1fdc6423e64@git.kernel.org
To apply the optimization, we need extra codes as below.
The problem is that the arch LBR XSAVES read and the adaptive PEBS read
don't need the optimization.
Also, the name intel_pmu_arch_lbr_read() becomes misleading.
LBR_FORMAT_INFO doesn't have an exact format as arch LBR.
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 213e814..9ff5ab7 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -929,7 +929,8 @@ static __always_inline bool get_lbr_cycles(u64 info)
}
static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
- struct lbr_entry *entries)
+ struct lbr_entry *entries,
+ bool need_info)
{
struct perf_branch_entry *e;
struct lbr_entry *lbr;
@@ -948,25 +949,36 @@ static void intel_pmu_store_lbr(struct
cpu_hw_events *cpuc,
break;
to = rdlbr_to(i, lbr);
- info = rdlbr_info(i, lbr);
e->from = from;
e->to = to;
- e->mispred = get_lbr_mispred(info);
- e->predicted = get_lbr_predicted(info);
- e->in_tx = !!(info & LBR_INFO_IN_TX);
- e->abort = !!(info & LBR_INFO_ABORT);
- e->cycles = get_lbr_cycles(info);
- e->type = get_lbr_br_type(info);
+ if (need_info) {
+ info = rdlbr_info(i, lbr);
+ e->mispred = get_lbr_mispred(info);
+ e->predicted = get_lbr_predicted(info);
+ e->in_tx = !!(info & LBR_INFO_IN_TX);
+ e->abort = !!(info & LBR_INFO_ABORT);
+ e->cycles = get_lbr_cycles(info);
+ e->type = get_lbr_br_type(info);
+ }
+
e->reserved = 0;
}
cpuc->lbr_stack.nr = i;
}
+static __always_inline bool lbr_need_info(struct cpu_hw_events *cpuc)
+{
+ if (cpuc->lbr_sel)
+ return !(cpuc->lbr_sel->config & LBR_NO_INFO);
+
+ return false;
+}
+
static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc)
{
- intel_pmu_store_lbr(cpuc, NULL);
+ intel_pmu_store_lbr(cpuc, NULL, lbr_need_info(cpuc));
}
static void intel_pmu_arch_lbr_read_xsave(struct cpu_hw_events *cpuc)
@@ -974,12 +986,12 @@ static void intel_pmu_arch_lbr_read_xsave(struct
cpu_hw_events *cpuc)
struct x86_perf_task_context_arch_lbr_xsave *xsave = cpuc->lbr_xsave;
if (!xsave) {
- intel_pmu_store_lbr(cpuc, NULL);
+ intel_pmu_store_lbr(cpuc, NULL, lbr_need_info(cpuc));
return;
}
copy_dynamic_supervisor_to_kernel(&xsave->xsave, XFEATURE_MASK_LBR);
- intel_pmu_store_lbr(cpuc, xsave->lbr.entries);
+ intel_pmu_store_lbr(cpuc, xsave->lbr.entries, lbr_need_info(cpuc));
}
void intel_pmu_lbr_read(void)
@@ -1460,7 +1472,7 @@ void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr)
else
cpuc->lbr_stack.hw_idx = intel_pmu_lbr_tos();
- intel_pmu_store_lbr(cpuc, lbr->lbr);
+ intel_pmu_store_lbr(cpuc, lbr->lbr, lbr_need_info(cpuc));
intel_pmu_lbr_filter(cpuc);
}
Thanks,
Kan
>
> Then we can delete that section from read_64...
>
> Index: linux-2.6/arch/x86/events/intel/core.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/events/intel/core.c
> +++ linux-2.6/arch/x86/events/intel/core.c
> @@ -4664,6 +4664,9 @@ __init int intel_pmu_init(void)
> x86_pmu.lbr_read = intel_pmu_lbr_read_32;
> }
>
> + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
> + x86_pmu.lbr_read = intel_pmu_arch_lbr_read;
> +
> if (boot_cpu_has(X86_FEATURE_ARCH_LBR))
> intel_pmu_arch_lbr_init();
>
Powered by blists - more mailing lists