[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230816143628.1885281-2-kan.liang@linux.intel.com>
Date: Wed, 16 Aug 2023 07:36:24 -0700
From: kan.liang@...ux.intel.com
To: peterz@...radead.org, mingo@...hat.com, acme@...nel.org,
linux-kernel@...r.kernel.org
Cc: mark.rutland@....com, alexander.shishkin@...ux.intel.com,
jolsa@...nel.org, namhyung@...nel.org, irogers@...gle.com,
adrian.hunter@...el.com, ak@...ux.intel.com, eranian@...gle.com,
alexey.v.bayduraev@...ux.intel.com, tinghao.zhang@...el.com,
Kan Liang <kan.liang@...ux.intel.com>
Subject: [PATCH V3 2/6] perf/x86: Add PERF_X86_EVENT_NEEDS_BRANCH_STACK flag
From: Kan Liang <kan.liang@...ux.intel.com>
Currently, branch_sample_type !=0 is used to check whether a branch
stack setup is required. But it doesn't check the sample type,
unnecessary branch stack setup may be done for a counting event. E.g.,
perf record -e "{branch-instructions,branch-misses}:S" -j any
Also, the event with the new PERF_SAMPLE_BRANCH_EVT_CNTRS branch sample
type may not require a branch stack setup either.
Add a new flag NEEDS_BRANCH_STACK to indicate whether the event requires
a branch stack setup. Replace the needs_branch_stack() by checking the
new flag.
The counting event check is implemented here. The later patch will take
the new PERF_SAMPLE_BRANCH_EVT_CNTRS into account.
Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
---
New patch
arch/x86/events/intel/core.c | 14 +++++++++++---
arch/x86/events/perf_event_flags.h | 1 +
2 files changed, 12 insertions(+), 3 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 0f4ce79de4f8..8434315b765f 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2518,9 +2518,14 @@ static void intel_pmu_assign_event(struct perf_event *event, int idx)
perf_report_aux_output_id(event, idx);
}
+static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event)
+{
+ return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+}
+
static void intel_pmu_del_event(struct perf_event *event)
{
- if (needs_branch_stack(event))
+ if (intel_pmu_needs_branch_stack(event))
intel_pmu_lbr_del(event);
if (event->attr.precise_ip)
intel_pmu_pebs_del(event);
@@ -2831,7 +2836,7 @@ static void intel_pmu_add_event(struct perf_event *event)
{
if (event->attr.precise_ip)
intel_pmu_pebs_add(event);
- if (needs_branch_stack(event))
+ if (intel_pmu_needs_branch_stack(event))
intel_pmu_lbr_add(event);
}
@@ -3908,7 +3913,10 @@ static int intel_pmu_hw_config(struct perf_event *event)
x86_pmu.pebs_aliases(event);
}
- if (needs_branch_stack(event)) {
+ if (needs_branch_stack(event) && is_sampling_event(event))
+ event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK;
+
+ if (intel_pmu_needs_branch_stack(event)) {
ret = intel_pmu_setup_lbr_filter(event);
if (ret)
return ret;
diff --git a/arch/x86/events/perf_event_flags.h b/arch/x86/events/perf_event_flags.h
index 1dc19b9b4426..a1685981c520 100644
--- a/arch/x86/events/perf_event_flags.h
+++ b/arch/x86/events/perf_event_flags.h
@@ -20,3 +20,4 @@ PERF_ARCH(TOPDOWN, 0x04000) /* Count Topdown slots/metrics events */
PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */
PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */
PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */
+PERF_ARCH(NEEDS_BRANCH_STACK, 0x40000) /* require branch stack setup */
--
2.35.1
Powered by blists - more mailing lists