[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220804140729.2951259-1-kan.liang@linux.intel.com>
Date: Thu, 4 Aug 2022 07:07:29 -0700
From: kan.liang@...ux.intel.com
To: peterz@...radead.org, mingo@...hat.com, namhyung@...nel.org,
eranian@...gle.com, ak@...ux.intel.com,
alexander.shishkin@...ux.intel.com, linux-kernel@...r.kernel.org
Cc: Kan Liang <kan.liang@...ux.intel.com>
Subject: [RFC PATCH] perf/x86/intel: Optimize the access of the fixed counter control reg
From: Kan Liang <kan.liang@...ux.intel.com>
All the fixed counters share a fixed control register. The current perf
reads and re-writes the fixed control register for each fixed counter
disable/enable, which is unnecessary.
When changing the fixed control register, the entire PMU must be
disabled via the global control register. The changing cannot be taken
effect until the entire PMU is re-enabled. Only updating the fixed
control register once right before the entire PMU re-enabling is enough.
The read of the fixed control register is not necessary either. The
value can be cached in the per CPU cpu_hw_events.
Test results:
Counting all the fixed counters with the perf bench sched pipe as below
on a SPR machine.
$perf stat -e cycles,instructions,ref-cycles,slots --no-inherit --
taskset -c 1 perf bench sched pipe
The Total elapsed time reduces from 5.36s (without the patch) to 4.99s
(with the patch), which is ~6.9% improvement.
Signed-off-by: Kan Liang <kan.liang@...ux.intel.com>
---
arch/x86/events/intel/core.c | 22 +++++++++++++---------
arch/x86/events/perf_event.h | 4 ++++
2 files changed, 17 insertions(+), 9 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 45024abd929f..9e0ed28c9477 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2189,6 +2189,12 @@ static void __intel_pmu_enable_all(int added, bool pmi)
u64 intel_ctrl = hybrid(cpuc->pmu, intel_ctrl);
intel_pmu_lbr_enable_all(pmi);
+
+ if (cpuc->fixed_ctrl_val != cpuc->active_fixed_ctrl_val) {
+ wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, cpuc->fixed_ctrl_val);
+ cpuc->active_fixed_ctrl_val = cpuc->fixed_ctrl_val;
+ }
+
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
@@ -2406,9 +2412,10 @@ static inline void intel_clear_masks(struct perf_event *event, int idx)
static void intel_pmu_disable_fixed(struct perf_event *event)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- u64 ctrl_val, mask;
int idx = hwc->idx;
+ u64 mask;
if (is_topdown_idx(idx)) {
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
@@ -2425,9 +2432,7 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
intel_clear_masks(event, idx);
mask = 0xfULL << ((idx - INTEL_PMC_IDX_FIXED) * 4);
- rdmsrl(hwc->config_base, ctrl_val);
- ctrl_val &= ~mask;
- wrmsrl(hwc->config_base, ctrl_val);
+ cpuc->fixed_ctrl_val &= ~mask;
}
static void intel_pmu_disable_event(struct perf_event *event)
@@ -2697,8 +2702,9 @@ static void intel_pmu_read_event(struct perf_event *event)
static void intel_pmu_enable_fixed(struct perf_event *event)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- u64 ctrl_val, mask, bits = 0;
+ u64 mask, bits = 0;
int idx = hwc->idx;
if (is_topdown_idx(idx)) {
@@ -2742,10 +2748,8 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
}
- rdmsrl(hwc->config_base, ctrl_val);
- ctrl_val &= ~mask;
- ctrl_val |= bits;
- wrmsrl(hwc->config_base, ctrl_val);
+ cpuc->fixed_ctrl_val &= ~mask;
+ cpuc->fixed_ctrl_val |= bits;
}
static void intel_pmu_enable_event(struct perf_event *event)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 21a5482bcf84..0a616c729aa5 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -270,6 +270,10 @@ struct cpu_hw_events {
u64 active_pebs_data_cfg;
int pebs_record_size;
+ /* Intel Fixed counter configuration */
+ u64 fixed_ctrl_val;
+ u64 active_fixed_ctrl_val;
+
/*
* Intel LBR bits
*/
--
2.35.1
Powered by blists - more mailing lists