Message-ID: <e8b5086e-735f-51ea-f413-3b2d456c0e32@linux.intel.com>
Date: Mon, 29 Aug 2022 11:55:12 -0400
From: "Liang, Kan" <kan.liang@...ux.intel.com>
To: Peter Zijlstra <peterz@...radead.org>, x86@...nel.org,
eranian@...gle.com, ravi.bangoria@....com
Cc: linux-kernel@...r.kernel.org, acme@...nel.org,
mark.rutland@....com, alexander.shishkin@...ux.intel.com,
jolsa@...nel.org, namhyung@...nel.org
Subject: Re: [PATCH v2 9/9] perf/x86/intel: Optimize short PEBS counters
On 2022-08-29 6:10 a.m., Peter Zijlstra wrote:
> XXX: crazy idea; really not sure this is worth the extra complexity
>
> It is possible to have the counter programmed to a value smaller than
> the sampling period.
I'm not quite sure how the above case can be triggered.
For most cases, pmc_prev_left[idx] should be the same as
hwc->period_left.
For the left < 2 and limit_period cases, perf usually programs a larger
value, so pmc_prev_left[idx] > hwc->period_left.
It looks like the only case that triggers pmc_prev_left[idx] <
hwc->period_left is left > max_period. I don't think it's common for a
user to set a period larger than the HW counter limit.
Even with such a huge period, the PEBS overhead should not be an issue,
since it may take days to trigger a single sample.
If so, it may not be a good idea to introduce this much complexity just
to handle such a rare case.
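
To illustrate, here is a rough user-space sketch of the clamping done by
x86_perf_event_set_period() (simplified; program_counter() and the
48-bit max_period are made up for illustration, and the real code also
handles wraparound and the limit_period callback). Only the last clamp
can make the programmed value smaller than period_left, i.e. the
left > max_period case above:

/*
 * Toy user-space model of the clamping in x86_perf_event_set_period().
 * "programmed" corresponds to pmc_prev_left[idx]; the value actually
 * written to the PMC is -left.
 */
#include <stdio.h>
#include <stdint.h>

static int64_t program_counter(int64_t period_left, int64_t max_period)
{
        int64_t left = period_left;

        if (left < 2)                   /* quirk: never program < 2 */
                left = 2;
        if (left > max_period)          /* clamp to what the counter can hold */
                left = max_period;

        return left;
}

int main(void)
{
        int64_t max_period = (1LL << 47) - 1;   /* assumed 48-bit counter */
        int64_t huge = 1LL << 50;               /* period beyond the HW limit */

        /* Only this case gives programmed < period_left (a "short" counter). */
        printf("programmed=%lld period_left=%lld\n",
               (long long)program_counter(huge, max_period), (long long)huge);
        return 0;
}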
Thanks,
Kan
> In that case, the code suppresses the sample,
> recalculates the remaining events and reprograms the counter.
>
> This should also work for PEBS counters (and it does); however
> triggering a full PEBS assist and parsing the event from the DS is
> more overhead than is required.
>
> As such, detect this case and temporarily suppress PEBS. This will
> then trigger a regular PMI for the counter which will reprogram the
> event and re-enable PEBS once the target period is in reach.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
> ---
> arch/x86/events/intel/core.c | 80 ++++++++++++++++++++++++++++++++++++++-----
> arch/x86/events/perf_event.h | 9 ++++
> 2 files changed, 81 insertions(+), 8 deletions(-)
>
> --- a/arch/x86/events/intel/core.c
> +++ b/arch/x86/events/intel/core.c
> @@ -2722,12 +2722,7 @@ static void intel_pmu_enable_fixed(struc
>
> intel_set_masks(event, idx);
>
> - /*
> - * Enable IRQ generation (0x8), if not PEBS,
> - * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
> - * if requested:
> - */
> - if (!event->attr.precise_ip)
> + if (hwc->config & ARCH_PERFMON_EVENTSEL_INT)
> bits |= 0x8;
> if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
> bits |= 0x2;
> @@ -2816,12 +2811,75 @@ int intel_pmu_save_and_restart(struct pe
> return static_call(x86_pmu_set_period)(event);
> }
>
> +static void intel_pmu_update_config(struct perf_event *event)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> + u64 config = hwc->config;
> +
> + if (hwc->idx >= INTEL_PMC_IDX_FIXED) { /* PEBS is limited to real PMCs */
> + u64 mask = 0xf, bits = 0;
> +
> + if (config & ARCH_PERFMON_EVENTSEL_INT)
> + bits |= 0x8;
> + if (config & ARCH_PERFMON_EVENTSEL_USR)
> + bits |= 0x2;
> + if (config & ARCH_PERFMON_EVENTSEL_OS)
> + bits |= 0x1;
> +
> + bits <<= (hwc->idx * 4);
> + mask <<= (hwc->idx * 4);
> +
> + config = this_cpu_read(intel_fixed_ctrl);
> + config &= ~mask;
> + config |= bits;
> + this_cpu_write(intel_fixed_ctrl, config);
> + }
> +
> + wrmsrl(hwc->config_base, config);
> +}
> +
> +static void intel_pmu_handle_short_pebs(struct perf_event *event)
> +{
> + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> + struct hw_perf_event *hwc = &event->hw;
> +
> + /* if the event is not enabled; intel_pmu_pebs_enable() DTRT */
> + if (!test_bit(hwc->idx, cpuc->active_mask))
> + return;
> +
> + WARN_ON_ONCE(cpuc->enabled);
> +
> + if (intel_pmu_is_short_pebs(event)) {
> +
> + /* stripped down intel_pmu_pebs_disable() */
> + cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
> + hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
> +
> + intel_pmu_update_config(event);
> +
> + } else if (!(cpuc->pebs_enabled & (1ULL << hwc->idx))) {
> +
> + /* stripped down intel_pmu_pebs_enable() */
> + hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
> + cpuc->pebs_enabled |= (1ULL << hwc->idx);
> +
> + intel_pmu_update_config(event);
> + }
> +}
> +
> static int intel_pmu_set_period(struct perf_event *event)
> {
> + int ret;
> +
> if (unlikely(is_topdown_count(event)))
> return static_call(intel_pmu_set_topdown_event_period)(event);
>
> - return x86_perf_event_set_period(event);
> + ret = x86_perf_event_set_period(event);
> +
> + if (event->attr.precise_ip)
> + intel_pmu_handle_short_pebs(event);
> +
> + return ret;
> }
>
> static u64 intel_pmu_update(struct perf_event *event)
> @@ -2975,6 +3033,9 @@ static int handle_pmi_common(struct pt_r
> * MSR_IA32_PEBS_ENABLE is not updated. Because the
> * cpuc->enabled has been forced to 0 in PMI.
> * Update the MSR if pebs_enabled is changed.
> + *
> + * Also; short counters temporarily disable PEBS, see
> + * intel_pmu_set_period().
> */
> if (pebs_enabled != cpuc->pebs_enabled)
> wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
> @@ -3856,7 +3917,10 @@ static int intel_pmu_hw_config(struct pe
> if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
> return -EINVAL;
>
> - if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
> + if (!(event->attr.freq ||
> + (event->attr.wakeup_events && !event->attr.watermark) ||
> + event->attr.sample_period > x86_pmu.max_period)) {
> +
> event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
> if (!(event->attr.sample_type &
> ~intel_pmu_large_pebs_flags(event))) {
> --- a/arch/x86/events/perf_event.h
> +++ b/arch/x86/events/perf_event.h
> @@ -1063,6 +1063,15 @@ static inline bool x86_pmu_has_lbr_calls
> DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
> DECLARE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
>
> +static inline bool intel_pmu_is_short_pebs(struct perf_event *event)
> +{
> + struct hw_perf_event *hwc = &event->hw;
> + s64 counter = this_cpu_read(pmc_prev_left[hwc->idx]);
> + s64 left = local64_read(&hwc->period_left);
> +
> + return counter < left;
> +}
> +
> int x86_perf_event_set_period(struct perf_event *event);
>
> /*
>
>