Message-ID: <CABPqkBS+k4Om3-sQWGBFN-imhiU8fXYsiDR1XAyp0Ro3uknCHw@mail.gmail.com>
Date: Thu, 17 Apr 2025 21:38:05 -0700
From: Stephane Eranian <eranian@...gle.com>
To: Sandipan Das <sandipan.das@....com>
Cc: linux-perf-users@...r.kernel.org, linux-kernel@...r.kernel.org, 
	peterz@...radead.org, mingo@...hat.com, acme@...nel.org, namhyung@...nel.org, 
	mark.rutland@....com, alexander.shishkin@...ux.intel.com, jolsa@...nel.org, 
	irogers@...gle.com, adrian.hunter@...el.com, kan.liang@...ux.intel.com, 
	tglx@...utronix.de, bp@...en8.de, dave.hansen@...ux.intel.com, x86@...nel.org, 
	hpa@...or.com, songliubraving@...a.com, ravi.bangoria@....com, 
	ananth.narayan@....com
Subject: Re: [PATCH v2 3/5] perf/x86/amd/uncore: Use hrtimer for handling overflows

On Thu, Apr 17, 2025 at 8:44 PM Sandipan Das <sandipan.das@....com> wrote:
>
> Uncore counters do not provide mechanisms such as interrupts to
> report overflows, so the accumulated user-visible count becomes
> incorrect if the counter overflows more than once between two
> successive reads of the same event: prev_count goes stale, and the
> delta computed from it is wrong.
>
> To avoid this, start an hrtimer that periodically initiates a
> pmu->read() of the active counters to keep prev_count up to date.
> Note that for this approach to be effective, the hrtimer period must
> be shorter than the shortest time in which a counter can overflow.
>
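To make the failure mode concrete: a pmu->read() typically folds the
raw counter value into the event count with a masked delta, along the
lines of the sketch below (the 48-bit width and the helper name are
assumptions for illustration, not taken from this patch).

/*
 * Minimal sketch of masked-delta accounting for a free-running
 * counter. COUNTER_BITS and read_hw_counter() are illustrative
 * placeholders, not identifiers from the patch.
 */
#define COUNTER_BITS	48
#define COUNTER_MASK	((1ULL << COUNTER_BITS) - 1)

static u64 prev_count;		/* last raw value observed */
static u64 event_count;		/* accumulated user-visible count */

static void update_event_count(void)
{
	u64 now = read_hw_counter() & COUNTER_MASK;	/* hypothetical */
	u64 delta = (now - prev_count) & COUNTER_MASK;

	/*
	 * Correct only if the counter wrapped at most once since
	 * prev_count was recorded; every additional wrap silently
	 * drops 2^COUNTER_BITS events from the delta. Hence the
	 * periodic read: refresh prev_count faster than the shortest
	 * overflow interval and a full wrap can never be missed.
	 */
	event_count += delta;
	prev_count = now;
}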
The problem I see is that the number of uncore PMUs varies a lot with
the CPU model, in particular because of the L3 PMU.
Is there a timer armed per CCX, or only a global one that generates
IPIs to all the other CPUs?

> Signed-off-by: Sandipan Das <sandipan.das@....com>
> ---
>  arch/x86/events/amd/uncore.c | 63 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 63 insertions(+)
>
> diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
> index 010024f09f2c..e09bfbb4a4cd 100644
> --- a/arch/x86/events/amd/uncore.c
> +++ b/arch/x86/events/amd/uncore.c
> @@ -21,6 +21,7 @@
>  #define NUM_COUNTERS_NB                4
>  #define NUM_COUNTERS_L2                4
>  #define NUM_COUNTERS_L3                6
> +#define NUM_COUNTERS_MAX       64
>
>  #define RDPMC_BASE_NB          6
>  #define RDPMC_BASE_LLC         10
> @@ -38,6 +39,10 @@ struct amd_uncore_ctx {
>         int refcnt;
>         int cpu;
>         struct perf_event **events;
> +       unsigned long active_mask[BITS_TO_LONGS(NUM_COUNTERS_MAX)];
> +       int nr_active;
> +       struct hrtimer hrtimer;
> +       u64 hrtimer_duration;
>  };
>
>  struct amd_uncore_pmu {
> @@ -87,6 +92,42 @@ static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
>         return container_of(event->pmu, struct amd_uncore_pmu, pmu);
>  }
>
> +static enum hrtimer_restart amd_uncore_hrtimer(struct hrtimer *hrtimer)
> +{
> +       struct amd_uncore_ctx *ctx;
> +       struct perf_event *event;
> +       int bit;
> +
> +       ctx = container_of(hrtimer, struct amd_uncore_ctx, hrtimer);
> +
> +       if (!ctx->nr_active || ctx->cpu != smp_processor_id())
> +               return HRTIMER_NORESTART;
> +
> +       for_each_set_bit(bit, ctx->active_mask, NUM_COUNTERS_MAX) {
> +               event = ctx->events[bit];
> +               event->pmu->read(event);
> +       }
> +
> +       hrtimer_forward_now(hrtimer, ns_to_ktime(ctx->hrtimer_duration));
> +       return HRTIMER_RESTART;
> +}
> +
> +static void amd_uncore_start_hrtimer(struct amd_uncore_ctx *ctx)
> +{
> +       hrtimer_start(&ctx->hrtimer, ns_to_ktime(ctx->hrtimer_duration),
> +                     HRTIMER_MODE_REL_PINNED_HARD);
> +}
> +
> +static void amd_uncore_cancel_hrtimer(struct amd_uncore_ctx *ctx)
> +{
> +       hrtimer_cancel(&ctx->hrtimer);
> +}
> +
> +static void amd_uncore_init_hrtimer(struct amd_uncore_ctx *ctx)
> +{
> +       hrtimer_setup(&ctx->hrtimer, amd_uncore_hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
> +}
> +
>  static void amd_uncore_read(struct perf_event *event)
>  {
>         struct hw_perf_event *hwc = &event->hw;
> @@ -117,18 +158,26 @@ static void amd_uncore_read(struct perf_event *event)
>
>  static void amd_uncore_start(struct perf_event *event, int flags)
>  {
> +       struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
> +       struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
>         struct hw_perf_event *hwc = &event->hw;
>
> +       if (!ctx->nr_active++)
> +               amd_uncore_start_hrtimer(ctx);
> +
>         if (flags & PERF_EF_RELOAD)
>                 wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
>
>         hwc->state = 0;
> +       __set_bit(hwc->idx, ctx->active_mask);
>         wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE));
>         perf_event_update_userpage(event);
>  }
>
>  static void amd_uncore_stop(struct perf_event *event, int flags)
>  {
> +       struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
> +       struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
>         struct hw_perf_event *hwc = &event->hw;
>
>         wrmsrl(hwc->config_base, hwc->config);
> @@ -138,6 +187,11 @@ static void amd_uncore_stop(struct perf_event *event, int flags)
>                 event->pmu->read(event);
>                 hwc->state |= PERF_HES_UPTODATE;
>         }
> +
> +       if (!--ctx->nr_active)
> +               amd_uncore_cancel_hrtimer(ctx);
> +
> +       __clear_bit(hwc->idx, ctx->active_mask);
>  }
>
>  static int amd_uncore_add(struct perf_event *event, int flags)
> @@ -490,6 +544,9 @@ static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
>                                 goto fail;
>                         }
>
> +                       amd_uncore_init_hrtimer(curr);
> +                       curr->hrtimer_duration = 60LL * NSEC_PER_SEC;
> +
>                         cpumask_set_cpu(cpu, &pmu->active_mask);
>                 }
>
> @@ -879,12 +936,18 @@ static int amd_uncore_umc_event_init(struct perf_event *event)
>
>  static void amd_uncore_umc_start(struct perf_event *event, int flags)
>  {
> +       struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event);
> +       struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu);
>         struct hw_perf_event *hwc = &event->hw;
>
> +       if (!ctx->nr_active++)
> +               amd_uncore_start_hrtimer(ctx);
> +
>         if (flags & PERF_EF_RELOAD)
>                 wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
>
>         hwc->state = 0;
> +       __set_bit(hwc->idx, ctx->active_mask);
>         wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
>         perf_event_update_userpage(event);
>  }
> --
> 2.43.0
>
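As a sanity check on the 60 s period chosen in amd_uncore_ctx_init()
above: assuming the uncore counters are 48 bits wide and increment at
a deliberately pessimistic 10 GHz, the worst-case wrap interval is
2^48 / 10^10 ~ 28147 s, roughly 7.8 hours, so a 60 s refresh leaves a
wide margin. A standalone arithmetic check (both figures are
assumptions, not from the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t span = 1ULL << 48;	/* assumed counter width */
	const double rate_hz = 10e9;		/* pessimistic event rate */
	double wrap_s = (double)span / rate_hz;

	/* Prints ~28147 s (~7.8 h); compare with the 60 s period. */
	printf("worst-case wrap interval: %.0f s (~%.1f h)\n",
	       wrap_s, wrap_s / 3600.0);
	return 0;
}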
