[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAP-5=fWFeqEK4woCj2ngjxMi4B4EZ3y0gLN+qNu4oNfp4wG8xA@mail.gmail.com>
Date: Mon, 20 Nov 2023 14:41:26 -0800
From: Ian Rogers <irogers@...gle.com>
To: Namhyung Kim <namhyung@...nel.org>
Cc: Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
LKML <linux-kernel@...r.kernel.org>,
Kan Liang <kan.liang@...ux.intel.com>,
Mingwei Zhang <mizhang@...gle.com>
Subject: Re: [PATCH 1/3] perf/core: Update perf_adjust_freq_unthr_context()
On Mon, Nov 20, 2023 at 2:19 PM Namhyung Kim <namhyung@...nel.org> wrote:
>
> perf_adjust_freq_unthr_context() was unnecessarily disabling and
> enabling the PMU for each event. This should be done once at the PMU
> level. Add a pmu_ctx->nr_freq counter so the check can be made per
> PMU. As the pmu context has separate active lists for pinned groups
> and flexible groups, factor out a new function to do the job.
>
> Another minor optimization is that PMUs with PERF_PMU_CAP_NO_INTERRUPT
> can be skipped even if sampling events need to be unthrottled.
>
> Signed-off-by: Namhyung Kim <namhyung@...nel.org>
Series:
Reviewed-by: Ian Rogers <irogers@...gle.com>
Thanks,
Ian
> ---
> include/linux/perf_event.h | 1 +
> kernel/events/core.c | 68 +++++++++++++++++++++++---------------
> 2 files changed, 43 insertions(+), 26 deletions(-)
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 0367d748fae0..3eb17dc89f5e 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -879,6 +879,7 @@ struct perf_event_pmu_context {
>
> unsigned int nr_events;
> unsigned int nr_cgroups;
> + unsigned int nr_freq;
>
> atomic_t refcount; /* event <-> epc */
> struct rcu_head rcu_head;
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 3eb26c2c6e65..53e2ad73102d 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -2275,8 +2275,10 @@ event_sched_out(struct perf_event *event, struct perf_event_context *ctx)
>
> if (!is_software_event(event))
> cpc->active_oncpu--;
> - if (event->attr.freq && event->attr.sample_freq)
> + if (event->attr.freq && event->attr.sample_freq) {
> ctx->nr_freq--;
> + epc->nr_freq--;
> + }
> if (event->attr.exclusive || !cpc->active_oncpu)
> cpc->exclusive = 0;
>
> @@ -2531,9 +2533,10 @@ event_sched_in(struct perf_event *event, struct perf_event_context *ctx)
>
> if (!is_software_event(event))
> cpc->active_oncpu++;
> - if (event->attr.freq && event->attr.sample_freq)
> + if (event->attr.freq && event->attr.sample_freq) {
> ctx->nr_freq++;
> -
> + epc->nr_freq++;
> + }
> if (event->attr.exclusive)
> cpc->exclusive = 1;
>
> @@ -4096,30 +4099,14 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bo
> }
> }
>
> -/*
> - * combine freq adjustment with unthrottling to avoid two passes over the
> - * events. At the same time, make sure, having freq events does not change
> - * the rate of unthrottling as that would introduce bias.
> - */
> -static void
> -perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle)
> +static void perf_adjust_freq_unthr_events(struct list_head *event_list)
> {
> struct perf_event *event;
> struct hw_perf_event *hwc;
> u64 now, period = TICK_NSEC;
> s64 delta;
>
> - /*
> - * only need to iterate over all events iff:
> - * - context have events in frequency mode (needs freq adjust)
> - * - there are events to unthrottle on this cpu
> - */
> - if (!(ctx->nr_freq || unthrottle))
> - return;
> -
> - raw_spin_lock(&ctx->lock);
> -
> - list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
> + list_for_each_entry(event, event_list, active_list) {
> if (event->state != PERF_EVENT_STATE_ACTIVE)
> continue;
>
> @@ -4127,8 +4114,6 @@ perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle)
> if (!event_filter_match(event))
> continue;
>
> - perf_pmu_disable(event->pmu);
> -
> hwc = &event->hw;
>
> if (hwc->interrupts == MAX_INTERRUPTS) {
> @@ -4138,7 +4123,7 @@ perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle)
> }
>
> if (!event->attr.freq || !event->attr.sample_freq)
> - goto next;
> + continue;
>
> /*
> * stop the event and update event->count
> @@ -4160,8 +4145,39 @@ perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle)
> perf_adjust_period(event, period, delta, false);
>
> event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
> - next:
> - perf_pmu_enable(event->pmu);
> + }
> +}
> +
> +/*
> + * combine freq adjustment with unthrottling to avoid two passes over the
> + * events. At the same time, make sure, having freq events does not change
> + * the rate of unthrottling as that would introduce bias.
> + */
> +static void
> +perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle)
> +{
> + struct perf_event_pmu_context *pmu_ctx;
> +
> + /*
> + * only need to iterate over all events iff:
> + * - context have events in frequency mode (needs freq adjust)
> + * - there are events to unthrottle on this cpu
> + */
> + if (!(ctx->nr_freq || unthrottle))
> + return;
> +
> + raw_spin_lock(&ctx->lock);
> +
> + list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
> + if (!(pmu_ctx->nr_freq || unthrottle))
> + continue;
> + if (pmu_ctx->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT)
> + continue;
> +
> + perf_pmu_disable(pmu_ctx->pmu);
> + perf_adjust_freq_unthr_events(&pmu_ctx->pinned_active);
> + perf_adjust_freq_unthr_events(&pmu_ctx->flexible_active);
> + perf_pmu_enable(pmu_ctx->pmu);
> }
>
> raw_spin_unlock(&ctx->lock);
> --
> 2.43.0.rc1.413.gea7ed67945-goog
>
Powered by blists - more mailing lists