Message-ID: <CAM9d7chMn7Gmc4FYn_ZjMiojUCao90e80Zg5+hNXQ7MTeHrK_A@mail.gmail.com>
Date: Thu, 9 Dec 2021 13:51:42 -0800
From: Namhyung Kim <namhyung@...nel.org>
To: Peter Zijlstra <peterz@...radead.org>
Cc: Ingo Molnar <mingo@...nel.org>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Jiri Olsa <jolsa@...hat.com>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
LKML <linux-kernel@...r.kernel.org>,
Stephane Eranian <eranian@...gle.com>,
Andi Kleen <ak@...ux.intel.com>,
Ian Rogers <irogers@...gle.com>,
Song Liu <songliubraving@...com>
Subject: Re: [PATCH v3] perf/core: Set event shadow time for inactive events too
On Thu, Dec 9, 2021 at 3:35 AM Peter Zijlstra <peterz@...radead.org> wrote:
>
> On Thu, Dec 09, 2021 at 12:26:32PM +0100, Peter Zijlstra wrote:
> > On Sun, Dec 05, 2021 at 02:48:43PM -0800, Namhyung Kim wrote:
> >
> > > Actually 18446744069443110306 is 0xffffffff01b345a2 so it seems to
> > > have a negative enabled time. In fact, bperf keeps values returned by
> > > bpf_perf_event_read_value() which calls perf_event_read_local(), and
> > > accumulates delta between two calls. When event->shadow_ctx_time is
> > > not set, it'd return an invalid enabled time that is bigger than normal.
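
Side note: to make the failure mode concrete, the accumulation bperf does
between two reads boils down to something like this (names are
illustrative, not the actual code):

#include <stdint.h>
typedef uint64_t u64;

struct bperf_prev {
	u64 enabled;	/* last @enabled from bpf_perf_event_read_value() */
	u64 acc;	/* accumulated enabled time */
};

static void accumulate(struct bperf_prev *p, u64 enabled_now)
{
	/*
	 * If event->shadow_ctx_time was never set, perf_event_read_local()
	 * can report an @enabled smaller than the previous reading; the
	 * unsigned subtraction then wraps, giving e.g.
	 * 18446744069443110306 == 0xffffffff01b345a2.
	 */
	p->acc += enabled_now - p->enabled;
	p->enabled = enabled_now;
}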
> >
> > *that*, how does it happen that shadow_time isn't set? It should be last
> > set when the event switches to INACTIVE, no? At which point the logic in
> > perf_event_read_local() should make @enabled move forward while @running
> > stays put.
> >
> > Let me go rummage around a bit... either I'm missing something obvious
> > or something's smelly.
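
For context, the @enabled/@running computation in perf_event_read_local()
is roughly this (simplified):

	if (enabled || running) {
		u64 now = event->shadow_ctx_time + perf_clock();
		u64 __enabled, __running;

		__perf_update_times(event, now, &__enabled, &__running);
		if (enabled)
			*enabled = __enabled;
		if (running)
			*running = __running;
	}

So if shadow_ctx_time was never initialized, 'now' is computed from a
bogus base and the derived @enabled can jump around between two reads.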
>
> How's this then?
Still the same :(
Maybe it's because the event is enabled from the beginning,
so it never goes through set_state/update_time at all.
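
For reference, the event times are only refreshed on a state transition;
simplified (the sibling-time handling is omitted):

static void perf_event_set_state(struct perf_event *event,
				 enum perf_event_state state)
{
	if (event->state == state)
		return;

	perf_event_update_time(event);	/* with this patch, also sets shadow time */
	WRITE_ONCE(event->state, state);
}

An event created enabled starts directly in INACTIVE, so if it never
manages to get a counter it never passes through here, and the shadow
time stays unset even with the change below.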
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 7b7525e9155f..82baa8bdfc86 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -686,9 +686,7 @@ struct perf_event {
> /*
> * timestamp shadows the actual context timing but it can
> * be safely used in NMI interrupt context. It reflects the
> - * context time as it was when the event was last scheduled in,
> - * or when ctx_sched_in failed to schedule the event because we
> - * run out of PMC.
> + * context time as it was when the event was last scheduled in.
> *
> * ctx_time already accounts for ctx->timestamp. Therefore to
> * compute ctx_time for a sample, simply add perf_clock().
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 28aaeacdaea1..20637b7f420c 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -640,6 +640,9 @@ __perf_update_times(struct perf_event *event, u64 now, u64 *enabled, u64 *runnin
> *running += delta;
> }
>
> +static void perf_set_shadow_time(struct perf_event *event,
> + struct perf_event_context *ctx);
> +
> static void perf_event_update_time(struct perf_event *event)
> {
> u64 now = perf_event_time(event);
> @@ -647,6 +650,7 @@ static void perf_event_update_time(struct perf_event *event)
> __perf_update_times(event, now, &event->total_time_enabled,
> &event->total_time_running);
> event->tstamp = now;
> + perf_set_shadow_time(event, event->ctx);
I like this.
> }
>
> static void perf_event_update_sibling_time(struct perf_event *leader)
> @@ -2552,8 +2556,6 @@ event_sched_in(struct perf_event *event,
>
> perf_pmu_disable(event->pmu);
>
> - perf_set_shadow_time(event, ctx);
> -
> perf_log_itrace_start(event);
>
> if (event->pmu->add(event, PERF_EF_START)) {
> @@ -3707,29 +3709,6 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
> return 0;
> }
>
> -static inline bool event_update_userpage(struct perf_event *event)
> -{
> - if (likely(!atomic_read(&event->mmap_count)))
> - return false;
> -
> - perf_event_update_time(event);
> - perf_set_shadow_time(event, event->ctx);
> - perf_event_update_userpage(event);
> -
> - return true;
> -}
> -
> -static inline void group_update_userpage(struct perf_event *group_event)
> -{
> - struct perf_event *event;
> -
> - if (!event_update_userpage(group_event))
> - return;
> -
> - for_each_sibling_event(event, group_event)
> - event_update_userpage(event);
> -}
> -
> static int merge_sched_in(struct perf_event *event, void *data)
> {
> struct perf_event_context *ctx = event->ctx;
> @@ -3748,15 +3727,14 @@ static int merge_sched_in(struct perf_event *event, void *data)
> }
>
> if (event->state == PERF_EVENT_STATE_INACTIVE) {
> - *can_add_hw = 0;
> if (event->attr.pinned) {
> perf_cgroup_event_disable(event, ctx);
> perf_event_set_state(event, PERF_EVENT_STATE_ERROR);
> - } else {
> - ctx->rotate_necessary = 1;
> - perf_mux_hrtimer_restart(cpuctx);
> - group_update_userpage(event);
> }
> +
> + *can_add_hw = 0;
> + ctx->rotate_necessary = 1;
> + perf_mux_hrtimer_restart(cpuctx);
Not sure about this. We might not want to set rotate_necessary
and restart the mux timer when it's a pinned event that failed...?
Thanks,
Namhyung
> }
>
> return 0;
> @@ -6349,7 +6327,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
> ring_buffer_attach(event, rb);
>
> perf_event_update_time(event);
> - perf_set_shadow_time(event, event->ctx);
> perf_event_init_userpage(event);
> perf_event_update_userpage(event);
> } else {