[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAP-5=fUmoxMw9UkptfoMZUaSbLSnZ2hxJvYejMabyugLk_qYKQ@mail.gmail.com>
Date: Wed, 19 Nov 2025 23:21:35 -0800
From: Ian Rogers <irogers@...gle.com>
To: Namhyung Kim <namhyung@...nel.org>
Cc: Arnaldo Carvalho de Melo <acme@...nel.org>, James Clark <james.clark@...aro.org>,
Jiri Olsa <jolsa@...nel.org>, Adrian Hunter <adrian.hunter@...el.com>,
Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...nel.org>,
LKML <linux-kernel@...r.kernel.org>, linux-perf-users@...r.kernel.org,
Steven Rostedt <rostedt@...dmis.org>, Josh Poimboeuf <jpoimboe@...nel.org>,
Indu Bhagat <indu.bhagat@...cle.com>, Jens Remus <jremus@...ux.ibm.com>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>, linux-trace-kernel@...r.kernel.org,
bpf@...r.kernel.org
Subject: Re: [PATCH v5 6/6] perf tools: Flush remaining samples w/o deferred callchains
On Wed, Nov 19, 2025 at 9:29 PM Ian Rogers <irogers@...gle.com> wrote:
>
> On Wed, Nov 19, 2025 at 6:11 PM Namhyung Kim <namhyung@...nel.org> wrote:
> >
> > It's possible that some kernel samples don't have matching deferred
> > callchain records when the profiling session was ended before the
> > threads came back to userspace. Let's flush the samples before
> > finishing the session.
> >
> > Also, 32-bit systems can see only a partial mmap of the data. In that case,
> > deferred samples won't point to the correct data once the mapping moves
> > to the next portion of the file. Copy the original sample before it
> > unmaps the current data.
>
> I think it is simpler to always copy. We may have events from
> synthesis, inject, ... and not the reader. Relying on callers to know
> that someone made a copy of the event and to make a defensive copy on
> their behalf just feels error prone.
>
> In the python session API I need to deal with the lifetime of events.
> Currently the events are copied:
> https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/python.c?h=perf-tools-next#n507
> and I'm doing this for session tool callbacks:
> https://lore.kernel.org/lkml/20251029053413.355154-12-irogers@google.com/
> I think it can be made lazier by knowing the tool callback can assume
> the event and sample are valid. We can delay the copying of the
> event/sample until the pyevent has a reference count >1 and we're
> returning out of the tool callback. As for relying on some kind of
> global knowledge in the reader to maintain the correctness of memory,
> I'm just not clear on how to make it always work.
I believe we always reuse the memory for the event, per event, in pipe mode:
https://web.git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git/tree/tools/perf/util/session.c?h=perf-tools-next#n1868
so a lazy copy will be broken for the pipe mode case.
Thanks,
Ian
> > Signed-off-by: Namhyung Kim <namhyung@...nel.org>
> > ---
> > tools/perf/util/session.c | 98 +++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 98 insertions(+)
> >
> > diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
> > index 2e777fd1bcf6707b..b781e01ddcb4876b 100644
> > --- a/tools/perf/util/session.c
> > +++ b/tools/perf/util/session.c
> > @@ -1288,8 +1288,13 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool
> > struct deferred_event {
> > struct list_head list;
> > union perf_event *event;
> > + bool allocated;
> > };
> >
> > +/*
> > + * This is called when a deferred callchain record comes up. Find all matching
> > + * samples, merge the callchains and process them.
> > + */
> > static int evlist__deliver_deferred_samples(struct evlist *evlist,
> > const struct perf_tool *tool,
> > union perf_event *event,
> > @@ -1331,6 +1336,86 @@ static int evlist__deliver_deferred_samples(struct evlist *evlist,
> > free(orig_sample.callchain);
> >
> > list_del(&de->list);
> > + if (de->allocated)
> > + free(de->event);
> > + free(de);
> > +
> > + if (ret)
> > + break;
> > + }
> > + return ret;
> > +}
> > +
> > +/*
> > + * This is called when the backing mmap is about to go away. It needs to save
> > + * the original sample data until it finds the matching deferred callchains.
> > + */
> > +static void evlist__copy_deferred_samples(struct evlist *evlist,
> > + const struct perf_tool *tool,
> > + struct machine *machine)
> > +{
> > + struct deferred_event *de, *tmp;
> > + struct evsel *evsel;
> > + int ret = 0;
> > +
> > + list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) {
> > + struct perf_sample sample;
> > + size_t sz = de->event->header.size;
> > + void *buf;
> > +
> > + if (de->allocated)
> > + continue;
> > +
> > + buf = malloc(sz);
> > + if (buf) {
> > + memcpy(buf, de->event, sz);
> > + de->event = buf;
> > + de->allocated = true;
> > + continue;
> > + }
> > +
> > + /* The allocation failed, flush the sample now */
> > + ret = evlist__parse_sample(evlist, de->event, &sample);
> > + if (ret == 0) {
> > + evsel = evlist__id2evsel(evlist, sample.id);
> > + evlist__deliver_sample(evlist, tool, de->event,
> > + &sample, evsel, machine);
> > + }
> > +
> > + list_del(&de->list);
> > + BUG_ON(de->allocated);
> > + free(de);
> > + }
> > +}
> > +
> > +/*
> > + * This is called at the end of the data processing for the session. Flush the
> > + * remaining samples as there's no hope for matching deferred callchains.
> > + */
> > +static int evlist__flush_deferred_samples(struct evlist *evlist,
> > + const struct perf_tool *tool,
> > + struct machine *machine)
> > +{
> > + struct deferred_event *de, *tmp;
> > + struct evsel *evsel;
> > + int ret = 0;
> > +
> > + list_for_each_entry_safe(de, tmp, &evlist->deferred_samples, list) {
> > + struct perf_sample sample;
> > +
> > + ret = evlist__parse_sample(evlist, de->event, &sample);
> > + if (ret < 0) {
> > + pr_err("failed to parse original sample\n");
> > + break;
> > + }
> > +
> > + evsel = evlist__id2evsel(evlist, sample.id);
> > + ret = evlist__deliver_sample(evlist, tool, de->event,
> > + &sample, evsel, machine);
> > +
> > + list_del(&de->list);
> > + if (de->allocated)
> > + free(de->event);
> > free(de);
> >
> > if (ret)
> > @@ -1374,6 +1459,7 @@ static int machines__deliver_event(struct machines *machines,
> > return -ENOMEM;
> >
> > de->event = event;
> > + de->allocated = false;
> > list_add_tail(&de->list, &evlist->deferred_samples);
> > return 0;
> > }
> > @@ -2218,6 +2304,8 @@ reader__mmap(struct reader *rd, struct perf_session *session)
> > }
> >
> > if (mmaps[rd->mmap_idx]) {
> > + evlist__copy_deferred_samples(session->evlist, session->tool,
> > + &session->machines.host);
> > munmap(mmaps[rd->mmap_idx], rd->mmap_size);
> > mmaps[rd->mmap_idx] = NULL;
> > }
> > @@ -2372,6 +2460,11 @@ static int __perf_session__process_events(struct perf_session *session)
> > if (err)
> > goto out_err;
> > err = auxtrace__flush_events(session, tool);
> > + if (err)
> > + goto out_err;
> > + err = evlist__flush_deferred_samples(session->evlist,
> > + session->tool,
> > + &session->machines.host);
> > if (err)
> > goto out_err;
> > err = perf_session__flush_thread_stacks(session);
> > @@ -2494,6 +2587,11 @@ static int __perf_session__process_dir_events(struct perf_session *session)
> > if (ret)
> > goto out_err;
> >
> > + ret = evlist__flush_deferred_samples(session->evlist, tool,
> > + &session->machines.host);
> > + if (ret)
> > + goto out_err;
> > +
> > ret = perf_session__flush_thread_stacks(session);
> > out_err:
> > ui_progress__finish();
> > --
> > 2.52.0.rc1.455.g30608eb744-goog
> >
Powered by blists - more mailing lists