[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aVG2VoBpFoEEE1gG@krava>
Date: Sun, 28 Dec 2025 23:59:34 +0100
From: Jiri Olsa <olsajiri@...il.com>
To: Steven Rostedt <rostedt@...dmis.org>
Cc: LKML <linux-kernel@...r.kernel.org>,
Linux Trace Kernel <linux-trace-kernel@...r.kernel.org>,
Masami Hiramatsu <mhiramat@...nel.org>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Ian Rogers <irogers@...gle.com>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Namhyung Kim <namhyung@...nel.org>,
Peter Zijlstra <peterz@...radead.org>
Subject: Re: [PATCH] tracing: Allow perf to read synthetic events
On Wed, Dec 17, 2025 at 11:39:20AM -0500, Steven Rostedt wrote:
> From: Steven Rostedt <rostedt@...dmis.org>
>
> Currently, perf can not enable synthetic events. When it does, it either
> causes a warning in the kernel or errors with "no such device".
>
> Add the necessary code to allow perf to also attach to synthetic events.
>
> Reported-by: Ian Rogers <irogers@...gle.com>
> Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>
hi,
I don't see the crash, but perf record/script gives me 'FAILED TO PARSE' in perf script:
# cd /sys/kernel/tracing
# echo 's:block_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events
# echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=common_stacktrace if prev_state == 2' >> events/sched/sched_switch/trigger
# echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(block_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger
# echo 1 > events/synthetic/block_lat/enable
# perf record -e 'synthetic:block_lat' -a
^C[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.259 MB perf.data (1 samples) ]
# perf script
kworker/u33:2-w 244 [000] 1707.836263: synthetic:block_lat: [FAILED TO PARSE] pid=244 delta=21 stack=ARRAY[0b, 00, 00, 00, 00, 00, 00, 00, 1d, 72, 9d, 82, ff, ff, ff, ff, 0d, 7d, 9d, 82, ff, ff, ff, ff, 3d, 3d, 9e, 82, ff, ff, ff, ff, 05, 91, 9d, 82, ff, ff, ff, ff, 40, 7a, 42, 81, ff, ff, ff, ff, 5e, f4, 0c, 82, ff, ff, ff, ff, 43, 8d, 0c, 82, ff, ff, ff, ff, 82, 2d, 89, 81, ff, ff, ff, ff, 9b, 39, 89, 81, ff, ff, ff, ff, a6, 5a, 9c, 82, ff, ff, ff, ff, 2f, 01, 00, 81, ff, ff, ff, ff]
I'm not sure whether this is fixed in the latest libtraceevent; mine is
libtraceevent-1.8.4-3.fc42.x86_64
jirka
> ---
> kernel/trace/trace_events_synth.c | 121 +++++++++++++++++++++++-------
> 1 file changed, 94 insertions(+), 27 deletions(-)
>
> diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
> index 4554c458b78c..026e06f28958 100644
> --- a/kernel/trace/trace_events_synth.c
> +++ b/kernel/trace/trace_events_synth.c
> @@ -493,28 +493,19 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
> return len;
> }
>
> -static notrace void trace_event_raw_event_synth(void *__data,
> - u64 *var_ref_vals,
> - unsigned int *var_ref_idx)
> +static __always_inline int get_field_size(struct synth_event *event,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> {
> - unsigned int i, n_u64, val_idx, len, data_size = 0;
> - struct trace_event_file *trace_file = __data;
> - struct synth_trace_event *entry;
> - struct trace_event_buffer fbuffer;
> - struct trace_buffer *buffer;
> - struct synth_event *event;
> - int fields_size = 0;
> -
> - event = trace_file->event_call->data;
> -
> - if (trace_trigger_soft_disabled(trace_file))
> - return;
> + int fields_size;
>
> fields_size = event->n_u64 * sizeof(u64);
>
> - for (i = 0; i < event->n_dynamic_fields; i++) {
> + for (int i = 0; i < event->n_dynamic_fields; i++) {
> unsigned int field_pos = event->dynamic_fields[i]->field_pos;
> char *str_val;
> + int val_idx;
> + int len;
>
> val_idx = var_ref_idx[field_pos];
> str_val = (char *)(long)var_ref_vals[val_idx];
> @@ -529,18 +520,18 @@ static notrace void trace_event_raw_event_synth(void *__data,
>
> fields_size += len;
> }
> + return fields_size;
> +}
>
> - /*
> - * Avoid ring buffer recursion detection, as this event
> - * is being performed within another event.
> - */
> - buffer = trace_file->tr->array_buffer.buffer;
> - guard(ring_buffer_nest)(buffer);
> -
> - entry = trace_event_buffer_reserve(&fbuffer, trace_file,
> - sizeof(*entry) + fields_size);
> - if (!entry)
> - return;
> +static __always_inline void write_synth_entry(struct synth_event *event,
> + struct synth_trace_event *entry,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> +{
> + int data_size = 0;
> + int i, n_u64;
> + int val_idx;
> + int len;
>
> for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
> val_idx = var_ref_idx[i];
> @@ -581,10 +572,83 @@ static notrace void trace_event_raw_event_synth(void *__data,
> n_u64++;
> }
> }
> +}
> +
> +static notrace void trace_event_raw_event_synth(void *__data,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> +{
> + struct trace_event_file *trace_file = __data;
> + struct synth_trace_event *entry;
> + struct trace_event_buffer fbuffer;
> + struct trace_buffer *buffer;
> + struct synth_event *event;
> + int fields_size;
> +
> + event = trace_file->event_call->data;
> +
> + if (trace_trigger_soft_disabled(trace_file))
> + return;
> +
> + fields_size = get_field_size(event, var_ref_vals, var_ref_idx);
> +
> + /*
> + * Avoid ring buffer recursion detection, as this event
> + * is being performed within another event.
> + */
> + buffer = trace_file->tr->array_buffer.buffer;
> + guard(ring_buffer_nest)(buffer);
> +
> + entry = trace_event_buffer_reserve(&fbuffer, trace_file,
> + sizeof(*entry) + fields_size);
> + if (!entry)
> + return;
> +
> + write_synth_entry(event, entry, var_ref_vals, var_ref_idx);
>
> trace_event_buffer_commit(&fbuffer);
> }
>
> +#ifdef CONFIG_PERF_EVENTS
> +static notrace void perf_event_raw_event_synth(void *__data,
> + u64 *var_ref_vals,
> + unsigned int *var_ref_idx)
> +{
> + struct trace_event_call *call = __data;
> + struct synth_trace_event *entry;
> + struct hlist_head *perf_head;
> + struct synth_event *event;
> + struct pt_regs *regs;
> + int fields_size;
> + size_t size;
> + int context;
> +
> + event = call->data;
> +
> + perf_head = this_cpu_ptr(call->perf_events);
> +
> + if (!perf_head || hlist_empty(perf_head))
> + return;
> +
> + fields_size = get_field_size(event, var_ref_vals, var_ref_idx);
> +
> + size = ALIGN(sizeof(*entry) + fields_size, 8);
> +
> +	entry = perf_trace_buf_alloc(size, &regs, &context);
> +
> + if (unlikely(!entry))
> + return;
> +
> + write_synth_entry(event, entry, var_ref_vals, var_ref_idx);
> +
> + perf_fetch_caller_regs(regs);
> +
> + perf_trace_buf_submit(entry, size, context,
> + call->event.type, 1, regs,
> + perf_head, NULL);
> +}
> +#endif
> +
> static void free_synth_event_print_fmt(struct trace_event_call *call)
> {
> if (call) {
> @@ -911,6 +975,9 @@ static int register_synth_event(struct synth_event *event)
> call->flags = TRACE_EVENT_FL_TRACEPOINT;
> call->class->reg = synth_event_reg;
> call->class->probe = trace_event_raw_event_synth;
> +#ifdef CONFIG_PERF_EVENTS
> + call->class->perf_probe = perf_event_raw_event_synth;
> +#endif
> call->data = event;
> call->tp = event->tp;
>
> --
> 2.51.0
>
Powered by blists - more mailing lists