[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220222211930.458224fe0499e036f3c5a06d@kernel.org>
Date: Tue, 22 Feb 2022 21:19:30 +0900
From: Masami Hiramatsu <mhiramat@...nel.org>
To: Steven Rostedt <rostedt@...dmis.org>
Cc: LKML <linux-kernel@...r.kernel.org>,
Ingo Molnar <mingo@...nel.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Joel Fernandes <joel@...lfernandes.org>,
Tzvetomir Stoyanov <tz.stoyanov@...il.com>,
Masami Hiramatsu <mhiramat@...nel.org>
Subject: Re: [PATCH] eprobes: Remove redundant event type information
On Fri, 18 Feb 2022 19:00:57 -0500
Steven Rostedt <rostedt@...dmis.org> wrote:
> From 3163c1db8bbde856367b9d4e132d1bac9ec26704 Mon Sep 17 00:00:00 2001
> From: "Steven Rostedt (Google)" <rostedt@...dmis.org>
> Date: Fri, 18 Feb 2022 18:52:38 -0500
> Subject: [PATCH] eprobes: Remove redundant event type information
>
> Currently, the event probes save the type of the event they are attached
> to when recording the event. For example:
>
> # echo 'e:switch sched/sched_switch prev_state=$prev_state prev_prio=$prev_prio next_pid=$next_pid next_prio=$next_prio' > dynamic_events
> # cat events/eprobes/switch/format
>
> name: switch
> ID: 1717
> format:
> field:unsigned short common_type; offset:0; size:2; signed:0;
> field:unsigned char common_flags; offset:2; size:1; signed:0;
> field:unsigned char common_preempt_count; offset:3; size:1; signed:0;
> field:int common_pid; offset:4; size:4; signed:1;
>
> field:unsigned int __probe_type; offset:8; size:4; signed:0;
> field:u64 prev_state; offset:12; size:8; signed:0;
> field:u64 prev_prio; offset:20; size:8; signed:0;
> field:u64 next_pid; offset:28; size:8; signed:0;
> field:u64 next_prio; offset:36; size:8; signed:0;
>
> print fmt: "(%u) prev_state=0x%Lx prev_prio=0x%Lx next_pid=0x%Lx next_prio=0x%Lx", REC->__probe_type, REC->prev_state, REC->prev_prio, REC->next_pid, REC->next_prio
>
> The __probe_type adds 4 bytes to every event.
>
> One of the reasons for creating eprobes is to limit what is traced in an
> event to be able to limit what is written into the ring buffer. Having
> this redundant 4 bytes to every event takes away from this.
>
> The event that is recorded can be retrieved from the event probe itself,
> that is available when the trace is happening. For user space tools, it
> could simply read the dynamic_event file to find the event they are for.
> So there is really no reason to write this information into the ring
> buffer for every event.
OK, This looks good to me.
Acked-by: Masami Hiramatsu <mhiramat@...nel.org>
Thank you,
>
> Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>
> ---
> kernel/trace/trace.h | 1 -
> kernel/trace/trace_eprobe.c | 15 +++++++--------
> kernel/trace/trace_probe.c | 10 +++++-----
> kernel/trace/trace_probe.h | 1 -
> 4 files changed, 12 insertions(+), 15 deletions(-)
>
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 0f5e22238cd2..07d990270e2a 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -136,7 +136,6 @@ struct kprobe_trace_entry_head {
>
> struct eprobe_trace_entry_head {
> struct trace_entry ent;
> - unsigned int type;
> };
>
> struct kretprobe_trace_entry_head {
> diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
> index 191db32dec46..02838d47129f 100644
> --- a/kernel/trace/trace_eprobe.c
> +++ b/kernel/trace/trace_eprobe.c
> @@ -250,8 +250,6 @@ static int eprobe_event_define_fields(struct trace_event_call *event_call)
> if (WARN_ON_ONCE(!tp))
> return -ENOENT;
>
> - DEFINE_FIELD(unsigned int, type, FIELD_STRING_TYPE, 0);
> -
> return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
> }
>
> @@ -270,7 +268,9 @@ print_eprobe_event(struct trace_iterator *iter, int flags,
> struct trace_event_call *pevent;
> struct trace_event *probed_event;
> struct trace_seq *s = &iter->seq;
> + struct trace_eprobe *ep;
> struct trace_probe *tp;
> + unsigned int type;
>
> field = (struct eprobe_trace_entry_head *)iter->ent;
> tp = trace_probe_primary_from_call(
> @@ -278,15 +278,18 @@ print_eprobe_event(struct trace_iterator *iter, int flags,
> if (WARN_ON_ONCE(!tp))
> goto out;
>
> + ep = container_of(tp, struct trace_eprobe, tp);
> + type = ep->event->event.type;
> +
> trace_seq_printf(s, "%s: (", trace_probe_name(tp));
>
> - probed_event = ftrace_find_event(field->type);
> + probed_event = ftrace_find_event(type);
> if (probed_event) {
> pevent = container_of(probed_event, struct trace_event_call, event);
> trace_seq_printf(s, "%s.%s", pevent->class->system,
> trace_event_name(pevent));
> } else {
> - trace_seq_printf(s, "%u", field->type);
> + trace_seq_printf(s, "%u", type);
> }
>
> trace_seq_putc(s, ')');
> @@ -498,10 +501,6 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec)
> return;
>
> entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
> - if (edata->ep->event)
> - entry->type = edata->ep->event->event.type;
> - else
> - entry->type = 0;
> store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize);
>
> trace_event_buffer_commit(&fbuffer);
> diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
> index 73d90179b51b..80863c6508e5 100644
> --- a/kernel/trace/trace_probe.c
> +++ b/kernel/trace/trace_probe.c
> @@ -871,15 +871,15 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
> switch (ptype) {
> case PROBE_PRINT_NORMAL:
> fmt = "(%lx)";
> - arg = "REC->" FIELD_STRING_IP;
> + arg = ", REC->" FIELD_STRING_IP;
> break;
> case PROBE_PRINT_RETURN:
> fmt = "(%lx <- %lx)";
> - arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
> + arg = ", REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
> break;
> case PROBE_PRINT_EVENT:
> - fmt = "(%u)";
> - arg = "REC->" FIELD_STRING_TYPE;
> + fmt = "";
> + arg = "";
> break;
> default:
> WARN_ON_ONCE(1);
> @@ -903,7 +903,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
> parg->type->fmt);
> }
>
> - pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
> + pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", arg);
>
> for (i = 0; i < tp->nr_args; i++) {
> parg = tp->args + i;
> diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
> index 99e7a5df025e..92cc149af0fd 100644
> --- a/kernel/trace/trace_probe.h
> +++ b/kernel/trace/trace_probe.h
> @@ -38,7 +38,6 @@
> #define FIELD_STRING_IP "__probe_ip"
> #define FIELD_STRING_RETIP "__probe_ret_ip"
> #define FIELD_STRING_FUNC "__probe_func"
> -#define FIELD_STRING_TYPE "__probe_type"
>
> #undef DEFINE_FIELD
> #define DEFINE_FIELD(type, item, name, is_signed) \
> --
> 2.34.1
>
--
Masami Hiramatsu <mhiramat@...nel.org>
Powered by blists - more mailing lists