lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAEXW_YSGa7a1UgYdiE7wcMsvsYnYB-jW3mQ=KRHSoU95gruS7g@mail.gmail.com>
Date:   Tue, 22 Feb 2022 10:04:29 -0500
From:   Joel Fernandes <joel@...lfernandes.org>
To:     Steven Rostedt <rostedt@...dmis.org>
Cc:     LKML <linux-kernel@...r.kernel.org>,
        Ingo Molnar <mingo@...nel.org>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Tzvetomir Stoyanov <tz.stoyanov@...il.com>,
        Masami Hiramatsu <mhiramat@...nel.org>
Subject: Re: [PATCH] eprobes: Remove redundant event type information

On Fri, Feb 18, 2022 at 7:01 PM Steven Rostedt <rostedt@...dmis.org> wrote:
>
> From 3163c1db8bbde856367b9d4e132d1bac9ec26704 Mon Sep 17 00:00:00 2001
> From: "Steven Rostedt (Google)" <rostedt@...dmis.org>
> Date: Fri, 18 Feb 2022 18:52:38 -0500
> Subject: [PATCH] eprobes: Remove redundant event type information

Thanks!

Reviewed-by: Joel Fernandes <joel@...lfernandes.org>

Joel


>
> Currently, the event probes save the type of the event they are attached
> to when recording the event. For example:
>
>   # echo 'e:switch sched/sched_switch prev_state=$prev_state prev_prio=$prev_prio next_pid=$next_pid next_prio=$next_prio' > dynamic_events
>   # cat events/eprobes/switch/format
>
>  name: switch
>  ID: 1717
>  format:
>         field:unsigned short common_type;       offset:0;       size:2; signed:0;
>         field:unsigned char common_flags;       offset:2;       size:1; signed:0;
>         field:unsigned char common_preempt_count;       offset:3;       size:1; signed:0;
>         field:int common_pid;   offset:4;       size:4; signed:1;
>
>         field:unsigned int __probe_type;        offset:8;       size:4; signed:0;
>         field:u64 prev_state;   offset:12;      size:8; signed:0;
>         field:u64 prev_prio;    offset:20;      size:8; signed:0;
>         field:u64 next_pid;     offset:28;      size:8; signed:0;
>         field:u64 next_prio;    offset:36;      size:8; signed:0;
>
>  print fmt: "(%u) prev_state=0x%Lx prev_prio=0x%Lx next_pid=0x%Lx next_prio=0x%Lx", REC->__probe_type, REC->prev_state, REC->prev_prio, REC->next_pid, REC->next_prio
>
> The __probe_type adds 4 bytes to every event.
>
> One of the reasons for creating eprobes is to limit what is traced in an
> event to be able to limit what is written into the ring buffer. Having
> this redundant 4 bytes to every event takes away from this.
>
> The event that is recorded can be retrieved from the event probe itself,
> that is available when the trace is happening. For user space tools, it
> could simply read the dynamic_event file to find the event they are for.
> So there is really no reason to write this information into the ring
> buffer for every event.
>
> Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>
> ---
>  kernel/trace/trace.h        |  1 -
>  kernel/trace/trace_eprobe.c | 15 +++++++--------
>  kernel/trace/trace_probe.c  | 10 +++++-----
>  kernel/trace/trace_probe.h  |  1 -
>  4 files changed, 12 insertions(+), 15 deletions(-)
>
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 0f5e22238cd2..07d990270e2a 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -136,7 +136,6 @@ struct kprobe_trace_entry_head {
>
>  struct eprobe_trace_entry_head {
>         struct trace_entry      ent;
> -       unsigned int            type;
>  };
>
>  struct kretprobe_trace_entry_head {
> diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
> index 191db32dec46..02838d47129f 100644
> --- a/kernel/trace/trace_eprobe.c
> +++ b/kernel/trace/trace_eprobe.c
> @@ -250,8 +250,6 @@ static int eprobe_event_define_fields(struct trace_event_call *event_call)
>         if (WARN_ON_ONCE(!tp))
>                 return -ENOENT;
>
> -       DEFINE_FIELD(unsigned int, type, FIELD_STRING_TYPE, 0);
> -
>         return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
>  }
>
> @@ -270,7 +268,9 @@ print_eprobe_event(struct trace_iterator *iter, int flags,
>         struct trace_event_call *pevent;
>         struct trace_event *probed_event;
>         struct trace_seq *s = &iter->seq;
> +       struct trace_eprobe *ep;
>         struct trace_probe *tp;
> +       unsigned int type;
>
>         field = (struct eprobe_trace_entry_head *)iter->ent;
>         tp = trace_probe_primary_from_call(
> @@ -278,15 +278,18 @@ print_eprobe_event(struct trace_iterator *iter, int flags,
>         if (WARN_ON_ONCE(!tp))
>                 goto out;
>
> +       ep = container_of(tp, struct trace_eprobe, tp);
> +       type = ep->event->event.type;
> +
>         trace_seq_printf(s, "%s: (", trace_probe_name(tp));
>
> -       probed_event = ftrace_find_event(field->type);
> +       probed_event = ftrace_find_event(type);
>         if (probed_event) {
>                 pevent = container_of(probed_event, struct trace_event_call, event);
>                 trace_seq_printf(s, "%s.%s", pevent->class->system,
>                                  trace_event_name(pevent));
>         } else {
> -               trace_seq_printf(s, "%u", field->type);
> +               trace_seq_printf(s, "%u", type);
>         }
>
>         trace_seq_putc(s, ')');
> @@ -498,10 +501,6 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec)
>                 return;
>
>         entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
> -       if (edata->ep->event)
> -               entry->type = edata->ep->event->event.type;
> -       else
> -               entry->type = 0;
>         store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize);
>
>         trace_event_buffer_commit(&fbuffer);
> diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
> index 73d90179b51b..80863c6508e5 100644
> --- a/kernel/trace/trace_probe.c
> +++ b/kernel/trace/trace_probe.c
> @@ -871,15 +871,15 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
>         switch (ptype) {
>         case PROBE_PRINT_NORMAL:
>                 fmt = "(%lx)";
> -               arg = "REC->" FIELD_STRING_IP;
> +               arg = ", REC->" FIELD_STRING_IP;
>                 break;
>         case PROBE_PRINT_RETURN:
>                 fmt = "(%lx <- %lx)";
> -               arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
> +               arg = ", REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
>                 break;
>         case PROBE_PRINT_EVENT:
> -               fmt = "(%u)";
> -               arg = "REC->" FIELD_STRING_TYPE;
> +               fmt = "";
> +               arg = "";
>                 break;
>         default:
>                 WARN_ON_ONCE(1);
> @@ -903,7 +903,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
>                                         parg->type->fmt);
>         }
>
> -       pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
> +       pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", arg);
>
>         for (i = 0; i < tp->nr_args; i++) {
>                 parg = tp->args + i;
> diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
> index 99e7a5df025e..92cc149af0fd 100644
> --- a/kernel/trace/trace_probe.h
> +++ b/kernel/trace/trace_probe.h
> @@ -38,7 +38,6 @@
>  #define FIELD_STRING_IP                "__probe_ip"
>  #define FIELD_STRING_RETIP     "__probe_ret_ip"
>  #define FIELD_STRING_FUNC      "__probe_func"
> -#define FIELD_STRING_TYPE      "__probe_type"
>
>  #undef DEFINE_FIELD
>  #define DEFINE_FIELD(type, item, name, is_signed)                      \
> --
> 2.34.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ