lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 22 Feb 2022 21:19:30 +0900
From:   Masami Hiramatsu <mhiramat@...nel.org>
To:     Steven Rostedt <rostedt@...dmis.org>
Cc:     LKML <linux-kernel@...r.kernel.org>,
        Ingo Molnar <mingo@...nel.org>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Joel Fernandes <joel@...lfernandes.org>,
        Tzvetomir Stoyanov <tz.stoyanov@...il.com>,
        Masami Hiramatsu <mhiramat@...nel.org>
Subject: Re: [PATCH] eprobes: Remove redundant event type information

On Fri, 18 Feb 2022 19:00:57 -0500
Steven Rostedt <rostedt@...dmis.org> wrote:

> From 3163c1db8bbde856367b9d4e132d1bac9ec26704 Mon Sep 17 00:00:00 2001
> From: "Steven Rostedt (Google)" <rostedt@...dmis.org>
> Date: Fri, 18 Feb 2022 18:52:38 -0500
> Subject: [PATCH] eprobes: Remove redundant event type information
> 
> Currently, the event probes save the type of the event they are attached
> to when recording the event. For example:
> 
>   # echo 'e:switch sched/sched_switch prev_state=$prev_state prev_prio=$prev_prio next_pid=$next_pid next_prio=$next_prio' > dynamic_events
>   # cat events/eprobes/switch/format
> 
>  name: switch
>  ID: 1717
>  format:
>         field:unsigned short common_type;       offset:0;       size:2; signed:0;
>         field:unsigned char common_flags;       offset:2;       size:1; signed:0;
>         field:unsigned char common_preempt_count;       offset:3;       size:1; signed:0;
>         field:int common_pid;   offset:4;       size:4; signed:1;
> 
>         field:unsigned int __probe_type;        offset:8;       size:4; signed:0;
>         field:u64 prev_state;   offset:12;      size:8; signed:0;
>         field:u64 prev_prio;    offset:20;      size:8; signed:0;
>         field:u64 next_pid;     offset:28;      size:8; signed:0;
>         field:u64 next_prio;    offset:36;      size:8; signed:0;
> 
>  print fmt: "(%u) prev_state=0x%Lx prev_prio=0x%Lx next_pid=0x%Lx next_prio=0x%Lx", REC->__probe_type, REC->prev_state, REC->prev_prio, REC->next_pid, REC->next_prio
> 
> The __probe_type adds 4 bytes to every event.
> 
> One of the reasons for creating eprobes is to limit what is traced in an
> event to be able to limit what is written into the ring buffer. Having
> this redundant 4 bytes to every event takes away from this.
> 
> The event that is recorded can be retrieved from the event probe itself,
> that is available when the trace is happening. For user space tools, it
> could simply read the dynamic_event file to find the event they are for.
> So there is really no reason to write this information into the ring
> buffer for every event.

OK, This looks good to me.

Acked-by: Masami Hiramatsu <mhiramat@...nel.org>

Thank you,

> 
> Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>
> ---
>  kernel/trace/trace.h        |  1 -
>  kernel/trace/trace_eprobe.c | 15 +++++++--------
>  kernel/trace/trace_probe.c  | 10 +++++-----
>  kernel/trace/trace_probe.h  |  1 -
>  4 files changed, 12 insertions(+), 15 deletions(-)
> 
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 0f5e22238cd2..07d990270e2a 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -136,7 +136,6 @@ struct kprobe_trace_entry_head {
>  
>  struct eprobe_trace_entry_head {
>  	struct trace_entry	ent;
> -	unsigned int		type;
>  };
>  
>  struct kretprobe_trace_entry_head {
> diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c
> index 191db32dec46..02838d47129f 100644
> --- a/kernel/trace/trace_eprobe.c
> +++ b/kernel/trace/trace_eprobe.c
> @@ -250,8 +250,6 @@ static int eprobe_event_define_fields(struct trace_event_call *event_call)
>  	if (WARN_ON_ONCE(!tp))
>  		return -ENOENT;
>  
> -	DEFINE_FIELD(unsigned int, type, FIELD_STRING_TYPE, 0);
> -
>  	return traceprobe_define_arg_fields(event_call, sizeof(field), tp);
>  }
>  
> @@ -270,7 +268,9 @@ print_eprobe_event(struct trace_iterator *iter, int flags,
>  	struct trace_event_call *pevent;
>  	struct trace_event *probed_event;
>  	struct trace_seq *s = &iter->seq;
> +	struct trace_eprobe *ep;
>  	struct trace_probe *tp;
> +	unsigned int type;
>  
>  	field = (struct eprobe_trace_entry_head *)iter->ent;
>  	tp = trace_probe_primary_from_call(
> @@ -278,15 +278,18 @@ print_eprobe_event(struct trace_iterator *iter, int flags,
>  	if (WARN_ON_ONCE(!tp))
>  		goto out;
>  
> +	ep = container_of(tp, struct trace_eprobe, tp);
> +	type = ep->event->event.type;
> +
>  	trace_seq_printf(s, "%s: (", trace_probe_name(tp));
>  
> -	probed_event = ftrace_find_event(field->type);
> +	probed_event = ftrace_find_event(type);
>  	if (probed_event) {
>  		pevent = container_of(probed_event, struct trace_event_call, event);
>  		trace_seq_printf(s, "%s.%s", pevent->class->system,
>  				 trace_event_name(pevent));
>  	} else {
> -		trace_seq_printf(s, "%u", field->type);
> +		trace_seq_printf(s, "%u", type);
>  	}
>  
>  	trace_seq_putc(s, ')');
> @@ -498,10 +501,6 @@ __eprobe_trace_func(struct eprobe_data *edata, void *rec)
>  		return;
>  
>  	entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
> -	if (edata->ep->event)
> -		entry->type = edata->ep->event->event.type;
> -	else
> -		entry->type = 0;
>  	store_trace_args(&entry[1], &edata->ep->tp, rec, sizeof(*entry), dsize);
>  
>  	trace_event_buffer_commit(&fbuffer);
> diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
> index 73d90179b51b..80863c6508e5 100644
> --- a/kernel/trace/trace_probe.c
> +++ b/kernel/trace/trace_probe.c
> @@ -871,15 +871,15 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
>  	switch (ptype) {
>  	case PROBE_PRINT_NORMAL:
>  		fmt = "(%lx)";
> -		arg = "REC->" FIELD_STRING_IP;
> +		arg = ", REC->" FIELD_STRING_IP;
>  		break;
>  	case PROBE_PRINT_RETURN:
>  		fmt = "(%lx <- %lx)";
> -		arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
> +		arg = ", REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
>  		break;
>  	case PROBE_PRINT_EVENT:
> -		fmt = "(%u)";
> -		arg = "REC->" FIELD_STRING_TYPE;
> +		fmt = "";
> +		arg = "";
>  		break;
>  	default:
>  		WARN_ON_ONCE(1);
> @@ -903,7 +903,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
>  					parg->type->fmt);
>  	}
>  
> -	pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
> +	pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", arg);
>  
>  	for (i = 0; i < tp->nr_args; i++) {
>  		parg = tp->args + i;
> diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
> index 99e7a5df025e..92cc149af0fd 100644
> --- a/kernel/trace/trace_probe.h
> +++ b/kernel/trace/trace_probe.h
> @@ -38,7 +38,6 @@
>  #define FIELD_STRING_IP		"__probe_ip"
>  #define FIELD_STRING_RETIP	"__probe_ret_ip"
>  #define FIELD_STRING_FUNC	"__probe_func"
> -#define FIELD_STRING_TYPE	"__probe_type"
>  
>  #undef DEFINE_FIELD
>  #define DEFINE_FIELD(type, item, name, is_signed)			\
> -- 
> 2.34.1
> 


-- 
Masami Hiramatsu <mhiramat@...nel.org>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ