lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20160623140910.GP4213@kernel.org>
Date:	Thu, 23 Jun 2016 11:09:10 -0300
From:	Arnaldo Carvalho de Melo <acme@...nel.org>
To:	Adrian Hunter <adrian.hunter@...el.com>
Cc:	Jiri Olsa <jolsa@...hat.com>, linux-kernel@...r.kernel.org,
	Andi Kleen <ak@...ux.intel.com>
Subject: Re: [PATCH V2 3/3] perf script: Add callindent option

Em Thu, Jun 23, 2016 at 04:40:58PM +0300, Adrian Hunter escreveu:
> Based on patches from Andi Kleen.
> 
> When printing PT instruction traces with perf script it is rather useful to
> see some indentation for the call tree. This patch adds a new callindent
> field to perf script that prints spaces for the function call stack depth.
> 
> We already have code to track the function call stack for PT, that we can
> reuse with minor modifications.
> 
> The resulting output is not quite as nice as ftrace yet, but a lot better
> than what was there before.

This is shaping up really nicely, well done! Hope Andi can send his ack
for this series if he sees no problems :-)
 
> Note there are some corner cases when the thread stack gets code confused
> and prints incorrect indentation. Even with that it is fairly useful.
> 
> When displaying kernel code traces it is recommended to run as root, as
> otherwise perf doesn't understand the kernel addresses properly, and may
> not reset the call stack correctly on kernel boundaries.
> 
> Example output:

Here I think we should try to make the output more compact, so perhaps
we should have some way to ask 'perf script' to show just timestamp
deltas from the previous entry, for instance, i.e. replace that 'time'
with a new 'delta' -F entry.

- Arnaldo

> 	sudo perf-with-kcore record eg2 -a -e intel_pt// -- sleep 1
> 	sudo perf-with-kcore script eg2 --ns -F callindent,time,comm,pid,sym,ip,addr,flags,cpu --itrace=cre | less
> 	...
>          swapper     0 [000]  5830.389116586:   call        irq_exit                                                     ffffffff8104d620 smp_call_function_single_interrupt+0x30 => ffffffff8107e720 irq_exit
>          swapper     0 [000]  5830.389116586:   call            idle_cpu                                                 ffffffff8107e769 irq_exit+0x49 => ffffffff810a3970 idle_cpu
>          swapper     0 [000]  5830.389116586:   return          idle_cpu                                                 ffffffff810a39b7 idle_cpu+0x47 => ffffffff8107e76e irq_exit
>          swapper     0 [000]  5830.389116586:   call            tick_nohz_irq_exit                                       ffffffff8107e7bd irq_exit+0x9d => ffffffff810f2fc0 tick_nohz_irq_exit
>          swapper     0 [000]  5830.389116919:   call                __tick_nohz_idle_enter                               ffffffff810f2fe0 tick_nohz_irq_exit+0x20 => ffffffff810f28d0 __tick_nohz_idle_enter
>          swapper     0 [000]  5830.389116919:   call                    ktime_get                                        ffffffff810f28f1 __tick_nohz_idle_enter+0x21 => ffffffff810e9ec0 ktime_get
>          swapper     0 [000]  5830.389116919:   call                        read_tsc                                     ffffffff810e9ef6 ktime_get+0x36 => ffffffff81035070 read_tsc
>          swapper     0 [000]  5830.389116919:   return                      read_tsc                                     ffffffff81035084 read_tsc+0x14 => ffffffff810e9efc ktime_get
>          swapper     0 [000]  5830.389116919:   return                  ktime_get                                        ffffffff810e9f46 ktime_get+0x86 => ffffffff810f28f6 __tick_nohz_idle_enter
>          swapper     0 [000]  5830.389116919:   call                    sched_clock_idle_sleep_event                     ffffffff810f290b __tick_nohz_idle_enter+0x3b => ffffffff810a7380 sched_clock_idle_sleep_event
>          swapper     0 [000]  5830.389116919:   call                        sched_clock_cpu                              ffffffff810a738b sched_clock_idle_sleep_event+0xb => ffffffff810a72e0 sched_clock_cpu
>          swapper     0 [000]  5830.389116919:   call                            sched_clock                              ffffffff810a734d sched_clock_cpu+0x6d => ffffffff81035750 sched_clock
>          swapper     0 [000]  5830.389116919:   call                                native_sched_clock                   ffffffff81035754 sched_clock+0x4 => ffffffff81035640 native_sched_clock
>          swapper     0 [000]  5830.389116919:   return                              native_sched_clock                   ffffffff8103568c native_sched_clock+0x4c => ffffffff81035759 sched_clock
>          swapper     0 [000]  5830.389116919:   return                          sched_clock                              ffffffff8103575c sched_clock+0xc => ffffffff810a7352 sched_clock_cpu
>          swapper     0 [000]  5830.389116919:   return                      sched_clock_cpu                              ffffffff810a7356 sched_clock_cpu+0x76 => ffffffff810a7390 sched_clock_idle_sleep_event
>          swapper     0 [000]  5830.389116919:   return                  sched_clock_idle_sleep_event                     ffffffff810a7391 sched_clock_idle_sleep_event+0x11 => ffffffff810f2910 __tick_nohz_idle_enter
> 	...
> 
> Signed-off-by: Adrian Hunter <adrian.hunter@...el.com>
> ---
>  tools/perf/Documentation/perf-script.txt |  4 ++
>  tools/perf/builtin-script.c              | 68 +++++++++++++++++++++++++++++++-
>  tools/perf/util/thread-stack.c           |  7 ++++
>  tools/perf/util/thread-stack.h           |  1 +
>  4 files changed, 79 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
> index a46030d8962d..1f6c70594f0f 100644
> --- a/tools/perf/Documentation/perf-script.txt
> +++ b/tools/perf/Documentation/perf-script.txt
> @@ -177,6 +177,10 @@ OPTIONS
>  	"tr end" for "bE". However the "x" flag will be display separately in those
>  	cases e.g. "jcc     (x)" for a condition branch within a transaction.
>  
> +	The callindent field is synthesized and may have a value when
> +	Instruction Trace decoding. For calls and returns, it will display the
> +	name of the symbol indented with spaces to reflect the stack depth.
> +
>  	Finally, a user may not set fields to none for all event types.
>  	i.e., -F "" is not allowed.
>  
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index 5a7633274d0a..c5648fd86229 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -21,6 +21,7 @@
>  #include "util/cpumap.h"
>  #include "util/thread_map.h"
>  #include "util/stat.h"
> +#include "util/thread-stack.h"
>  #include <linux/bitmap.h>
>  #include <linux/stringify.h>
>  #include "asm/bug.h"
> @@ -63,6 +64,7 @@ enum perf_output_field {
>  	PERF_OUTPUT_DATA_SRC	    = 1U << 17,
>  	PERF_OUTPUT_WEIGHT	    = 1U << 18,
>  	PERF_OUTPUT_BPF_OUTPUT	    = 1U << 19,
> +	PERF_OUTPUT_CALLINDENT	    = 1U << 20,
>  };
>  
>  struct output_option {
> @@ -89,6 +91,7 @@ struct output_option {
>  	{.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
>  	{.str = "weight",   .field = PERF_OUTPUT_WEIGHT},
>  	{.str = "bpf-output",   .field = PERF_OUTPUT_BPF_OUTPUT},
> +	{.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
>  };
>  
>  /* default set to maintain compatibility with current format */
> @@ -562,6 +565,62 @@ static void print_sample_addr(struct perf_sample *sample,
>  	}
>  }
>  
> +static void print_sample_callindent(struct perf_sample *sample,
> +				    struct perf_evsel *evsel,
> +				    struct thread *thread,
> +				    struct addr_location *al)
> +{
> +	struct perf_event_attr *attr = &evsel->attr;
> +	size_t depth = thread_stack__depth(thread);
> +	struct addr_location addr_al;
> +	const char *name = NULL;
> +	static int spacing;
> +	int len = 0;
> +	u64 ip = 0;
> +
> +	/*
> +	 * The 'return' has already been popped off the stack so the depth has
> +	 * to be adjusted to match the 'call'.
> +	 */
> +	if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
> +		depth += 1;
> +
> +	if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
> +		if (sample_addr_correlates_sym(attr)) {
> +			thread__resolve(thread, &addr_al, sample);
> +			if (addr_al.sym)
> +				name = addr_al.sym->name;
> +			else
> +				ip = sample->addr;
> +		} else {
> +			ip = sample->addr;
> +		}
> +	} else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
> +		if (al->sym)
> +			name = al->sym->name;
> +		else
> +			ip = sample->ip;
> +	}
> +
> +	if (name)
> +		len = printf("%*s%s", (int)depth * 4, "", name);
> +	else if (ip)
> +		len = printf("%*s%16" PRIx64, (int)depth * 4, "", ip);
> +
> +	if (len < 0)
> +		return;
> +
> +	/*
> +	 * Try to keep the output length from changing frequently so that the
> +	 * output lines up more nicely.
> +	 */
> +	if (len > spacing || (len && len < spacing - 52))
> +		spacing = round_up(len + 4, 32);
> +
> +	if (len < spacing)
> +		printf("%*s", spacing - len, "");
> +}
> +
>  static void print_sample_bts(struct perf_sample *sample,
>  			     struct perf_evsel *evsel,
>  			     struct thread *thread,
> @@ -570,6 +629,9 @@ static void print_sample_bts(struct perf_sample *sample,
>  	struct perf_event_attr *attr = &evsel->attr;
>  	bool print_srcline_last = false;
>  
> +	if (PRINT_FIELD(CALLINDENT))
> +		print_sample_callindent(sample, evsel, thread, al);
> +
>  	/* print branch_from information */
>  	if (PRINT_FIELD(IP)) {
>  		unsigned int print_opts = output[attr->type].print_ip_opts;
> @@ -2053,7 +2115,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
>  		     "comma separated output fields prepend with 'type:'. "
>  		     "Valid types: hw,sw,trace,raw. "
>  		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
> -		     "addr,symoff,period,iregs,brstack,brstacksym,flags", parse_output_fields),
> +		     "addr,symoff,period,iregs,brstack,brstacksym,flags,"
> +		     "callindent", parse_output_fields),
>  	OPT_BOOLEAN('a', "all-cpus", &system_wide,
>  		    "system-wide collection from all CPUs"),
>  	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
> @@ -2292,6 +2355,9 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
>  	script.session = session;
>  	script__setup_sample_type(&script);
>  
> +	if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT)
> +		itrace_synth_opts.thread_stack = true;
> +
>  	session->itrace_synth_opts = &itrace_synth_opts;
>  
>  	if (cpu_list) {
> diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
> index 825086aa9a08..d3301529f6a7 100644
> --- a/tools/perf/util/thread-stack.c
> +++ b/tools/perf/util/thread-stack.c
> @@ -616,3 +616,10 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
>  
>  	return err;
>  }
> +
> +size_t thread_stack__depth(struct thread *thread)
> +{
> +	if (!thread->ts)
> +		return 0;
> +	return thread->ts->cnt;
> +}
> diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
> index ad44c7944b8e..b7e41c4ebfdd 100644
> --- a/tools/perf/util/thread-stack.h
> +++ b/tools/perf/util/thread-stack.h
> @@ -87,6 +87,7 @@ void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
>  			  size_t sz, u64 ip);
>  int thread_stack__flush(struct thread *thread);
>  void thread_stack__free(struct thread *thread);
> +size_t thread_stack__depth(struct thread *thread);
>  
>  struct call_return_processor *
>  call_return_processor__new(int (*process)(struct call_return *cr, void *data),
> -- 
> 1.9.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ