lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 25 Sep 2012 17:42:16 -0400
From:	Steven Rostedt <rostedt@...dmis.org>
To:	David Sharp <dhsharp@...gle.com>
Cc:	linux-kernel@...r.kernel.org,
	Masami Hiramatsu <masami.hiramatsu.pt@...achi.com>
Subject: Re: [PATCH v4 3/3] tracing: format non-nanosec times from tsc clock
 without a decimal point.

Sorry, I should have been more picky before. I haven't totally tested
this yet.

On Tue, 2012-09-25 at 13:49 -0700, David Sharp wrote:
> With the addition of the "tsc" clock, formatting timestamps to look like
> fractional seconds is misleading. Mark clocks as either in nanoseconds or
> not, and format non-nanosecond timestamps as decimal integers.
> 
> Tested:
> $ cd /sys/kernel/debug/tracing/
> $ cat trace_clock
> [local] global tsc
> $ echo sched_switch > set_event
> $ echo 1 > tracing_enabled ; sleep 0.0005 ; echo 0 > tracing_enabled
> $ cat trace
>           <idle>-0     [000]  6330.555552: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120
>            sleep-29964 [000]  6330.555628: sched_switch: prev_comm=bash prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
>   ...
> $ echo 1 > options/latency-format
> $ cat trace
>   <idle>-0       0 4104553247us+: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=29964 next_prio=120
>    sleep-29964   0 4104553322us+: sched_switch: prev_comm=bash prev_pid=29964 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
>   ...
> $ echo tsc > trace_clock
> $ cat trace
> $ echo 1 > tracing_enabled ; sleep 0.0005 ; echo 0 > tracing_enabled
> $ echo 0 > options/latency-format
> $ cat trace
>           <idle>-0     [000] 16490053398357: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120
>            sleep-31128 [000] 16490053588518: sched_switch: prev_comm=bash prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
>   ...
> $ echo 1 > options/latency-format
> $ cat trace
>   <idle>-0       0 91557653238+: sched_switch: prev_comm=swapper prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=bash next_pid=31128 next_prio=120
>    sleep-31128   0 91557843399+: sched_switch: prev_comm=bash prev_pid=31128 prev_prio=120 prev_state=S ==> next_comm=swapper next_pid=0 next_prio=120
>   ...
> 
> v2:
> Move arch-specific bits out of generic code.
> v4:
> Fix x86_32 build due to 64-bit division.
> 
> Google-Bug-Id: 6980623
> Signed-off-by: David Sharp <dhsharp@...gle.com>
> Cc: Steven Rostedt <rostedt@...dmis.org>
> Cc: Masami Hiramatsu <masami.hiramatsu.pt@...achi.com>
> ---
>  arch/x86/include/asm/trace_clock.h |    2 +-
>  include/linux/ftrace_event.h       |    6 +++
>  kernel/trace/trace.c               |   15 +++++-
>  kernel/trace/trace.h               |    4 --
>  kernel/trace/trace_output.c        |   84 +++++++++++++++++++++++++-----------
>  5 files changed, 78 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/x86/include/asm/trace_clock.h b/arch/x86/include/asm/trace_clock.h
> index 7ee0d8c..45e17f5 100644
> --- a/arch/x86/include/asm/trace_clock.h
> +++ b/arch/x86/include/asm/trace_clock.h
> @@ -9,7 +9,7 @@
>  extern u64 notrace trace_clock_x86_tsc(void);
>  
>  # define ARCH_TRACE_CLOCKS \
> -	{ trace_clock_x86_tsc,	"x86-tsc" },
> +	{ trace_clock_x86_tsc,	"x86-tsc",	.in_ns = 0 },
>  
>  #endif
>  
> diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
> index 642928c..c760670 100644
> --- a/include/linux/ftrace_event.h
> +++ b/include/linux/ftrace_event.h
> @@ -86,6 +86,12 @@ struct trace_iterator {
>  	cpumask_var_t		started;
>  };
>  
> +enum trace_iter_flags {
> +	TRACE_FILE_LAT_FMT	= 1,
> +	TRACE_FILE_ANNOTATE	= 2,
> +	TRACE_FILE_TIME_IN_NS	= 4,
> +};
> +
>  
>  struct trace_event;
>  
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 4e26df3..3fe4c5b 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -476,10 +476,11 @@ static const char *trace_options[] = {
>  static struct {
>  	u64 (*func)(void);
>  	const char *name;
> +	int in_ns; /* is this clock in nanoseconds? */

Add a few tabs between the ns; and /*


>  } trace_clocks[] = {
> -	{ trace_clock_local,	"local" },
> -	{ trace_clock_global,	"global" },
> -	{ trace_clock_counter,	"counter" },
> +	{ trace_clock_local,	"local",	1 },
> +	{ trace_clock_global,	"global",	1 },
> +	{ trace_clock_counter,	"counter",	0 },
>  	ARCH_TRACE_CLOCKS
>  };
>  
> @@ -2425,6 +2426,10 @@ __tracing_open(struct inode *inode, struct file *file)
>  	if (ring_buffer_overruns(iter->tr->buffer))
>  		iter->iter_flags |= TRACE_FILE_ANNOTATE;
>  
> +	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
> +	if (trace_clocks[trace_clock_id].in_ns)
> +		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
> +
>  	/* stop the trace while dumping */
>  	tracing_stop();
>  
> @@ -3324,6 +3329,10 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
>  	if (trace_flags & TRACE_ITER_LATENCY_FMT)
>  		iter->iter_flags |= TRACE_FILE_LAT_FMT;
>  
> +	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
> +	if (trace_clocks[trace_clock_id].in_ns)
> +		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
> +
>  	iter->cpu_file = cpu_file;
>  	iter->tr = &global_trace;
>  	mutex_init(&iter->mutex);
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 55e1f7f..84fefed 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -409,10 +409,6 @@ void tracing_start_sched_switch_record(void);
>  int register_tracer(struct tracer *type);
>  void unregister_tracer(struct tracer *type);
>  int is_tracing_stopped(void);
> -enum trace_file_type {
> -	TRACE_FILE_LAT_FMT	= 1,
> -	TRACE_FILE_ANNOTATE	= 2,
> -};
>  
>  extern cpumask_var_t __read_mostly tracing_buffer_mask;
>  
> diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
> index 123b189..ca640ff 100644
> --- a/kernel/trace/trace_output.c
> +++ b/kernel/trace/trace_output.c
> @@ -610,24 +610,59 @@ lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu)
>  	return trace_print_lat_fmt(s, entry);
>  }
>  
> -static unsigned long preempt_mark_thresh = 100;
> +static unsigned long preempt_mark_thresh_us = 100;
> +/* roughly the same at 2.0GHz: */
> +static unsigned long preempt_mark_thresh_cycles = 200000;
>  
>  static int
> -lat_print_timestamp(struct trace_seq *s, u64 abs_usecs,
> -		    unsigned long rel_usecs)
> +lat_print_timestamp(struct trace_iterator *iter, u64 next_ts)
>  {
> -	return trace_seq_printf(s, " %4lldus%c: ", abs_usecs,
> -				rel_usecs > preempt_mark_thresh ? '!' :
> -				  rel_usecs > 1 ? '+' : ' ');
> +	int ret;
> +	struct trace_seq *s = &iter->seq;

Move the two declarations above (ret and s) down below the longer ones,
as shown inline, to give a nicer aesthetic look.

> +	unsigned long verbose = trace_flags & TRACE_ITER_VERBOSE;
> +	unsigned long in_ns = iter->iter_flags & TRACE_FILE_TIME_IN_NS;
> +	unsigned long long abs_ts = iter->ts - iter->tr->time_start;
> +	unsigned long long rel_ts = next_ts - iter->ts;
	  struct trace_seq *s = &iter->seq;
> +	unsigned long mark_thresh;
	  int ret;

> +
> +	if (in_ns) {
> +		abs_ts = ns2usecs(abs_ts);
> +		rel_ts = ns2usecs(rel_ts);
> +		mark_thresh = preempt_mark_thresh_us;
> +	} else
> +		mark_thresh = preempt_mark_thresh_cycles;
> +
> +	if (verbose && in_ns) {
> +		unsigned long abs_msec = abs_ts;
> +		unsigned long abs_usec = do_div(abs_msec, USEC_PER_MSEC);
> +		unsigned long rel_msec = rel_ts;
> +		unsigned long rel_usec = do_div(rel_msec, USEC_PER_MSEC);

Either add a blank line here, or move the declarations to the top of the
function and keep the code part here.

> +		ret = trace_seq_printf(
> +				s, "[%08llx] %ld.%03ldms (+%ld.%03ldms): ",
> +				ns2usecs(iter->ts),
> +				abs_msec, abs_usec,
> +				rel_msec, rel_usec);
> +	} else if (verbose && !in_ns) {
> +		ret = trace_seq_printf(
> +				s, "[%016llx] %lld (+%lld): ",
> +				iter->ts, abs_ts, rel_ts);
> +	} else { /* !verbose */
> +		ret = trace_seq_printf(
> +				s, " %4lld%s%c: ",
> +				abs_ts,
> +				in_ns ? "us" : "",
> +				rel_ts > mark_thresh ? '!' :
> +				  rel_ts > 1 ? '+' : ' ');
> +	}
> +	return ret;
>  }
>  
>  int trace_print_context(struct trace_iterator *iter)
>  {
>  	struct trace_seq *s = &iter->seq;
>  	struct trace_entry *entry = iter->ent;
> -	unsigned long long t = ns2usecs(iter->ts);
> -	unsigned long usec_rem = do_div(t, USEC_PER_SEC);
> -	unsigned long secs = (unsigned long)t;
> +	unsigned long long t;
> +	unsigned long secs, usec_rem;
>  	char comm[TASK_COMM_LEN];
>  	int ret;
>  
> @@ -644,8 +679,13 @@ int trace_print_context(struct trace_iterator *iter)
>  			return 0;
>  	}
>  
> -	return trace_seq_printf(s, " %5lu.%06lu: ",
> -				secs, usec_rem);
> +	if (iter->iter_flags & TRACE_FILE_TIME_IN_NS) {
> +		t = ns2usecs(iter->ts);
> +		usec_rem = do_div(t, USEC_PER_SEC);
> +		secs = (unsigned long)t;
> +		return trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem);
> +	} else
> +		return trace_seq_printf(s, "%12llu: ", iter->ts);
>  }
>  
>  int trace_print_lat_context(struct trace_iterator *iter)
> @@ -659,36 +699,30 @@ int trace_print_lat_context(struct trace_iterator *iter)
>  			   *next_entry = trace_find_next_entry(iter, NULL,
>  							       &next_ts);
>  	unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE);
> -	unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start);
> -	unsigned long rel_usecs;
> +

Is this adding an extra newline?

-- Steve

>  
>  	/* Restore the original ent_size */
>  	iter->ent_size = ent_size;
>  
>  	if (!next_entry)
>  		next_ts = iter->ts;
> -	rel_usecs = ns2usecs(next_ts - iter->ts);
>  
>  	if (verbose) {
>  		char comm[TASK_COMM_LEN];
>  
>  		trace_find_cmdline(entry->pid, comm);
>  
> -		ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08llx]"
> -				       " %ld.%03ldms (+%ld.%03ldms): ", comm,
> -				       entry->pid, iter->cpu, entry->flags,
> -				       entry->preempt_count, iter->idx,
> -				       ns2usecs(iter->ts),
> -				       abs_usecs / USEC_PER_MSEC,
> -				       abs_usecs % USEC_PER_MSEC,
> -				       rel_usecs / USEC_PER_MSEC,
> -				       rel_usecs % USEC_PER_MSEC);
> +		ret = trace_seq_printf(
> +				s, "%16s %5d %3d %d %08x %08lx ",
> +				comm, entry->pid, iter->cpu, entry->flags,
> +				entry->preempt_count, iter->idx);
>  	} else {
>  		ret = lat_print_generic(s, entry, iter->cpu);
> -		if (ret)
> -			ret = lat_print_timestamp(s, abs_usecs, rel_usecs);
>  	}
>  
> +	if (ret)
> +		ret = lat_print_timestamp(iter, next_ts);
> +
>  	return ret;
>  }
>  


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ