lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20090226175302.GD5889@nowhere>
Date:	Thu, 26 Feb 2009 18:53:03 +0100
From:	Frederic Weisbecker <fweisbec@...il.com>
To:	hpa@...or.com, mingo@...hat.com, rostedt@...dmis.org,
	peterz@...radead.org, tglx@...utronix.de, mingo@...e.hu,
	linux-kernel@...r.kernel.org
Cc:	linux-tip-commits@...r.kernel.org
Subject: Re: [tip:tracing/ftrace] tracing: implement trace_clock_*() APIs

On Thu, Feb 26, 2009 at 05:45:48PM +0000, Ingo Molnar wrote:
> Author:     Ingo Molnar <mingo@...e.hu>
> AuthorDate: Thu, 26 Feb 2009 18:47:11 +0100
> Commit:     Ingo Molnar <mingo@...e.hu>
> CommitDate: Thu, 26 Feb 2009 18:44:06 +0100
> 
> tracing: implement trace_clock_*() APIs
> 
> Impact: implement new tracing timestamp APIs
> 
> Add three trace clock variants, with differing scalability/precision
> tradeoffs:
> 
>  -   local: CPU-local trace clock
>  -  medium: scalable global clock with some jitter
>  -  global: globally monotonic, serialized clock
> 
> Make the ring-buffer use the local trace clock internally.
> 
> Acked-by: Peter Zijlstra <peterz@...radead.org>
> Acked-by: Steven Rostedt <rostedt@...dmis.org>
> Signed-off-by: Ingo Molnar <mingo@...e.hu>
> 
> 
> ---
>  include/linux/trace_clock.h |   19 ++++++++
>  kernel/trace/Makefile       |    1 +
>  kernel/trace/ring_buffer.c  |    5 +-
>  kernel/trace/trace_clock.c  |  101 +++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 123 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/trace_clock.h b/include/linux/trace_clock.h
> new file mode 100644
> index 0000000..7a81303
> --- /dev/null
> +++ b/include/linux/trace_clock.h
> @@ -0,0 +1,19 @@
> +#ifndef _LINUX_TRACE_CLOCK_H
> +#define _LINUX_TRACE_CLOCK_H
> +
> +/*
> + * 3 trace clock variants, with differing scalability/precision
> + * tradeoffs:
> + *
> + *  -   local: CPU-local trace clock
> + *  -  medium: scalable global clock with some jitter
> + *  -  global: globally monotonic, serialized clock
> + */
> +#include <linux/compiler.h>
> +#include <linux/types.h>
> +
> +extern u64 notrace trace_clock_local(void);
> +extern u64 notrace trace_clock(void);
> +extern u64 notrace trace_clock_global(void);
> +
> +#endif /* _LINUX_TRACE_CLOCK_H */
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index 664b6c0..c931fe0 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -19,6 +19,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
>  obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
>  
>  obj-$(CONFIG_TRACING) += trace.o
> +obj-$(CONFIG_TRACING) += trace_clock.o
>  obj-$(CONFIG_TRACING) += trace_output.o
>  obj-$(CONFIG_TRACING) += trace_stat.o
>  obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index 8f19f1a..a8c275c 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -4,6 +4,7 @@
>   * Copyright (C) 2008 Steven Rostedt <srostedt@...hat.com>
>   */
>  #include <linux/ring_buffer.h>
> +#include <linux/trace_clock.h>
>  #include <linux/ftrace_irq.h>
>  #include <linux/spinlock.h>
>  #include <linux/debugfs.h>
> @@ -12,7 +13,6 @@
>  #include <linux/module.h>
>  #include <linux/percpu.h>
>  #include <linux/mutex.h>
> -#include <linux/sched.h>	/* used for sched_clock() (for now) */
>  #include <linux/init.h>
>  #include <linux/hash.h>
>  #include <linux/list.h>
> @@ -112,14 +112,13 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
>  /* Up this if you want to test the TIME_EXTENTS and normalization */
>  #define DEBUG_SHIFT 0
>  
> -/* FIXME!!! */
>  u64 ring_buffer_time_stamp(int cpu)
>  {
>  	u64 time;
>  
>  	preempt_disable_notrace();
>  	/* shift to debug/test normalization and TIME_EXTENTS */
> -	time = sched_clock() << DEBUG_SHIFT;
> +	time = trace_clock_local() << DEBUG_SHIFT;
>  	preempt_enable_no_resched_notrace();
>  
>  	return time;
> diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
> new file mode 100644
> index 0000000..2d4953f
> --- /dev/null
> +++ b/kernel/trace/trace_clock.c
> @@ -0,0 +1,101 @@
> +/*
> + * tracing clocks
> + *
> + *  Copyright (C) 2009 Red Hat, Inc., Ingo Molnar <mingo@...hat.com>
> + *
> + * Implements 3 trace clock variants, with differing scalability/precision
> + * tradeoffs:
> + *
> + *  -   local: CPU-local trace clock
> + *  -  medium: scalable global clock with some jitter
> + *  -  global: globally monotonic, serialized clock
> + *
> + * Tracer plugins will choose a default from these clocks.
> + */
> +#include <linux/spinlock.h>
> +#include <linux/hardirq.h>
> +#include <linux/module.h>
> +#include <linux/percpu.h>
> +#include <linux/sched.h>
> +#include <linux/ktime.h>
> +
> +/*
> + * trace_clock_local(): the simplest and least coherent tracing clock.
> + *
> + * Useful for tracing that does not cross to other CPUs nor
> + * does it go through idle events.
> + */
> +u64 notrace trace_clock_local(void)
> +{
> +	/*
> +	 * sched_clock() is an architecture implemented, fast, scalable,
> +	 * lockless clock. It is not guaranteed to be coherent across
> +	 * CPUs, nor across CPU idle events.
> +	 */
> +	return sched_clock();
> +}
> +
> +/*
> + * trace_clock(): 'in-between' trace clock. Not completely serialized,
> + * but not completely incorrect when crossing CPUs either.
> + *
> + * This is based on cpu_clock(), which will allow at most ~1 jiffy of
> + * jitter between CPUs. So it's a pretty scalable clock, but there
> + * can be offsets in the trace data.
> + */
> +u64 notrace trace_clock(void)
> +{
> +	return cpu_clock(raw_smp_processor_id());
> +}
> +
> +
> +/*
> + * trace_clock_global(): special globally coherent trace clock
> + *
> + * It has higher overhead than the other trace clocks but is still
> + * an order of magnitude faster than GTOD derived hardware clocks.
> + *
> + * Used by plugins that need globally coherent timestamps.
> + */
> +
> +static u64 prev_trace_clock_time;
> +
> +static raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp =
> +	(raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
> +
> +u64 notrace trace_clock_global(void)
> +{
> +	unsigned long flags;
> +	int this_cpu;
> +	u64 now;
> +
> +	raw_local_irq_save(flags);
> +
> +	this_cpu = raw_smp_processor_id();
> +	now = cpu_clock(this_cpu);
> +	/*
> +	 * If in an NMI context then don't risk lockups and return the
> +	 * cpu_clock() time:
> +	 */
> +	if (unlikely(in_nmi()))
> +		goto out;
> +
> +	__raw_spin_lock(&trace_clock_lock);
> +
> +	/*
> +	 * TODO: if this happens often then maybe we should reset
> +	 * my_scd->clock to prev_trace_clock_time+1, to make sure
> +	 * we start ticking with the local clock from now on?
> +	 */
> +	if ((s64)(now - prev_trace_clock_time) < 0)
> +		now = prev_trace_clock_time + 1;
> +
> +	prev_trace_clock_time = now;
> +
> +	__raw_spin_unlock(&trace_clock_lock);
> +
> + out:
> +	raw_local_irq_restore(flags);
> +
> +	return now;
> +}


Hi,

I missed this one.

Wouldn't your previous idea of a cmpxchg global clock be better?
Perhaps it would scale better while tracing on many cpus.

Anyway, it's one thing less on my TODO list :-)


> --
> To unsubscribe from this list: send the line "unsubscribe linux-tip-commits" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ