lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 18 Nov 2015 14:39:31 +0000 (UTC)
From:	Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
To:	rostedt <rostedt@...dmis.org>
Cc:	linux-kernel@...r.kernel.org, Thomas Gleixner <tglx@...utronix.de>,
	Ingo Molnar <mingo@...hat.com>,
	Peter Zijlstra <peterz@...radead.org>
Subject: Re: [PATCH v2 3/3] trace: emit tracepoint in preempt and irqs off
 tracer

----- On Nov 2, 2015, at 5:42 PM, Mathieu Desnoyers mathieu.desnoyers@...icios.com wrote:

> Allow Ftrace, Perf, and LTTng to hook on the irqsoff tracer and trace
> irqsoff and preemptoff critical sections.

Hi Steven,

I guess you ended up being busy lately. Your feedback on this patchset
is still very welcome.

Thanks,

Mathieu

> 
> How to setup the irqsoff tracer for this use-case (needed for use with
> Ftrace, Perf, or LTTng): if the tracing_thresh value is set, all
> durations beyond the specified threshold will emit a
> "core_critical_timing_hit" tracepoint. If unset, then the max duration
> will be tracked. The current max duration can be reset using the
> tracing_max_latency file.
> 
> echo 0 > /sys/kernel/debug/tracing/options/function-trace  # don't need function
> tracing
> echo 0 > /sys/kernel/debug/tracing/options/ftrace-buffer
> echo preemptirqsoff > /sys/kernel/debug/tracing/current_tracer
> echo 2 > /sys/kernel/debug/tracing/tracing_thresh      # set threshold (e.g. 2
> µs)
> echo 1 > /sys/kernel/debug/tracing/tracing_on
> 
> The following two commands can also be useful to tweak the
> irqsoff tracer:
> echo 0 > /sys/kernel/debug/tracing/tracing_max_latency # reset max latency
> echo 0 > /sys/kernel/debug/tracing/tracing_thresh      # disable threshold
> 
> *** An example usage of Ftrace hooking on this tracepoint: ***
> 
> echo 1 > /sys/kernel/debug/tracing/events/core/core_critical_timing_hit/enable
> cat /sys/kernel/debug/tracing/trace_pipe
> [...]
>    bash-2380   19d..3 52794942us : core_critical_timing_hit: ip=0xffffffff811021de
>    parent_ip=0xffffffff818d1b17 delta_ns=3402 irqs_disabled=0 preempt_disabled=1
>    in_softirq=0 in_irq=0 in_nmi=0
> 
> *** An example usage of Perf hooking on this tracepoint: ***
> 
> perf record -g -e core:core_critical_timing_hit -c 1 sleep 30
> perf report -g
> 
> +  100.00%   100.00%  sleep    [kernel.vmlinux]  [k] check_critical_timing
> +   92.75%     0.00%  sleep    [kernel.vmlinux]  [k] preempt_count_sub
> +   92.75%     0.00%  sleep    [kernel.vmlinux]  [k] trace_preempt_on
> +   65.22%     0.00%  sleep    [kernel.vmlinux]  [k] page_fault
> +   65.22%     0.00%  sleep    [kernel.vmlinux]  [k] do_page_fault
> +   63.77%     0.00%  sleep    [kernel.vmlinux]  [k] __do_page_fault
> +   63.77%     0.00%  sleep    [kernel.vmlinux]  [k] handle_mm_fault
> +   56.52%     0.00%  sleep    [kernel.vmlinux]  [k] _raw_spin_unlock
> +   30.43%     0.00%  sleep    ld-2.19.so        [.] _dl_sysdep_start
> [...]
> 
> *** An example usage of LTTng hooking on this tracepoint: ***
> 
> (as root)
> lttng-sessiond -d
> 
> (then, as root or tracing group)
> lttng create --live
> lttng enable-event -k core_critical_timing_hit
> lttng start
> lttng view
> [...]
> [17:34:17.090669588] (+1.569689755) compudjdev core_critical_timing_hit: {
> cpu_id = 3 }, { ip = 0xFFFFFFFF81102135, parent_ip = 0xFFFFFFFFA0798E13,
> delta_ns = 252994, irqs_disabled = 0, preempt_disabled = 1, in_softirq = 0,
> in_irq = 0, in_nmi = 0 }
> 
> Tracepoints core_critical_timing_start and core_critical_timing_stop are
> also available. They trigger at a much higher rate than
> core_critical_timing_hit.
> 
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
> CC: Thomas Gleixner <tglx@...utronix.de>
> CC: Steven Rostedt <rostedt@...dmis.org>
> CC: Ingo Molnar <mingo@...hat.com>
> CC: Peter Zijlstra <peterz@...radead.org>
> ---
> Changes since v1:
> - Remove unneeded tracepoint jump label bypass.
> - Enhance TP_printk() formatting.
> - Use new ftrace-buffer option in changelog.
> - Add core_critical_timing_start/stop tracepoints.
> ---
> include/trace/events/core.h  | 96 ++++++++++++++++++++++++++++++++++++++++++++
> kernel/trace/trace_irqsoff.c | 17 ++++++--
> 2 files changed, 109 insertions(+), 4 deletions(-)
> create mode 100644 include/trace/events/core.h
> 
> diff --git a/include/trace/events/core.h b/include/trace/events/core.h
> new file mode 100644
> index 0000000..ccc3430
> --- /dev/null
> +++ b/include/trace/events/core.h
> @@ -0,0 +1,96 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM core
> +
> +#if !defined(_TRACE_CORE_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_CORE_H
> +
> +#include <linux/tracepoint.h>
> +#include <linux/irqflags.h>
> +#include <linux/preempt.h>
> +
> +/*
> + * Tracepoint for critical timings max/threshold hit.
> + */
> +TRACE_EVENT(core_critical_timing_hit,
> +
> +	TP_PROTO(unsigned long ip, unsigned long parent_ip,
> +			unsigned long flags, int preempt_cnt,
> +			cycles_t delta_ns),
> +
> +	TP_ARGS(ip, parent_ip, flags, preempt_cnt, delta_ns),
> +
> +	TP_STRUCT__entry(
> +		__field(	unsigned long, ip		)
> +		__field(	unsigned long, parent_ip	)
> +		__field(	cycles_t, delta_ns		)
> +		__field(	unsigned long, flags		)
> +		__field(	int, preempt_cnt		)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->ip		= ip;
> +		__entry->parent_ip	= parent_ip;
> +		__entry->delta_ns	= delta_ns;
> +		__entry->flags		= flags;
> +		__entry->preempt_cnt	= preempt_cnt;
> +	),
> +
> +	TP_printk("ip=0x%lx parent_ip=0x%lx delta_ns=%llu irqs_disabled=%u
> preempt_disabled=%u in_softirq=%u in_irq=%u in_nmi=%u",
> +		  __entry->ip, __entry->parent_ip,
> +		  (unsigned long long) __entry->delta_ns,
> +		  !!raw_irqs_disabled_flags(__entry->flags),
> +		  !!(__entry->preempt_cnt & PREEMPT_MASK),
> +		  !!(__entry->preempt_cnt & SOFTIRQ_MASK),
> +		  !!(__entry->preempt_cnt & HARDIRQ_MASK),
> +		  !!(__entry->preempt_cnt & NMI_MASK))
> +);
> +
> +/*
> + * Tracepoint for critical timings start/stop.
> + */
> +DECLARE_EVENT_CLASS(core_critical_timing,
> +
> +	TP_PROTO(unsigned long ip, unsigned long parent_ip,
> +			unsigned long flags, int preempt_cnt),
> +
> +	TP_ARGS(ip, parent_ip, flags, preempt_cnt),
> +
> +	TP_STRUCT__entry(
> +		__field(	unsigned long, ip		)
> +		__field(	unsigned long, parent_ip	)
> +		__field(	unsigned long, flags		)
> +		__field(	int, preempt_cnt		)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->ip		= ip;
> +		__entry->parent_ip	= parent_ip;
> +		__entry->flags		= flags;
> +		__entry->preempt_cnt	= preempt_cnt;
> +	),
> +
> +	TP_printk("ip=0x%lx parent_ip=0x%lx irqs_disabled=%u preempt_disabled=%u
> in_softirq=%u in_irq=%u in_nmi=%u",
> +		  __entry->ip, __entry->parent_ip,
> +		  !!raw_irqs_disabled_flags(__entry->flags),
> +		  !!(__entry->preempt_cnt & PREEMPT_MASK),
> +		  !!(__entry->preempt_cnt & SOFTIRQ_MASK),
> +		  !!(__entry->preempt_cnt & HARDIRQ_MASK),
> +		  !!(__entry->preempt_cnt & NMI_MASK))
> +);
> +
> +DEFINE_EVENT(core_critical_timing, core_critical_timing_start,
> +	TP_PROTO(unsigned long ip, unsigned long parent_ip,
> +		unsigned long flags, int preempt_cnt),
> +	TP_ARGS(ip, parent_ip, flags, preempt_cnt)
> +);
> +
> +DEFINE_EVENT(core_critical_timing, core_critical_timing_stop,
> +	TP_PROTO(unsigned long ip, unsigned long parent_ip,
> +		unsigned long flags, int preempt_cnt),
> +	TP_ARGS(ip, parent_ip, flags, preempt_cnt)
> +);
> +
> +#endif /* _TRACE_CORE_H */
> +
> +/* This part must be outside protection */
> +#include <trace/define_trace.h>
> diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
> index 3e5635d..abfb77e 100644
> --- a/kernel/trace/trace_irqsoff.c
> +++ b/kernel/trace/trace_irqsoff.c
> @@ -14,6 +14,9 @@
> #include <linux/module.h>
> #include <linux/ftrace.h>
> 
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/core.h>
> +
> #include "trace.h"
> 
> static struct trace_array		*irqsoff_trace __read_mostly;
> @@ -339,6 +342,8 @@ check_critical_timing(struct trace_array *tr,
> 		/* Skip 5 functions to get to the irq/preempt enable function */
> 		__trace_stack(tr, flags, 5, pc);
> 	}
> +	trace_core_critical_timing_hit(CALLER_ADDR0, parent_ip, flags, pc,
> +			delta);
> 
> 	if (data->critical_sequence != max_sequence)
> 		goto out_unlock;
> @@ -365,7 +370,7 @@ out:
> static inline void
> start_critical_timing(unsigned long ip, unsigned long parent_ip)
> {
> -	int cpu;
> +	int cpu, pc;
> 	struct trace_array *tr = irqsoff_trace;
> 	struct trace_array_cpu *data;
> 	unsigned long flags;
> @@ -390,8 +395,10 @@ start_critical_timing(unsigned long ip, unsigned long
> parent_ip)
> 	data->critical_start = parent_ip ? : ip;
> 
> 	local_save_flags(flags);
> +	pc = preempt_count();
> 	if (trace_flags & TRACE_ITER_FTRACE_BUFFER)
> -		__trace_function(tr, ip, parent_ip, flags, preempt_count());
> +		__trace_function(tr, ip, parent_ip, flags, pc);
> +	trace_core_critical_timing_start(ip, parent_ip, flags, pc);
> 	per_cpu(tracing_cpu, cpu) = 1;
> 
> 	atomic_dec(&data->disabled);
> @@ -400,7 +407,7 @@ start_critical_timing(unsigned long ip, unsigned long
> parent_ip)
> static inline void
> stop_critical_timing(unsigned long ip, unsigned long parent_ip)
> {
> -	int cpu;
> +	int cpu, pc;
> 	struct trace_array *tr = irqsoff_trace;
> 	struct trace_array_cpu *data;
> 	unsigned long flags;
> @@ -424,8 +431,10 @@ stop_critical_timing(unsigned long ip, unsigned long
> parent_ip)
> 	atomic_inc(&data->disabled);
> 
> 	local_save_flags(flags);
> +	pc = preempt_count();
> 	if (trace_flags & TRACE_ITER_FTRACE_BUFFER)
> -		__trace_function(tr, ip, parent_ip, flags, preempt_count());
> +		__trace_function(tr, ip, parent_ip, flags, pc);
> +	trace_core_critical_timing_stop(ip, parent_ip, flags, pc);
> 	check_critical_timing(tr, data, parent_ip ? : ip, cpu);
> 	data->critical_start = 0;
> 	atomic_dec(&data->disabled);
> --
> 2.1.4

-- 
Mathieu Desnoyers
EfficiOS Inc.
http://www.efficios.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ