lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20170920142313.4fc7c519@gandalf.local.home>
Date:   Wed, 20 Sep 2017 14:23:13 -0400
From:   Steven Rostedt <rostedt@...dmis.org>
To:     Joel Fernandes <joelaf@...gle.com>
Cc:     linux-kernel@...r.kernel.org, Peter Zilstra <peterz@...radead.org>,
        kernel-team@...roid.com, Ingo Molnar <mingo@...hat.com>
Subject: Re: [PATCH v5 2/2] tracing: Add support for preempt and irq
 enable/disable events

On Tue, 12 Sep 2017 11:11:09 -0700
Joel Fernandes <joelaf@...gle.com> wrote:

> Preempt and irq trace events can be used for tracing the start and
> end of an atomic section which can be used by a trace viewer like
> systrace to graphically view the start and end of an atomic section and
> correlate them with latencies and scheduling issues.
> 
> This also serves as a prelude to using synthetic events or probes to
> rewrite the preempt and irqsoff tracers, along with numerous benefits of
> using trace events features for these events.
> 
> Cc: Steven Rostedt <rostedt@...dmis.org>
> Cc: Peter Zijlstra <peterz@...radead.org>
> Cc: kernel-team@...roid.com
> Signed-off-by: Joel Fernandes <joelaf@...gle.com>
> ---
>  include/linux/ftrace.h            |  3 +-
>  include/linux/irqflags.h          |  4 +--
>  include/trace/events/preemptirq.h | 62 +++++++++++++++++++++++++++++++++++++++
>  kernel/trace/Kconfig              |  9 ++++++
>  kernel/trace/Makefile             |  1 +
>  kernel/trace/trace_irqsoff.c      | 57 ++++++++++++++++++++++++++++++++++-
>  6 files changed, 132 insertions(+), 4 deletions(-)
>  create mode 100644 include/trace/events/preemptirq.h
> 
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 6383115e9d2c..5790ba7a85bd 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -742,7 +742,8 @@ static inline unsigned long get_lock_parent_ip(void)
>    static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { }
>  #endif
>  
> -#ifdef CONFIG_PREEMPT_TRACER
> +#if defined(CONFIG_PREEMPT_TRACER) || \
> +	(defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS))
>    extern void trace_preempt_on(unsigned long a0, unsigned long a1);
>    extern void trace_preempt_off(unsigned long a0, unsigned long a1);
>  #else
> diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
> index 5dd1272d1ab2..6ad6732a4e98 100644
> --- a/include/linux/irqflags.h
> +++ b/include/linux/irqflags.h
> @@ -44,8 +44,8 @@
>  # define INIT_TRACE_IRQFLAGS
>  #endif
>  
> -#if defined(CONFIG_IRQSOFF_TRACER) || \
> -	defined(CONFIG_PREEMPT_TRACER)
> +#if defined(CONFIG_IRQSOFF_TRACER) || defined(CONFIG_PREEMPT_TRACER) || \
> +	defined(CONFIG_PREEMPTIRQ_EVENTS)
>   extern void stop_critical_timings(void);
>   extern void start_critical_timings(void);
>  #else
> diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h
> new file mode 100644
> index 000000000000..601935e32cd1
> --- /dev/null
> +++ b/include/trace/events/preemptirq.h
> @@ -0,0 +1,62 @@
> +#ifdef CONFIG_PREEMPTIRQ_EVENTS
> +
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM preemptirq
> +
> +#if !defined(_TRACE_PREEMPTIRQ_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_PREEMPTIRQ_H
> +
> +#include <linux/ktime.h>
> +#include <linux/tracepoint.h>
> +#include <linux/string.h>
> +#include <asm/sections.h>
> +
> +DECLARE_EVENT_CLASS(preemptirq_template,
> +
> +	TP_PROTO(unsigned long ip, unsigned long parent_ip),
> +
> +	TP_ARGS(ip, parent_ip),
> +
> +	TP_STRUCT__entry(
> +		__field(u32, caller_offs)
> +		__field(u32, parent_offs)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->caller_offs = (u32)(ip - (unsigned long)_stext);
> +		__entry->parent_offs = (u32)(parent_ip - (unsigned long)_stext);
> +	),
> +
> +	TP_printk("caller=%pF parent=%pF\n",

No '\n', otherwise you add a lot of whitespace to the trace:

====
          <idle>-0     [002] d..1   252.533022: preempt_enable: caller=cpuidle_enter_state+0xfe/0x3a0 parent=cpuidle_enter+0x17/0x20

     rcu_preempt-8     [002] ...1   252.533063: preempt_enable: caller=schedule+0x4a/0x90 parent=schedule+0x4a/0x90

     rcu_preempt-8     [002] d..1   252.533065: preempt_disable: caller=_raw_spin_lock_irqsave+0x2f/0x60 parent=lock_timer_base+0x81/0xa0

     rcu_preempt-8     [002] ...1   252.533066: preempt_enable: caller=_raw_spin_unlock_irqrestore+0x4a/0x80 parent=try_to_del_timer_sync+0x44/0x60

     rcu_preempt-8     [002] d..1   252.533067: preempt_disable: caller=_raw_spin_lock_irqsave+0x2f/0x60 parent=prepare_to_swait+0x22/0x70

     rcu_preempt-8     [002] ...1   252.533067: preempt_enable: caller=_raw_spin_unlock_irqrestore+0x4a/0x80 parent=prepare_to_swait+0x63/0x70
====


> +		  (void *)((unsigned long)(_stext) + __entry->caller_offs),
> +		  (void *)((unsigned long)(_stext) + __entry->parent_offs))
> +);
> +
> +DEFINE_EVENT(preemptirq_template, irq_disable,
> +	     TP_PROTO(unsigned long ip, unsigned long parent_ip),
> +	     TP_ARGS(ip, parent_ip));
> +
> +DEFINE_EVENT(preemptirq_template, irq_enable,
> +	     TP_PROTO(unsigned long ip, unsigned long parent_ip),
> +	     TP_ARGS(ip, parent_ip));
> +
> +DEFINE_EVENT(preemptirq_template, preempt_disable,
> +	     TP_PROTO(unsigned long ip, unsigned long parent_ip),
> +	     TP_ARGS(ip, parent_ip));
> +
> +DEFINE_EVENT(preemptirq_template, preempt_enable,
> +	     TP_PROTO(unsigned long ip, unsigned long parent_ip),
> +	     TP_ARGS(ip, parent_ip));
> +
> +#endif /* _TRACE_PREEMPTIRQ_H */
> +
> +#include <trace/define_trace.h>
> +
> +#else /* !CONFIG_PREEMPTIRQ_EVENTS */
> +
> +#define trace_irq_enable(...)
> +#define trace_irq_disable(...)
> +#define trace_preempt_enable(...)
> +#define trace_preempt_disable(...)
> +
> +#endif
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 434c840e2d82..420666152de8 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -160,6 +160,15 @@ config FUNCTION_GRAPH_TRACER
>  	  address on the current task structure into a stack of calls.
>  
>  
> +config PREEMPTIRQ_EVENTS
> +	bool "Enable trace events for preempt and irq disable/enable"
> +	select TRACE_IRQFLAGS
> +	default n
> +	help
> +	  Enable tracing of disable and enable events for preemption and irqs.
> +	  For tracing preempt disable/enable events, CONFIG_DEBUG_PREEMPT must
> +	  be enabled.
> +
>  config IRQSOFF_TRACER
>  	bool "Interrupts-off Latency Tracer"
>  	default n
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index 90f2701d92a7..9f62eee61f14 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -34,6 +34,7 @@ obj-$(CONFIG_TRACING) += trace_printk.o
>  obj-$(CONFIG_TRACING_MAP) += tracing_map.o
>  obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
>  obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
> +obj-$(CONFIG_PREEMPTIRQ_EVENTS) += trace_irqsoff.o
>  obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
>  obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
>  obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
> diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
> index 26c688740742..361a17f83c4e 100644
> --- a/kernel/trace/trace_irqsoff.c
> +++ b/kernel/trace/trace_irqsoff.c
> @@ -16,6 +16,9 @@
>  
>  #include "trace.h"
>  
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/preemptirq.h>
> +
>  #if defined(CONFIG_IRQSOFF_TRACER) || defined(CONFIG_PREEMPT_TRACER)
>  static struct trace_array		*irqsoff_trace __read_mostly;
>  static int				tracer_enabled __read_mostly;
> @@ -777,14 +780,29 @@ static inline void tracer_preempt_on(unsigned long a0, unsigned long a1) { }
>  static inline void tracer_preempt_off(unsigned long a0, unsigned long a1) { }
>  #endif
>  
> +static DEFINE_PER_CPU(int, tracing_irq_cpu);
> +static DEFINE_PER_CPU(int, tracing_preempt_cpu);
> +
>  void start_critical_timings(void)
>  {
> +	if (this_cpu_read(tracing_preempt_cpu))
> +		trace_preempt_enable(CALLER_ADDR0, CALLER_ADDR1);
> +
> +	if (this_cpu_read(tracing_irq_cpu))
> +		trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1);
> +
>  	start_critical_timings_tracer();
>  }
>  EXPORT_SYMBOL_GPL(start_critical_timings);
>  
>  void stop_critical_timings(void)
>  {
> +	if (this_cpu_read(tracing_preempt_cpu))
> +		trace_preempt_disable(CALLER_ADDR0, CALLER_ADDR1);
> +
> +	if (this_cpu_read(tracing_irq_cpu))
> +		trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1);
> +
>  	stop_critical_timings_tracer();
>  }
>  EXPORT_SYMBOL_GPL(stop_critical_timings);
> @@ -792,24 +810,48 @@ EXPORT_SYMBOL_GPL(stop_critical_timings);
>  #if defined(CONFIG_TRACE_IRQFLAGS) && !defined(CONFIG_PROVE_LOCKING)
>  void trace_hardirqs_on(void)
>  {
> +	if (!this_cpu_read(tracing_irq_cpu))
> +		return;
> +
> +	trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1);
>  	tracer_hardirqs_on();
> +
> +	this_cpu_write(tracing_irq_cpu, 0);
>  }
>  EXPORT_SYMBOL(trace_hardirqs_on);
>  
>  void trace_hardirqs_off(void)
>  {
> +	if (this_cpu_read(tracing_irq_cpu))
> +		return;
> +
> +	this_cpu_write(tracing_irq_cpu, 1);
> +
> +	trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1);
>  	tracer_hardirqs_off();
>  }
>  EXPORT_SYMBOL(trace_hardirqs_off);
>  
>  __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
>  {
> +	if (!this_cpu_read(tracing_irq_cpu))
> +		return;
> +
> +	trace_irq_enable(CALLER_ADDR0, caller_addr);
>  	tracer_hardirqs_on_caller(caller_addr);
> +
> +	this_cpu_write(tracing_irq_cpu, 0);
>  }
>  EXPORT_SYMBOL(trace_hardirqs_on_caller);
>  
>  __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
>  {
> +	if (this_cpu_read(tracing_irq_cpu))
> +		return;
> +
> +	this_cpu_write(tracing_irq_cpu, 1);
> +
> +	trace_irq_disable(CALLER_ADDR0, caller_addr);
>  	tracer_hardirqs_off_caller(caller_addr);
>  }
>  EXPORT_SYMBOL(trace_hardirqs_off_caller);
> @@ -831,14 +873,27 @@ inline void print_irqtrace_events(struct task_struct *curr)
>  }
>  #endif
>  
> -#ifdef CONFIG_PREEMPT_TRACER
> +#if defined(CONFIG_PREEMPT_TRACER) || \
> +	(defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS))
>  void trace_preempt_on(unsigned long a0, unsigned long a1)
>  {
> +	if (!this_cpu_read(tracing_preempt_cpu))
> +		return;
> +
> +	trace_preempt_enable(a0, a1);
>  	tracer_preempt_on(a0, a1);
> +
> +	this_cpu_write(tracing_preempt_cpu, 0);
>  }
>  
>  void trace_preempt_off(unsigned long a0, unsigned long a1)
>  {
> +	if (this_cpu_read(tracing_preempt_cpu))
> +		return;
> +
> +	this_cpu_write(tracing_preempt_cpu, 1);
> +
> +	trace_preempt_disable(a0, a1);
>  	tracer_preempt_off(a0, a1);
>  }
>  #endif

Need to fold in the following patch as well. Otherwise RCU may have
issues (run with lockdep enabled and you'll see all the splats).

-- Steve

diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 361a17f..7dcb431 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -786,10 +786,10 @@ static DEFINE_PER_CPU(int, tracing_preempt_cpu);
 void start_critical_timings(void)
 {
 	if (this_cpu_read(tracing_preempt_cpu))
-		trace_preempt_enable(CALLER_ADDR0, CALLER_ADDR1);
+		trace_preempt_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
 
 	if (this_cpu_read(tracing_irq_cpu))
-		trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1);
+		trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
 
 	start_critical_timings_tracer();
 }
@@ -798,10 +798,10 @@ EXPORT_SYMBOL_GPL(start_critical_timings);
 void stop_critical_timings(void)
 {
 	if (this_cpu_read(tracing_preempt_cpu))
-		trace_preempt_disable(CALLER_ADDR0, CALLER_ADDR1);
+		trace_preempt_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
 
 	if (this_cpu_read(tracing_irq_cpu))
-		trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1);
+		trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
 
 	stop_critical_timings_tracer();
 }
@@ -813,7 +813,7 @@ void trace_hardirqs_on(void)
 	if (!this_cpu_read(tracing_irq_cpu))
 		return;
 
-	trace_irq_enable(CALLER_ADDR0, CALLER_ADDR1);
+	trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
 	tracer_hardirqs_on();
 
 	this_cpu_write(tracing_irq_cpu, 0);
@@ -827,7 +827,7 @@ void trace_hardirqs_off(void)
 
 	this_cpu_write(tracing_irq_cpu, 1);
 
-	trace_irq_disable(CALLER_ADDR0, CALLER_ADDR1);
+	trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
 	tracer_hardirqs_off();
 }
 EXPORT_SYMBOL(trace_hardirqs_off);
@@ -837,7 +837,7 @@ __visible void trace_hardirqs_on_caller(unsigned long caller_addr)
 	if (!this_cpu_read(tracing_irq_cpu))
 		return;
 
-	trace_irq_enable(CALLER_ADDR0, caller_addr);
+	trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr);
 	tracer_hardirqs_on_caller(caller_addr);
 
 	this_cpu_write(tracing_irq_cpu, 0);
@@ -851,9 +851,9 @@ __visible void trace_hardirqs_off_caller(unsigned long caller_addr)
 
 	this_cpu_write(tracing_irq_cpu, 1);
 
-	trace_irq_disable(CALLER_ADDR0, caller_addr);
+	trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr);
 	tracer_hardirqs_off_caller(caller_addr);
 }
 
 EXPORT_SYMBOL(trace_hardirqs_off_caller);
 
 /*
@@ -880,7 +880,7 @@ void trace_preempt_on(unsigned long a0, unsigned long a1)
 	if (!this_cpu_read(tracing_preempt_cpu))
 		return;
 
-	trace_preempt_enable(a0, a1);
+	trace_preempt_enable_rcuidle(a0, a1);
 	tracer_preempt_on(a0, a1);
 
 	this_cpu_write(tracing_preempt_cpu, 0);
@@ -893,7 +893,7 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
 
 	this_cpu_write(tracing_preempt_cpu, 1);
 
-	trace_preempt_disable(a0, a1);
+	trace_preempt_disable_rcuidle(a0, a1);
 	tracer_preempt_off(a0, a1);
 }
 #endif

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ