lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20110526165424.GD2386@linux.vnet.ibm.com>
Date:	Thu, 26 May 2011 09:54:24 -0700
From:	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
To:	Steven Rostedt <rostedt@...dmis.org>
Cc:	linux-kernel@...r.kernel.org, Ingo Molnar <mingo@...e.hu>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Peter Zijlstra <peterz@...radead.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Witold Baryluk <baryluk@....if.uj.edu.pl>
Subject: Re: [PATCH 12/12] ftrace: Add internal recursive checks

On Thu, May 26, 2011 at 11:25:54AM -0400, Steven Rostedt wrote:
> From: Steven Rostedt <rostedt@...dmis.org>
> 
> Witold reported a reboot caused by the selftests of the dynamic function
> tracer. He sent me a config and I used ktest to do a config_bisect on it
> (as my config did not cause the crash). It pointed out that the problem
> config was CONFIG_PROVE_RCU.
> 
> What happened was that if multiple callbacks are attached to the
> function tracer, we iterate a list of callbacks. Because the list is
> managed by synchronize_sched() and preempt_disable(), the access to the
> pointers uses rcu_dereference_raw().
> 
> When PROVE_RCU is enabled, the rcu_dereference_raw() calls some
> debugging functions, which happen to be traced. The tracing of the debug
> function would then call rcu_dereference_raw() which would then call the
> debug function and then... well you get the idea.
> 
> I first wrote two different patches to solve this bug.
> 
> 1) add a __rcu_dereference_raw() that would not do any checks.
> 2) add notrace to the offending debug functions.
> 
> Both of these patches worked.
> 
> Talking with Paul McKenney on IRC, he suggested to add recursion
> detection instead. This seemed to be a better solution, so I decided to
> implement it. As the task_struct already has a trace_recursion field to
> detect recursion in the ring buffer, and that field allows only a very
> small count, I decided to use that same variable to add flags that can
> detect recursion inside the infrastructure of the function tracer.
> 
> I plan to change it so that the task struct bit can be checked in
> mcount, but as that requires changes to all archs, I will hold that off
> to the next merge window.

Looks good to me.

Reviewed-by: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>

> Cc: Ingo Molnar <mingo@...e.hu>
> Cc: Peter Zijlstra <peterz@...radead.org>
> Cc: Thomas Gleixner <tglx@...utronix.de>
> Cc: Frederic Weisbecker <fweisbec@...il.com>
> Cc: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
> Link: http://lkml.kernel.org/r/1306348063.1465.116.camel@gandalf.stny.rr.com
> Reported-by: Witold Baryluk <baryluk@....if.uj.edu.pl>
> Signed-off-by: Steven Rostedt <rostedt@...dmis.org>
> ---
>  include/linux/sched.h      |    2 +-
>  kernel/trace/ftrace.c      |   13 ++++++++++++-
>  kernel/trace/ring_buffer.c |   10 +++++-----
>  kernel/trace/trace.h       |   15 +++++++++++++++
>  4 files changed, 33 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index d8b2d0b..7b78d9c 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1513,7 +1513,7 @@ struct task_struct {
>  #ifdef CONFIG_TRACING
>  	/* state flags for use by tracers */
>  	unsigned long trace;
> -	/* bitmask of trace recursion */
> +	/* bitmask and counter of trace recursion */
>  	unsigned long trace_recursion;
>  #endif /* CONFIG_TRACING */
>  #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 25949b3..1ee417f 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -109,12 +109,18 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
>  static void ftrace_global_list_func(unsigned long ip,
>  				    unsigned long parent_ip)
>  {
> -	struct ftrace_ops *op = rcu_dereference_raw(ftrace_global_list); /*see above*/
> +	struct ftrace_ops *op;
> +
> +	if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
> +		return;
> 
> +	trace_recursion_set(TRACE_GLOBAL_BIT);
> +	op = rcu_dereference_raw(ftrace_global_list); /*see above*/
>  	while (op != &ftrace_list_end) {
>  		op->func(ip, parent_ip);
>  		op = rcu_dereference_raw(op->next); /*see above*/
>  	};
> +	trace_recursion_clear(TRACE_GLOBAL_BIT);
>  }
> 
>  static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip)
> @@ -3490,6 +3496,10 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
>  {
>  	struct ftrace_ops *op;
> 
> +	if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT)))
> +		return;
> +
> +	trace_recursion_set(TRACE_INTERNAL_BIT);
>  	/*
>  	 * Some of the ops may be dynamically allocated,
>  	 * they must be freed after a synchronize_sched().
> @@ -3502,6 +3512,7 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
>  		op = rcu_dereference_raw(op->next);
>  	};
>  	preempt_enable_notrace();
> +	trace_recursion_clear(TRACE_INTERNAL_BIT);
>  }
> 
>  static void clear_ftrace_swapper(void)
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index 0ef7b4b..b0c7aa4 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -2216,7 +2216,7 @@ static noinline void trace_recursive_fail(void)
> 
>  	printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
>  		    "HC[%lu]:SC[%lu]:NMI[%lu]\n",
> -		    current->trace_recursion,
> +		    trace_recursion_buffer(),
>  		    hardirq_count() >> HARDIRQ_SHIFT,
>  		    softirq_count() >> SOFTIRQ_SHIFT,
>  		    in_nmi());
> @@ -2226,9 +2226,9 @@ static noinline void trace_recursive_fail(void)
> 
>  static inline int trace_recursive_lock(void)
>  {
> -	current->trace_recursion++;
> +	trace_recursion_inc();
> 
> -	if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
> +	if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH))
>  		return 0;
> 
>  	trace_recursive_fail();
> @@ -2238,9 +2238,9 @@ static inline int trace_recursive_lock(void)
> 
>  static inline void trace_recursive_unlock(void)
>  {
> -	WARN_ON_ONCE(!current->trace_recursion);
> +	WARN_ON_ONCE(!trace_recursion_buffer());
> 
> -	current->trace_recursion--;
> +	trace_recursion_dec();
>  }
> 
>  #else
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 6b69c4b..229f859 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -784,4 +784,19 @@ extern const char *__stop___trace_bprintk_fmt[];
>  	FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
>  #include "trace_entries.h"
> 
> +/* Only current can touch trace_recursion */
> +#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
> +#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
> +
> +/* Ring buffer has the 10 LSB bits to count */
> +#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
> +
> +/* for function tracing recursion */
> +#define TRACE_INTERNAL_BIT		(1<<11)
> +#define TRACE_GLOBAL_BIT		(1<<12)
> +
> +#define trace_recursion_set(bit)	do { (current)->trace_recursion |= (bit); } while (0)
> +#define trace_recursion_clear(bit)	do { (current)->trace_recursion &= ~(bit); } while (0)
> +#define trace_recursion_test(bit)	((current)->trace_recursion & (bit))
> +
>  #endif /* _LINUX_KERNEL_TRACE_H */
> -- 
> 1.7.4.4
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ