Message-ID: <20130831192855.GH3871@linux.vnet.ibm.com>
Date:	Sat, 31 Aug 2013 12:28:55 -0700
From:	"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>
To:	Steven Rostedt <rostedt@...dmis.org>
Cc:	linux-kernel@...r.kernel.org, Ingo Molnar <mingo@...nel.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Peter Zijlstra <peterz@...radead.org>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Jiri Olsa <jolsa@...hat.com>
Subject: Re: [RFC][PATCH 01/18 v2] ftrace: Add hash list to save RCU unsafe
 functions

On Sat, Aug 31, 2013 at 01:11:18AM -0400, Steven Rostedt wrote:
> From: "Steven Rostedt (Red Hat)" <rostedt@...dmis.org>
> 
> Some ftrace function tracing callbacks use RCU (perf, for example).
> Thus, if such a callback is called while tracing a function that runs
> outside of RCU tracking, like when entering or leaving NO_HZ idle or
> userspace, the RCU read locks taken in the callback are useless.
> 
> As normal function tracing does not use RCU, and function tracing
> happens to help debug RCU, we do not want to prevent all callbacks
> from tracing these RCU-unsafe functions. Instead, we create an
> infrastructure that lets us mark these RCU-unsafe functions, and
> only callbacks that explicitly state that they do not use RCU will
> be called from them.
> 
> This patch adds the infrastructure to create the hash of functions
> that are RCU unsafe. This is done with the FTRACE_UNSAFE_RCU()
> macro, which works like EXPORT_SYMBOL() does. Simply place the macro
> under the RCU-unsafe function:
> 
> void func(void)
> {
> 	[...]
> }
> FTRACE_UNSAFE_RCU(func);
> 
> Cc: Jiri Olsa <jolsa@...hat.com>
> Cc: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
> Signed-off-by: Steven Rostedt <rostedt@...dmis.org>

Acked-by: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>

> ---
>  include/asm-generic/vmlinux.lds.h |   10 +++
>  include/linux/ftrace.h            |   38 +++++++++++
>  kernel/trace/ftrace.c             |  135 ++++++++++++++++++++++++++++++++++---
>  3 files changed, 174 insertions(+), 9 deletions(-)
> 
> diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
> index 69732d2..fdfddd2 100644
> --- a/include/asm-generic/vmlinux.lds.h
> +++ b/include/asm-generic/vmlinux.lds.h
> @@ -93,6 +93,15 @@
>  #define MCOUNT_REC()
>  #endif
> 
> +#ifdef CONFIG_FUNCTION_TRACER
> +#define FTRACE_UNSAFE_RCU()	. = ALIGN(8);				\
> +			VMLINUX_SYMBOL(__start_ftrace_unsafe_rcu) = .;	\
> +			*(_ftrace_unsafe_rcu)				\
> +			VMLINUX_SYMBOL(__stop_ftrace_unsafe_rcu) = .;
> +#else
> +#define FTRACE_UNSAFE_RCU()
> +#endif
> +
>  #ifdef CONFIG_TRACE_BRANCH_PROFILING
>  #define LIKELY_PROFILE()	VMLINUX_SYMBOL(__start_annotated_branch_profile) = .; \
>  				*(_ftrace_annotated_branch)			      \
> @@ -479,6 +488,7 @@
>  	MEM_DISCARD(init.data)						\
>  	KERNEL_CTORS()							\
>  	MCOUNT_REC()							\
> +	FTRACE_UNSAFE_RCU()						\
>  	*(.init.rodata)							\
>  	FTRACE_EVENTS()							\
>  	TRACE_SYSCALLS()						\
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 9f15c00..1d17a82 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -92,6 +92,9 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
>   * STUB   - The ftrace_ops is just a placeholder.
>   * INITIALIZED - The ftrace_ops has already been initialized (the first time
>   *            register_ftrace_function() is called, it will initialize the ops)
> + * RCU_SAFE - The callback does not use any RCU locking. This allows the
> + *            callback to be called in locations where RCU is not active
> + *            (like going into or coming out of NO_HZ idle).
>   */
>  enum {
>  	FTRACE_OPS_FL_ENABLED			= 1 << 0,
> @@ -103,6 +106,7 @@ enum {
>  	FTRACE_OPS_FL_RECURSION_SAFE		= 1 << 6,
>  	FTRACE_OPS_FL_STUB			= 1 << 7,
>  	FTRACE_OPS_FL_INITIALIZED		= 1 << 8,
> +	FTRACE_OPS_FL_RCU_SAFE			= 1 << 9,
>  };
> 
>  struct ftrace_ops {
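
A quick sketch of the callback side, for readers following along: an
ftrace_ops whose handler takes no RCU read locks would advertise that
with the new flag at registration time. The ops and handler below are
hypothetical; only FTRACE_OPS_FL_RCU_SAFE comes from this patch:

	static void my_notrace_func(unsigned long ip, unsigned long parent_ip,
				    struct ftrace_ops *op, struct pt_regs *regs)
	{
		/* must not take rcu_read_lock() or use other RCU APIs here */
	}

	static struct ftrace_ops my_ops = {
		.func	= my_notrace_func,
		.flags	= FTRACE_OPS_FL_RCU_SAFE,
	};

	/* register_ftrace_function(&my_ops); */
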
> @@ -219,6 +223,40 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops)
>  extern void ftrace_stub(unsigned long a0, unsigned long a1,
>  			struct ftrace_ops *op, struct pt_regs *regs);
> 
> +/*
> + * In order to speed up boot, save both the name and the
> + * address of the function that is to be added to a hash. The
> + * list of function mcount addresses is sorted by address,
> + * but we still need the name to verify the actual location,
> + * as the mcount call is usually not at the address of the
> + * start of the function.
> + */
> +struct ftrace_func_finder {
> +	const char		*name;
> +	unsigned long		ip;
> +};
> +
> +/*
> + * Functions that are called when RCU is not tracking the CPU
> + * (like when entering or leaving NO_HZ mode, where RCU ignores
> + * the CPU) need to be marked with FTRACE_UNSAFE_RCU().
> + * This will prevent all function tracing callbacks from calling
> + * this function unless the callback explicitly states that it
> + * doesn't use RCU with the FTRACE_OPS_FL_RCU_SAFE flag.
> + *
> + * The usage of FTRACE_UNSAFE_RCU() is the same as EXPORT_SYMBOL().
> + * Just place the macro underneath the function that is unsafe.
> + */
> +#define FTRACE_UNSAFE_RCU(func) \
> +	static struct ftrace_func_finder __ftrace_unsafe_rcu_##func	\
> +	 __initdata = {							\
> +		.name = #func,						\
> +		.ip = (unsigned long)func,				\
> +	};							\
> +	struct ftrace_func_finder *__ftrace_unsafe_rcu_ptr_##func	\
> +		  __attribute__((__section__("_ftrace_unsafe_rcu"))) =	\
> +		&__ftrace_unsafe_rcu_##func
> +
>  #else /* !CONFIG_FUNCTION_TRACER */
>  /*
>   * (un)register_ftrace_function must be a macro since the ops parameter
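
The macro together with the FTRACE_UNSAFE_RCU() entry in
vmlinux.lds.h is the usual build-a-table-in-a-section pattern: every
use emits a pointer into the _ftrace_unsafe_rcu section, and start/stop
symbols bound the resulting array. The kernel's linker script defines
those bounds explicitly (as in the vmlinux.lds.h hunk above), while a
plain userspace link gets them from GNU ld automatically. A
self-contained userspace analogue (GNU toolchain assumed; all names
below are made up) shows the mechanics:

	#include <stdio.h>

	struct item { const char *name; };

	#define REGISTER_ITEM(n)					\
		static struct item __item_##n = { .name = #n };		\
		static struct item *__item_ptr_##n			\
			__attribute__((__section__("my_items"), used)) = \
			&__item_##n

	/* GNU ld provides these for sections named like C identifiers */
	extern struct item *__start_my_items[];
	extern struct item *__stop_my_items[];

	REGISTER_ITEM(foo);
	REGISTER_ITEM(bar);

	int main(void)
	{
		struct item **p;

		for (p = __start_my_items; p < __stop_my_items; p++)
			printf("%s\n", (*p)->name);
		return 0;
	}

create_unsafe_rcu_hash() later in this patch walks the kernel section
the same way.
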
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index a6d098c..3750360 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1356,6 +1356,23 @@ ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash);
>  static void
>  ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash);
> 
> +static int ftrace_convert_size_to_bits(int size)
> +{
> +	int bits = 0;
> +
> +	/*
> +	 * Make the hash size about 1/2 the # found
> +	 */
> +	for (size /= 2; size; size >>= 1)
> +		bits++;
> +
> +	/* Don't allocate too much */
> +	if (bits > FTRACE_HASH_MAX_BITS)
> +		bits = FTRACE_HASH_MAX_BITS;
> +
> +	return bits;
> +}
> +
>  static int
>  ftrace_hash_move(struct ftrace_ops *ops, int enable,
>  		 struct ftrace_hash **dst, struct ftrace_hash *src)
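
Worked example of the helper above: for size = 100, the loop first
halves to 50, then shifts 50 -> 25 -> 12 -> 6 -> 3 -> 1, incrementing
bits six times. alloc_ftrace_hash(6) then builds a 64-bucket hash,
i.e. roughly half the number of entries, rounded up to a power of two.
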
> @@ -1388,15 +1405,7 @@ ftrace_hash_move(struct ftrace_ops *ops, int enable,
>  		goto out;
>  	}
> 
> -	/*
> -	 * Make the hash size about 1/2 the # found
> -	 */
> -	for (size /= 2; size; size >>= 1)
> -		bits++;
> -
> -	/* Don't allocate too much */
> -	if (bits > FTRACE_HASH_MAX_BITS)
> -		bits = FTRACE_HASH_MAX_BITS;
> +	bits = ftrace_convert_size_to_bits(size);
> 
>  	ret = -ENOMEM;
>  	new_hash = alloc_ftrace_hash(bits);
> @@ -1486,6 +1495,74 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs)
>  	}
> 
> 
> +static __init int ftrace_find_ip(const void *a, const void *b)
> +{
> +	const struct dyn_ftrace *key = a;
> +	const struct dyn_ftrace *rec = b;
> +
> +	if (key->ip == rec->ip)
> +		return 0;
> +
> +	if (key->ip < rec->ip) {
> +		/* If the previous record is below the key, this is our func */
> +		rec--;
> +		if (rec->ip < key->ip)
> +			return 0;
> +		return -1;
> +	}
> +
> +	/* All else, we are greater */
> +	return 1;
> +}
> +
> +/*
> + * Find the mcount call location given the ip address of the
> + * function that contains it, as the mcount call is usually
> + * after the address of the start of the function.
> + *
> + * Done only for core kernel functions at boot.
> + */
> +static __init unsigned long ftrace_mcount_from_func(unsigned long ip)
> +{
> +	struct ftrace_page *pg, *last_pg = NULL;
> +	struct dyn_ftrace *rec;
> +	struct dyn_ftrace key;
> +
> +	key.ip = ip;
> +
> +	for (pg = ftrace_pages_start; pg; last_pg = pg, pg = pg->next) {
> +		if (ip >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
> +			continue;
> +
> +		/*
> +		 * Do the first record manually, as we need to check
> +		 * the previous record from the previous page
> +		 */
> +		if (ip < pg->records[0].ip) {
> +			/* First page? Then we found our record! */
> +			if (!last_pg)
> +				return pg->records[0].ip;
> +
> +			rec = &last_pg->records[last_pg->index - 1];
> +			if (rec->ip < ip)
> +				return pg->records[0].ip;
> +
> +			/* Not found */
> +			return 0;
> +		}
> +
> +		rec = bsearch(&key, pg->records + 1, pg->index - 1,
> +			      sizeof(struct dyn_ftrace),
> +			      ftrace_find_ip);
> +		if (rec)
> +			return rec->ip;
> +
> +		break;
> +	}
> +
> +	return 0;
> +}
> +
>  static int ftrace_cmp_recs(const void *a, const void *b)
>  {
>  	const struct dyn_ftrace *key = a;
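
The comparator relies on a bsearch() subtlety worth calling out: the
search is started at pg->records + 1, so ftrace_find_ip() may safely
peek at rec[-1] and ask "does the key fall between the previous record
and this one?". A standalone userspace sketch of that predecessor-peek
pattern (the array and values are made up):

	#include <stdio.h>
	#include <stdlib.h>

	/* Sorted "record" addresses. */
	static const unsigned long recs[] = { 10, 20, 30, 40, 50 };

	static int find_above(const void *a, const void *b)
	{
		const unsigned long key = *(const unsigned long *)a;
		const unsigned long *rec = b;

		if (key == *rec)
			return 0;
		if (key < *rec) {
			/* If the previous element is below the key,
			 * this element is the first one above it. */
			if (rec[-1] < key)
				return 0;
			return -1;
		}
		return 1;
	}

	int main(void)
	{
		unsigned long key = 23;	/* falls between 20 and 30 */
		const unsigned long *hit;

		/* Start at recs + 1 so rec[-1] is always valid, just as
		 * ftrace_mcount_from_func() passes pg->records + 1. */
		hit = bsearch(&key, recs + 1, 4, sizeof(recs[0]), find_above);
		printf("%lu -> %lu\n", key, hit ? *hit : 0UL);
		return 0;
	}

This prints "23 -> 30", the first record past the key.
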
> @@ -4211,6 +4288,44 @@ struct notifier_block ftrace_module_exit_nb = {
>  	.priority = INT_MIN,	/* Run after anything that can remove kprobes */
>  };
> 
> +extern struct ftrace_func_finder *__start_ftrace_unsafe_rcu[];
> +extern struct ftrace_func_finder *__stop_ftrace_unsafe_rcu[];
> +
> +static struct ftrace_hash *ftrace_unsafe_rcu;
> +
> +static void __init create_unsafe_rcu_hash(void)
> +{
> +	struct ftrace_func_finder *finder;
> +	struct ftrace_func_finder **p;
> +	unsigned long ip;
> +	char str[KSYM_SYMBOL_LEN];
> +	int count;
> +
> +	count = __stop_ftrace_unsafe_rcu - __start_ftrace_unsafe_rcu;
> +	if (!count)
> +		return;
> +
> +	count = ftrace_convert_size_to_bits(count);
> +	ftrace_unsafe_rcu = alloc_ftrace_hash(count);
> +
> +	for (p = __start_ftrace_unsafe_rcu;
> +	     p < __stop_ftrace_unsafe_rcu;
> +	     p++) {
> +		finder = *p;
> +
> +		ip = ftrace_mcount_from_func(finder->ip);
> +		if (WARN_ON_ONCE(!ip))
> +			continue;
> +
> +		/* Make sure this is our ip */
> +		kallsyms_lookup(ip, NULL, NULL, NULL, str);
> +		if (WARN_ON_ONCE(strcmp(str, finder->name) != 0))
> +			continue;
> +
> +		add_hash_entry(ftrace_unsafe_rcu, ip);
> +	}
> +}
> +
>  extern unsigned long __start_mcount_loc[];
>  extern unsigned long __stop_mcount_loc[];
> 
> @@ -4250,6 +4365,8 @@ void __init ftrace_init(void)
>  	if (ret)
>  		pr_warning("Failed to register trace ftrace module exit notifier\n");
> 
> +	create_unsafe_rcu_hash();
> +
>  	set_ftrace_early_filters();
> 
>  	return;
> -- 
> 1.7.10.4
> 
> 

