Message-ID: <20171107211706.328d7dd0@gandalf.local.home>
Date:   Tue, 7 Nov 2017 21:17:06 -0500
From:   Steven Rostedt <rostedt@...dmis.org>
To:     Abderrahmane Benbachir <abderrahmane.benbachir@...ymtl.ca>
Cc:     linux-kernel@...r.kernel.org, mingo@...hat.com,
        peterz@...radead.org, mathieu.desnoyers@...icios.com
Subject: Re: [RFC PATCH v2] ftrace: support very early function tracing

On Tue, 07 Nov 2017 18:44:05 -0500
Abderrahmane Benbachir <abderrahmane.benbachir@...ymtl.ca> wrote:

> Hi Steve,
> 
> I updated the patch with all the fixes except for the update-timestamp part.
> 
> I used ftrace_export as you suggested, but I don't know how to hook into
> the ring buffer to update the timestamp, so I used the same way as before
> to update the time_delta in the function ftrace_vearly_export_write.

OK, now that I see this implemented, I may take back some of what I said.
But I'd still do it differently than what you did before. More below.

> 
> static u64 export_delta __initdata;
> 
> static void __init ftrace_vearly_export_write(const void *buf, unsigned int len)
> {
>         struct ring_buffer_event *event;
>         const u32 *entry = buf;
> 
>         event = container_of(entry, struct ring_buffer_event, array[0]);
>         event->time_delta = export_delta;
> }
> 
> Do I have to add a hook somewhere in kernel/trace/ring_buffer.c,
> and then call it from ftrace_vearly_export_write?
> 
> 
> 
> PATCH v2
> --------------------------------------------------------------------
> 
> The very early tracing will start from the beginning of start_kernel()
> and will stop at ftrace_init().
> 
> start_kernel()
> {
>   ftrace_early_init() <--- start very early tracing
>   ...
>   (calls)
>   ...
>   ftrace_init()       <--- stop very early tracing
>   early_trace_init(); <--- fill ring buffer, start function tracer if on
>   ...
> }
> 
> Currently, the events are saved in a temporary static buffer; they are
> copied to the ring buffer once it has been allocated.
> 
> For the timestamp, we use rdtsc for x86, and sched_clock for the other
> archs.
> 
> Dynamic tracing is currently not implemented with live patching. We use
> ftrace_filter and ftrace_notrace to find which functions should be filtered
> (traced / not traced); then, during the callback from the mcount hook, we do
> a binary search lookup to decide whether a function should be saved or not.
> 
> Signed-off-by: Abderrahmane Benbachir <abderrahmane.benbachir@...ymtl.ca>
> Cc: Steven Rostedt <rostedt@...dmis.org>
> Cc: Ingo Molnar <mingo@...hat.com>
> Cc: Peter Zijlstra <peterz@...radead.org>
> Cc: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
> Cc: linux-kernel@...r.kernel.org
> ---
>  arch/x86/Kconfig            |   1 +
>  arch/x86/kernel/ftrace_32.S |  73 ++++++++++
>  arch/x86/kernel/ftrace_64.S |  14 ++
>  include/linux/ftrace.h      |  16 ++-
>  init/main.c                 |   1 +
>  kernel/trace/Kconfig        |  42 ++++++
>  kernel/trace/ftrace.c       | 341 +++++++++++++++++++++++++++++++++++++++++++-
>  kernel/trace/trace.c        |   3 +
>  8 files changed, 487 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
> index 971feac13506..03da741c088b 100644
> --- a/arch/x86/Kconfig
> +++ b/arch/x86/Kconfig
> @@ -137,6 +137,7 @@ config X86
>  	select HAVE_FENTRY			if X86_64 || DYNAMIC_FTRACE
>  	select HAVE_FTRACE_MCOUNT_RECORD
>  	select HAVE_FUNCTION_GRAPH_TRACER
> +	select HAVE_VERY_EARLY_FTRACE
>  	select HAVE_FUNCTION_TRACER
>  	select HAVE_GCC_PLUGINS
>  	select HAVE_HW_BREAKPOINT
> diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
> index 722a145b4139..5d93037872b2 100644
> --- a/arch/x86/kernel/ftrace_32.S
> +++ b/arch/x86/kernel/ftrace_32.S
> @@ -29,8 +29,81 @@ EXPORT_SYMBOL(mcount)
>  # define MCOUNT_FRAME			0	/* using frame = false */
>  #endif
>  
> +.macro save_mcount_regs
> +#ifdef USING_FRAME_POINTER
> +# ifdef CC_USING_FENTRY
> +	/*
> +	 * Frame pointers are of ip followed by bp.
> +	 * Since fentry is an immediate jump, we are left with
> +	 * parent-ip, function-ip. We need to add a frame with
> +	 * parent-ip followed by ebp.
> +	 */
> +	pushl	4(%esp)				/* parent ip */
> +	pushl	%ebp
> +	movl	%esp, %ebp
> +	pushl	2*4(%esp)			/* function ip */
> +# endif
> +	/* For mcount, the function ip is directly above */
> +	pushl	%ebp
> +	movl	%esp, %ebp
> +#endif
> +	pushl	%eax
> +	pushl	%ecx
> +	pushl	%edx
> +	pushl	$0				/* Pass NULL as regs pointer */
> +
> +#ifdef USING_FRAME_POINTER
> +	/* Load parent ebp into edx */
> +	movl	4*4(%esp), %edx
> +#else
> +	/* There's no frame pointer, load the appropriate stack addr instead */
> +	lea	4*4(%esp), %edx
> +#endif
> +
> +	movl	(MCOUNT_FRAME+4)*4(%esp), %eax	/* load the rip */
> +	/* Get the parent ip */
> +	movl	4(%edx), %edx			/* edx has ebp */
> +
> +	movl	function_trace_op, %ecx
> +	subl	$MCOUNT_INSN_SIZE, %eax
> +	.endm
> +
> +.macro restore_mcount_regs
> +	addl	$4, %esp			/* skip NULL pointer */
> +	popl	%edx
> +	popl	%ecx
> +	popl	%eax
> +#ifdef USING_FRAME_POINTER
> +	popl	%ebp
> +# ifdef CC_USING_FENTRY
> +	addl	$4,%esp				/* skip function ip */
> +	popl	%ebp				/* this is the orig bp */
> +	addl	$4, %esp			/* skip parent ip */
> +# endif
> +#endif
> +	.endm
> +

The save and restore macros above look to be a cut-and-paste copy of the
ftrace_caller code in the same file. Why not have ftrace_caller use these
macros too, instead of having duplicate code?

> +
>  ENTRY(function_hook)
> +#ifdef CONFIG_VERY_EARLY_FUNCTION_TRACER
> +	cmpl	$__PAGE_OFFSET, %esp
> +	jb	vearly_stub			/* Paging not enabled yet? */
> +
> +	cmpl	$ftrace_stub, ftrace_vearly_trace_function
> +	jnz vearly_trace
> +
> +vearly_stub:
>  	ret
> +
> +vearly_trace:
> +	save_mcount_regs
> +	call	*ftrace_vearly_trace_function
> +	restore_mcount_regs
> +
> +	jmp vearly_stub
> +#else
> +	ret
> +#endif
>  END(function_hook)
>  
>  ENTRY(ftrace_caller)
> diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
> index 1dfac634bbf7..35f73a9dbbd2 100644
> --- a/arch/x86/kernel/ftrace_64.S
> +++ b/arch/x86/kernel/ftrace_64.S
> @@ -146,7 +146,21 @@ EXPORT_SYMBOL(mcount)
>  #ifdef CONFIG_DYNAMIC_FTRACE
>  
>  ENTRY(function_hook)
> +#ifdef CONFIG_VERY_EARLY_FUNCTION_TRACER
> +	cmpq $ftrace_stub, ftrace_vearly_trace_function
> +	jnz vearly_trace
> +
> +vearly_stub:
>  	retq
> +
> +vearly_trace:
> +	save_mcount_regs
> +	call *ftrace_vearly_trace_function
> +	restore_mcount_regs
> +	jmp vearly_stub
> +#else
> +	retq
> +#endif
>  END(function_hook)
>  
>  ENTRY(ftrace_caller)
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 2e028854bac7..c3f2ba246fc9 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -272,6 +272,16 @@ static inline void ftrace_kill(void) { }
>  static inline void ftrace_free_init_mem(void) { }
>  #endif /* CONFIG_FUNCTION_TRACER */
>  
> +#ifdef CONFIG_VERY_EARLY_FUNCTION_TRACER
> +extern void ftrace_early_init(char *command_line);
> +extern void ftrace_early_shutdown(void);
> +extern void ftrace_early_fill_ringbuffer(void *data);
> +#else
> +static inline void ftrace_early_init(char *command_line) { }
> +static inline void ftrace_early_shutdown(void) { }
> +static inline void ftrace_early_fill_ringbuffer(void *data) { }
> +#endif
> +
>  #ifdef CONFIG_STACK_TRACER
>  
>  #define STACK_TRACE_ENTRIES 500
> @@ -476,6 +486,10 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec);
>  
>  extern ftrace_func_t ftrace_trace_function;
>  
> +#if defined(CONFIG_VERY_EARLY_FUNCTION_TRACER) && defined(CONFIG_DYNAMIC_FTRACE)
> +extern ftrace_func_t ftrace_vearly_trace_function;
> +#endif
> +
>  int ftrace_regex_open(struct ftrace_ops *ops, int flag,
>  		  struct inode *inode, struct file *file);
>  ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
> @@ -757,7 +771,7 @@ static inline unsigned long get_lock_parent_ip(void)
>  #ifdef CONFIG_FTRACE_MCOUNT_RECORD
>  extern void ftrace_init(void);
>  #else
> -static inline void ftrace_init(void) { }
> +static inline void ftrace_init(void) { ftrace_early_shutdown(); }
>  #endif
>  
>  /*
> diff --git a/init/main.c b/init/main.c
> index 0ee9c6866ada..c794292b18ef 100644
> --- a/init/main.c
> +++ b/init/main.c
> @@ -511,6 +511,7 @@ asmlinkage __visible void __init start_kernel(void)
>  	char *command_line;
>  	char *after_dashes;
>  
> +	ftrace_early_init(boot_command_line);
>  	set_task_stack_end_magic(&init_task);
>  	smp_setup_processor_id();
>  	debug_objects_early_init();
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 434c840e2d82..0e1a9662bb2f 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -19,6 +19,11 @@ config HAVE_FUNCTION_TRACER
>  	help
>  	  See Documentation/trace/ftrace-design.txt
>  
> +config HAVE_VERY_EARLY_FTRACE
> +	bool
> +	help
> +	  See Documentation/trace/ftrace-design.txt
> +
>  config HAVE_FUNCTION_GRAPH_TRACER
>  	bool
>  	help
> @@ -145,6 +150,43 @@ config FUNCTION_TRACER
>  	  (the bootup default), then the overhead of the instructions is very
>  	  small and not measurable even in micro-benchmarks.
>  
> +config VERY_EARLY_FUNCTION_TRACER
> +	bool "Very Early Kernel Function Tracer"
> +	depends on FUNCTION_TRACER
> +	depends on HAVE_VERY_EARLY_FTRACE
> +	default y

Never use default y unless you have a very good reason to do so.
DYNAMIC_FTRACE is default y because it is only enabled when
FUNCTION_TRACER is enabled, and DYNAMIC_FTRACE is better than static
ftrace.

This should be default n (which is the default, so just remove the
"default y" line).


> +	help
> +	  Enable very early function tracing. When ftrace_init will be called, all
> +	  mcount hooks will be replaced by nop, but before that (at very early
> +	  stage) mcounts are still there, and they are used for very early tracing.

OK, the help text needs a rewrite ;-)

	help
	  Normally, function tracing can only start after memory has
	  been initialized early in boot. If "ftrace=function" is added
	  to the command line, then function tracing will start after
	  memory setup.

	  In order to trace functions before that, this option will
	  have function tracing start before memory setup is
	  complete, by placing the trace in a temporary buffer, which
	  will be copied to the trace buffer after memory setup. The
	  size of this temporary buffer is defined by
	  VERY_EARLY_BUF_SHIFT.

> +
> +config VERY_EARLY_BUF_SHIFT

Probably should call this "VERY_EARLY_FTRACE_BUF_SHIFT", just to be more
specific.

> +	int "Temporary buffer size (17 => 128 KB, 24 => 16 MB)"
> +	depends on VERY_EARLY_FUNCTION_TRACER
> +	range 0 24

Should probably have the range start at 8.


> +	default 19
> +	help
> +	  Select the size of the buffer to be used for storing function calls at
> +	  very early stage.
> +	  Examples:
> +	    20 =>   1 MB
> +	    19 => 512 KB
> +	    17 => 128 KB
> +
> +config VERY_EARLY_FILTER_SHIFT
> +	int "Temporary filter size (filter/ntrace) (17 =>  128 KB, 19 => 512 KB)"
> +	depends on VERY_EARLY_FUNCTION_TRACER
> +	depends on DYNAMIC_FTRACE
> +	range 0 19

0 is probably fine here if you don't want filters. But that should
probably be documented in the help text.

> +	default 17
> +	help
> +	  Select the size of the filter buffer to be used for filtering (trace/
> +	  no trace) functions at very early stage.

Probably want to include the kernel parameters used:

 "ftrace_filter" and "ftrace_notrace"

> +	  Examples:
> +	    19 => 512 KB
> +	    18 => 256 KB
> +	    17 => 128 KB
> +
>  config FUNCTION_GRAPH_TRACER
>  	bool "Kernel Function Graph Tracer"
>  	depends on HAVE_FUNCTION_GRAPH_TRACER
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index 8319e09e15b9..5e1b53662bbb 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -33,7 +33,7 @@
>  #include <linux/list.h>
>  #include <linux/hash.h>
>  #include <linux/rcupdate.h>
> -
> +#include <linux/trace.h>

OK, we won't need this added. I have a different idea ;-)

>  #include <trace/events/sched.h>
>  
>  #include <asm/sections.h>
> @@ -65,6 +65,8 @@
>  #define FTRACE_HASH_MAX_BITS 12
>  
>  #ifdef CONFIG_DYNAMIC_FTRACE
> +extern unsigned long __start_mcount_loc[];
> +extern unsigned long __stop_mcount_loc[];
>  #define INIT_OPS_HASH(opsname)	\
>  	.func_hash		= &opsname.local_hash,			\
>  	.local_hash.regex_lock	= __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
> @@ -5912,11 +5914,11 @@ void __init ftrace_free_init_mem(void)
>  
>  void __init ftrace_init(void)
>  {
> -	extern unsigned long __start_mcount_loc[];
> -	extern unsigned long __stop_mcount_loc[];
>  	unsigned long count, flags;
>  	int ret;
>  
> +	ftrace_early_shutdown();
> +
>  	local_irq_save(flags);
>  	ret = ftrace_dyn_arch_init();
>  	local_irq_restore(flags);
> @@ -6882,3 +6884,336 @@ void ftrace_graph_exit_task(struct task_struct *t)
>  	kfree(ret_stack);
>  }
>  #endif
> +
> +
> +#ifdef CONFIG_VERY_EARLY_FUNCTION_TRACER
> +
> +#define VEARLY_BUF_LEN ((1 << CONFIG_VERY_EARLY_BUF_SHIFT) / \
> +					sizeof(struct ftrace_vearly_entry))
> +/*
> + * In very early stage :
> + *  - only CPU0 is running in early, there is no need to store it
> + *  - tsc counter is used in x86, otherwise we use sched_clock
> + */
> +struct ftrace_vearly_entry {
> +	unsigned long ip;
> +	unsigned long parent_ip;
> +	unsigned long long clock;
> +};
> +static struct ftrace_vearly_entry ftrace_vearly_entries[VEARLY_BUF_LEN] __initdata;
> +static const unsigned long VERY_EARLY_BUF_LEN __initconst = VEARLY_BUF_LEN;
> +static struct trace_export ftrace_vearly_export __initdata;
> +static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
> +static unsigned int ftrace_vearly_enabled __initdata;
> +static unsigned int vearly_entries_count __initdata;
> +
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +ftrace_func_t ftrace_vearly_trace_function __read_mostly = ftrace_stub;
> +#else
> +# define ftrace_vearly_trace_function ftrace_trace_function
> +#endif
> +#define ftrace_vearly_disable() (ftrace_vearly_trace_function = ftrace_stub)

Make this a static inline function.
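
Something like this, perhaps (just a sketch, reusing the names from your
patch):

static inline void ftrace_vearly_disable(void)
{
	ftrace_vearly_trace_function = ftrace_stub;
}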

> +
> +
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +#define VEARLY_FILTER_SIZE ((1 << CONFIG_VERY_EARLY_FILTER_SHIFT) / \
> +					sizeof(unsigned long))
> +struct ftrace_vearly_filtering {
> +	unsigned long list[VEARLY_FILTER_SIZE];
> +	char buf[COMMAND_LINE_SIZE];
> +	int size;
> +};
> +static const unsigned long FILTER_SIZE __initconst = VEARLY_FILTER_SIZE;
> +static struct ftrace_vearly_filtering ftrace_data_notrace __initdata;
> +static struct ftrace_vearly_filtering ftrace_data_filter __initdata;
> +
> +static __init int
> +ftrace_vearly_match_record(unsigned long ip, struct ftrace_glob *func_g)
> +{
> +	char str[KSYM_SYMBOL_LEN];
> +	char *modname;
> +
> +	kallsyms_lookup(ip, NULL, NULL, &modname, str);
> +	return ftrace_match(str, func_g);
> +}
> +
> +static __init int ftrace_vearly_filter_has_addr(unsigned long addr,
> +	unsigned long *filter, int *size)
> +{
> +	int i;
> +
> +	for (i = 0; i < *size; i++) {
> +		if (filter[i] == addr)
> +			return 1;
> +	}
> +	return 0;
> +}
> +
> +static __init void
> +ftrace_vearly_regex(char *func, unsigned long *filter, int *size)
> +{
> +	struct ftrace_glob func_g = { .type = MATCH_FULL };
> +	unsigned long *start = __start_mcount_loc;
> +	unsigned long *end = __stop_mcount_loc;
> +	unsigned long count;
> +	unsigned long addr;
> +	unsigned long *p;
> +	int clear_filter = 0;
> +
> +	count = end - start;
> +
> +	if (!count)
> +		return;
> +
> +	if (func) {
> +		func_g.type = filter_parse_regex(func, strlen(func), &func_g.search,
> +						 &clear_filter);
> +		func_g.len = strlen(func_g.search);
> +	}
> +
> +	p = start;
> +	while (p < end) {
> +		addr = ftrace_call_adjust(*p++);
> +		if (!addr)
> +			continue;
> +
> +		if ((*size) > FILTER_SIZE)
> +			return;
> +
> +		if (ftrace_vearly_match_record(addr, &func_g)) {
> +			if (!ftrace_vearly_filter_has_addr(addr, filter, size))
> +				filter[(*size)++] = addr;
> +		}
> +	}
> +}
> +
> +static int __init ftrace_addr_compare(const void *a, const void *b)
> +{
> +	if (*(unsigned long *)a > *(unsigned long *)b)
> +		return 1;
> +	if (*(unsigned long *)a < *(unsigned long *)b)
> +		return -1;
> +
> +	return 0;
> +}
> +
> +static void __init ftrace_addr_swap(void *a, void *b, int size)
> +{
> +	unsigned long t = *(unsigned long *)a;
> +	*(unsigned long *)a = *(unsigned long *)b;
> +	*(unsigned long *)b = t;
> +}
> +
> +static __init int set_ftrace_vearly_filtering(void *data, char *str)
> +{
> +	struct ftrace_vearly_filtering *ftrace_data = data;
> +	char *func;
> +	char *buf;
> +
> +	if (!ftrace_data)
> +		return 0;
> +	buf = ftrace_data->buf;
> +	strlcpy(buf, str, COMMAND_LINE_SIZE);
> +
> +	while (buf) {
> +		func = strsep(&buf, ",");
> +		ftrace_vearly_regex(func, ftrace_data->list, &ftrace_data->size);
> +	}
> +	/* sort filter to use binary search on it */
> +	sort(ftrace_data->list, ftrace_data->size,
> +		sizeof(unsigned long), ftrace_addr_compare, ftrace_addr_swap);
> +
> +	return 1;
> +}
> +
> +#define ftrace_vearly_bsearch_addr(addr, data) bsearch(&addr, data.list,\
> +	data.size, sizeof(unsigned long), ftrace_addr_compare)
> +
> +#endif /* CONFIG_DYNAMIC_FTRACE */
> +
> +
> +static __init void
> +ftrace_function_vearly_trace_call(unsigned long ip, unsigned long parent_ip,
> +			struct ftrace_ops *op, struct pt_regs *regs)
> +{
> +	struct ftrace_vearly_entry *entry;
> +
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +	if (ftrace_data_notrace.size &&
> +			ftrace_vearly_bsearch_addr(ip, ftrace_data_notrace))
> +		return;
> +
> +	if (ftrace_data_filter.size &&
> +			!ftrace_vearly_bsearch_addr(ip, ftrace_data_filter))
> +		return;
> +#endif
> +
> +	if (vearly_entries_count >= VERY_EARLY_BUF_LEN) {
> +		/* stop tracing when buffer is full */
> +		ftrace_vearly_disable();
> +		return;
> +	}
> +
> +	entry = &ftrace_vearly_entries[vearly_entries_count++];
> +	entry->ip = ip;
> +	entry->parent_ip = parent_ip;
> +#ifdef CONFIG_X86_TSC
> +	entry->clock = rdtsc();
> +#else
> +	entry->clock = trace_clock_local();
> +#endif
> +}
> +
> +/*
> + * ns = cycles * (10^6 / cpu_khz)
> + */
> +static u64 __init cycles_to_ns(u64 cycles, u64 khz)
> +{
> +	u64 ns = cycles;
> +	u64 dividend, divisor = 0;
> +	const u64 constant = 1000000;
> +
> +	if (khz > constant) {
> +		// ns /= (khz / constant);
> +		dividend = khz;
> +		divisor = constant;
> +		do_div(dividend, divisor);
> +		do_div(ns, dividend);
> +	} else {
> +		dividend = constant;
> +		divisor = khz;
> +		do_div(dividend, divisor);
> +		ns *= dividend;
> +	}
> +	return ns;
> +}
> +
> +/**
> + * delta btw the current and previous event, used in ftrace export to
> + * update entry time delta
> + */
> +static u64 export_delta __initdata;
> +/**
> + * update entry timestamp
> + */
> +static void __init ftrace_vearly_export_write(const void *buf, unsigned int len)
> +{
> +	struct ring_buffer_event *event;
> +	const u32 *entry = buf;
> +
> +	event = container_of(entry, struct ring_buffer_event, array[0]);
> +	event->time_delta = export_delta;
> +}

This still touches internal ring buffer data. But I think I have a fix.

> +
> +void __init ftrace_early_fill_ringbuffer(void *data)
> +{
> +	struct ftrace_vearly_entry *entry;
> +	struct trace_array *tr = data;
> +	unsigned long cpu_khz;
> +	u64 ns, prev_ns = 0;
> +	int i;
> +
> +	if (!ftrace_vearly_enabled)
> +		return;
> +
> +#ifdef CONFIG_X86_TSC
> +	cpu_khz = native_calibrate_cpu();
> +#endif
> +
> +	ftrace_vearly_export.write = ftrace_vearly_export_write;

Let's scrap the ftrace_export idea. Sorry for having you try it, but
now I see more clearly what you are doing.

Let's do:

	ring_buffer_set_clock(tr->trace_buffer.buffer,
				early_trace_clock);

Then have:

static u64 early_timestamp __initdata;

static __init u64 early_trace_clock(void)
{
	return early_timestamp;
}

Then we can have:

> +	preempt_disable_notrace();
> +	for (i = 0; i < vearly_entries_count; i++) {
> +		entry = &ftrace_vearly_entries[i];
> +
> +#ifdef CONFIG_X86_TSC
> +		ns = cycles_to_ns(entry->clock, cpu_khz);
> +#else
> +		ns = entry->clock;
> +#endif

		early_timestamp = ns;

> +		trace_function(tr, entry->ip, entry->parent_ip, 0, 0);

And it will fill the entries properly, and you don't need to worry
about delta's or anything.

> +	}
> +	preempt_enable_notrace();

	/* Set the default clock back */
	ring_buffer_set_clock(tr->trace_buffer.buffer,
			trace_local_clock);
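
Putting those pieces together, the whole fill function could then look
roughly like this (just a sketch, untested, reusing the names from your
patch and keeping your cycles_to_ns() conversion):

static u64 early_timestamp __initdata;

static __init u64 early_trace_clock(void)
{
	return early_timestamp;
}

void __init ftrace_early_fill_ringbuffer(void *data)
{
	struct ftrace_vearly_entry *entry;
	struct trace_array *tr = data;
	unsigned long cpu_khz = 0;
	u64 ns;
	int i;

	if (!ftrace_vearly_enabled)
		return;

#ifdef CONFIG_X86_TSC
	cpu_khz = native_calibrate_cpu();
#endif

	/* Have the ring buffer use the saved timestamps as its clock */
	ring_buffer_set_clock(tr->trace_buffer.buffer, early_trace_clock);

	preempt_disable_notrace();
	for (i = 0; i < vearly_entries_count; i++) {
		entry = &ftrace_vearly_entries[i];
#ifdef CONFIG_X86_TSC
		ns = cycles_to_ns(entry->clock, cpu_khz);
#else
		ns = entry->clock;
#endif
		early_timestamp = ns;
		trace_function(tr, entry->ip, entry->parent_ip, 0, 0);
	}
	preempt_enable_notrace();

	/* Set the default clock back */
	ring_buffer_set_clock(tr->trace_buffer.buffer, trace_local_clock);
}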


> +}
> +
> +static __init int set_ftrace_vearly_enable(void *data, char *str)
> +{
> +	ftrace_vearly_enabled = 1;
> +	return 1;
> +}
> +/*
> + * this will be used as __setup_param
> + */
> +struct ftrace_vearly_obs_param {
> +	int (*setup_func)(void *, char*);
> +	const char *str;
> +	void *data;
> +};
> +static struct ftrace_vearly_obs_param ftrace_vearly_params[] __initdata = {
> +	{ .str = "ftrace_vearly", .setup_func = set_ftrace_vearly_enable },
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +	{
> +		.str = "ftrace_notrace",
> +		.data = &ftrace_data_notrace,
> +		.setup_func = set_ftrace_vearly_filtering,
> +	},
> +	{
> +		.str = "ftrace_filter",
> +		.data = &ftrace_data_filter,
> +		.setup_func = set_ftrace_vearly_filtering,
> +	},
> +#endif

Hmm, wouldn't this actually still be able to work even if
DYNAMIC_FTRACE was not set?

-- Steve

> +};
> +
> +static int __init ftrace_do_very_early_param(char *param, char *val,
> +				 const char *unused, void *arg)
> +{
> +	int size = ARRAY_SIZE(ftrace_vearly_params);
> +	struct ftrace_vearly_obs_param *p;
> +	int i;
> +
> +	for (i = 0; i < size; i++) {
> +		p = &ftrace_vearly_params[i];
> +		if (strcmp(param, p->str) == 0) {
> +			p->setup_func(p->data, val);
> +			return 0;
> +		}
> +	}
> +	return 0;
> +}
> +
> +void __init ftrace_early_init(char *command_line)
> +{
> +	/* proceed only if 'ftrace_vearly' exist */
> +	if (!strstr(command_line, "ftrace_vearly"))
> +		return;
> +
> +	strlcpy(tmp_cmdline, command_line, COMMAND_LINE_SIZE);
> +	parse_args("ftrace vearly options", tmp_cmdline, NULL, 0, 0, 0, NULL,
> +		ftrace_do_very_early_param);
> +
> +	if (ftrace_vearly_enabled)
> +		ftrace_vearly_trace_function = ftrace_function_vearly_trace_call;
> +}
> +
> +void __init ftrace_early_shutdown(void)
> +{
> +	if (!ftrace_vearly_enabled)
> +		return;
> +
> +	ftrace_vearly_disable();
> +
> +#ifdef CONFIG_DYNAMIC_FTRACE
> +	pr_info("ftrace: vearly %u entries, notrace=%d, filter=%d",
> +		vearly_entries_count,
> +		ftrace_data_notrace.size,
> +		ftrace_data_filter.size);
> +#else
> +	pr_info("ftrace: vearly %u recorded entries", vearly_entries_count);
> +#endif
> +}
> +
> +#endif /* CONFIG_VERY_EARLY_FUNCTION_TRACER */
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 752e5daf0896..c097b97b3025 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -8364,6 +8364,9 @@ __init static int tracer_alloc_buffers(void)
>  	/* Function tracing may start here (via kernel command line) */
>  	init_function_trace();
>  
> +	/* Pre-fill the ring buffer with very early events */
> +	ftrace_early_fill_ringbuffer(&global_trace);
> +
>  	/* All seems OK, enable tracing */
>  	tracing_disabled = 0;
>  
