lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20221020233318.aa41f0b5bb123c87af881316@kernel.org>
Date:   Thu, 20 Oct 2022 23:33:18 +0900
From:   Masami Hiramatsu (Google) <mhiramat@...nel.org>
To:     Steven Rostedt <rostedt@...dmis.org>
Cc:     LKML <linux-kernel@...r.kernel.org>,
        Linux Trace Kernel <linux-trace-kernel@...r.kernel.org>,
        Masami Hiramatsu <mhiramat@...nel.org>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
        "Paul E. McKenney" <paulmck@...nel.org>,
        Joel Fernandes <joel@...lfernandes.org>,
        Tom Zanussi <zanussi@...nel.org>
Subject: Re: [PATCH] tracing: Add trace_trigger kernel command line option

On Wed, 19 Oct 2022 20:01:37 -0400
Steven Rostedt <rostedt@...dmis.org> wrote:

> From: "Steven Rostedt (Google)" <rostedt@...dmis.org>
> 
> Allow triggers to be enabled at kernel boot up. For example:
> 
>   trace_trigger="sched_switch.stacktrace if prev_state == 2"
> 
> The above will enable the stacktrace trigger on top of the sched_switch
> event and only trigger if its prev_state is 2 (TASK_UNINTERRUPTIBLE). Then
> at boot up, a stacktrace will trigger and be recorded in the tracing ring
> buffer every time the sched_switch happens where the previous state is
> TASK_UNINTERRUPTIBLE.
> 
> As this calls into tracepoint logic during very early boot (before
> interrupts are enabled), a check has to be done to see if early boot
> interrupts are still disabled, and if so, avoid any call to RCU
> synchronization, as that will enable interrupts and cause boot up issues.

Just out of curiosity, can you do it with the boot-time tracer?
(Is it too late for your issue?)

$ cat >> stacktrace.bconf
ftrace.event.sched.sched_switch.actions = "stacktrace if prev_state == 2"
^D
$ bootconfig -a stacktrace.bconf initrd.img

And boot the kernel with "bootconfig".
Then, the trace buffer shows:

----------
            init-1       [000] d..3.     0.546668: <stack trace>
 => trace_event_raw_event_sched_switch
 => __traceiter_sched_switch
 => __schedule
 => schedule
 => schedule_timeout
 => wait_for_completion_killable
 => __kthread_create_on_node
 => kthread_create_on_node
 => audit_init
 => do_one_initcall
 => kernel_init_freeable
 => kernel_init
 => ret_from_fork
         kauditd-57      [007] d..3.     0.546677: <stack trace>
 => trace_event_raw_event_sched_switch
 => __traceiter_sched_switch
 => __schedule
 => schedule
 => schedule_preempt_disabled
 => kthread
 => ret_from_fork
----------

Thank you,

> 
> Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>
> ---
>  .../admin-guide/kernel-parameters.txt         | 19 ++++++
>  include/linux/tracepoint.h                    |  4 ++
>  kernel/trace/trace.c                          |  3 +-
>  kernel/trace/trace_events.c                   | 63 ++++++++++++++++++-
>  kernel/tracepoint.c                           |  6 ++
>  5 files changed, 92 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index a465d5242774..ccf91a4bf113 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -6257,6 +6257,25 @@
>  			See also Documentation/trace/ftrace.rst "trace options"
>  			section.
>  
> +	trace_trigger=[trigger-list]
> +			[FTRACE] Add an event trigger on specific events.
> +			Set a trigger on top of a specific event, with an optional
> +			filter.
> +
> +			The format is "trace_trigger=<event>.<trigger>[ if <filter>],..."
> +			Where more than one trigger may be specified that are comma delimited.
> +
> +			For example:
> +
> +			  trace_trigger="sched_switch.stacktrace if prev_state == 2"
> +
> +			The above will enable the "stacktrace" trigger on the "sched_switch"
> +			event but only trigger it if the "prev_state" of the "sched_switch"
> +			event is "2" (TASK_UNINTERRUPTIBLE).
> +
> +			See also "Event triggers" in Documentation/trace/events.rst
> +
> +
>  	traceoff_on_warning
>  			[FTRACE] enable this option to disable tracing when a
>  			warning is hit. This turns off "tracing_on". Tracing can
> diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
> index 4b33b95eb8be..a5c6b5772897 100644
> --- a/include/linux/tracepoint.h
> +++ b/include/linux/tracepoint.h
> @@ -90,6 +90,10 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb)
>  #ifdef CONFIG_TRACEPOINTS
>  static inline void tracepoint_synchronize_unregister(void)
>  {
> +	/* Early updates do not need synchronization */
> +	if (early_boot_irqs_disabled)
> +		return;
> +
>  	synchronize_srcu(&tracepoint_srcu);
>  	synchronize_rcu();
>  }
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 47a44b055a1d..c03fd7037add 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -2749,7 +2749,8 @@ void trace_buffered_event_disable(void)
>  	preempt_enable();
>  
>  	/* Wait for all current users to finish */
> -	synchronize_rcu();
> +	if (!early_boot_irqs_disabled)
> +		synchronize_rcu();
>  
>  	for_each_tracing_cpu(cpu) {
>  		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index 0356cae0cf74..06554939252c 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -2796,6 +2796,44 @@ trace_create_new_event(struct trace_event_call *call,
>  	return file;
>  }
>  
> +#ifdef CONFIG_HIST_TRIGGERS
> +#define MAX_BOOT_TRIGGERS 32
> +
> +static struct boot_triggers {
> +	const char		*event;
> +	char			*trigger;
> +} bootup_triggers[MAX_BOOT_TRIGGERS];
> +
> +static char bootup_trigger_buf[COMMAND_LINE_SIZE];
> +static int nr_boot_triggers;
> +
> +static __init int setup_trace_triggers(char *str)
> +{
> +	char *trigger;
> +	char *buf;
> +	int i;
> +
> +	strlcpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE);
> +	ring_buffer_expanded = true;
> +	disable_tracing_selftest("running event triggers");
> +
> +	buf = bootup_trigger_buf;
> +	for (i = 0; i < MAX_BOOT_TRIGGERS; i++) {
> +		trigger = strsep(&buf, ",");
> +		if (!trigger)
> +			break;
> +		bootup_triggers[i].event = strsep(&trigger, ".");
> +		bootup_triggers[i].trigger = strsep(&trigger, ".");
> +		if (!bootup_triggers[i].trigger)
> +			break;
> +	}
> +
> +	nr_boot_triggers = i;
> +	return 1;
> +}
> +__setup("trace_trigger=", setup_trace_triggers);
> +#endif
> +
>  /* Add an event to a trace directory */
>  static int
>  __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
> @@ -2822,12 +2860,32 @@ __trace_early_add_new_event(struct trace_event_call *call,
>  			    struct trace_array *tr)
>  {
>  	struct trace_event_file *file;
> +	int ret;
> +	int i;
>  
>  	file = trace_create_new_event(call, tr);
>  	if (!file)
>  		return -ENOMEM;
>  
> -	return event_define_fields(call);
> +	ret = event_define_fields(call);
> +	if (ret)
> +		return ret;
> +
> +#ifdef CONFIG_HIST_TRIGGERS
> +	for (i = 0; i < nr_boot_triggers; i++) {
> +		if (strcmp(trace_event_name(call), bootup_triggers[i].event))
> +			continue;
> +		mutex_lock(&event_mutex);
> +		ret = trigger_process_regex(file, bootup_triggers[i].trigger);
> +		mutex_unlock(&event_mutex);
> +		if (ret)
> +			pr_err("Failed to register trigger '%s' on event %s\n",
> +			       bootup_triggers[i].trigger,
> +			       bootup_triggers[i].event);
> +	}
> +#endif
> +
> +	return 0;
>  }
>  
>  struct ftrace_module_file_ops;
> @@ -3726,6 +3784,8 @@ static __init int event_trace_enable(void)
>  			list_add(&call->list, &ftrace_events);
>  	}
>  
> +	register_trigger_cmds();
> +
>  	/*
>  	 * We need the top trace array to have a working set of trace
>  	 * points at early init, before the debug files and directories
> @@ -3740,7 +3800,6 @@ static __init int event_trace_enable(void)
>  
>  	register_event_cmds();
>  
> -	register_trigger_cmds();
>  
>  	return 0;
>  }
> diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
> index f23144af5743..f6e4ee1e40b3 100644
> --- a/kernel/tracepoint.c
> +++ b/kernel/tracepoint.c
> @@ -48,6 +48,9 @@ static void tp_rcu_get_state(enum tp_transition_sync sync)
>  {
>  	struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];
>  
> +	if (early_boot_irqs_disabled)
> +		return;
> +
>  	/* Keep the latest get_state snapshot. */
>  	snapshot->rcu = get_state_synchronize_rcu();
>  	snapshot->srcu = start_poll_synchronize_srcu(&tracepoint_srcu);
> @@ -58,6 +61,9 @@ static void tp_rcu_cond_sync(enum tp_transition_sync sync)
>  {
>  	struct tp_transition_snapshot *snapshot = &tp_transition_snapshot[sync];
>  
> +	if (early_boot_irqs_disabled)
> +		return;
> +
>  	if (!snapshot->ongoing)
>  		return;
>  	cond_synchronize_rcu(snapshot->rcu);
> -- 
> 2.35.1
> 


-- 
Masami Hiramatsu (Google) <mhiramat@...nel.org>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ