Message-ID: <20161122173701.605ca2a5@gandalf.local.home>
Date:   Tue, 22 Nov 2016 17:37:01 -0500
From:   Steven Rostedt <rostedt@...dmis.org>
To:     Andi Kleen <andi@...stfloor.org>
Cc:     linux-kernel@...r.kernel.org, Andi Kleen <ak@...ux.intel.com>,
        tom.zanussi@...ux.intel.com, peterz@...radead.org,
        alexander.shishkin@...el.com, Ingo Molnar <mingo@...nel.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        "H. Peter Anvin" <hpa@...or.com>
Subject: Re: [PATCH] Add support for disabling Intel PT trace in ftrace

On Fri, 18 Nov 2016 08:55:24 -0800
Andi Kleen <andi@...stfloor.org> wrote:

> From: Andi Kleen <ak@...ux.intel.com>
> 
> ftrace has powerful trigger functions. Intel PT on modern Intel CPUs
> can trace execution flow.
> 
> For debugging I found it useful to disable the PT trace from ftrace triggers,
> for example when specific kernel functions that indicate a problem are hit.
> Then we can see the exact execution trace up to that point.
> 
> This patch adds a "ptoff" ftrace trigger/function that disables the trace
> on the current CPU when the function is hit. The PT trace still has to be
> set up with perf first:
> 
> % perf record -e intel_pt// -a ... &
> % cd /sys/kernel/debug/tracing
> % echo do_page_fault:ptoff > set_ftrace_filter
> ...
> % cd -
> % kill %1
> % perf script --itrace=i0ns
> 
> I only implemented local disabling. Enabling would be much more complicated
> and would require a blacklist of functions to avoid recursion. Global
> disabling with IPIs would be possible, but would also risk some deadlock
> scenarios. Local disabling is very easy and can be done without
> accessing any special state, so there are no such problems. It is
> usually good enough for debugging purposes. The trace can always be
> re-enabled from perf.
> 
> This patch adds "ptoff" both as an ftrace trigger and as an ftrace function.
> This makes it work from "set_ftrace_filter" and through the trigger
> file of trace points.
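> 
> For example, as an event trigger (the event and filter below are chosen
> purely for illustration):
> 
> % cd /sys/kernel/debug/tracing
> % echo 'ptoff if bytes_req > 4096' > events/kmem/kmalloc/trigger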
> 
> The PT driver exports a pt_disable() function for this, which can also be
> used for manual instrumentation.
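> 
> A minimal sketch of such manual instrumentation (the helper and the place
> it would be called from are hypothetical; only pt_disable() comes from
> this patch):
> 
> 	#include <linux/perf_event.h>
> 
> 	/*
> 	 * Hypothetical debug helper: freeze the local PT trace as soon as
> 	 * a suspicious state is detected, so the trace ends right at the
> 	 * point of interest.
> 	 */
> 	static void debug_stop_pt_here(void)
> 	{
> 		pt_disable();	/* no-op when the CPU has no Intel PT */
> 	}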
> 
> Cc: tom.zanussi@...ux.intel.com
> Cc: rostedt@...dmis.org
> Cc: peterz@...radead.org
> Cc: alexander.shishkin@...el.com
> Signed-off-by: Andi Kleen <ak@...ux.intel.com>
> ---
>  Documentation/trace/ftrace.txt      |  5 +++
>  arch/x86/events/intel/pt.c          | 16 ++++++++
>  include/linux/perf_event.h          |  2 +

Peter Z, Thomas, H. Peter, Ingo,

Are you guys fine with this change? If so, I can pull this into my tree.

-- Steve

>  include/linux/trace_events.h        |  1 +
>  kernel/trace/trace.c                |  6 +++
>  kernel/trace/trace_events_trigger.c | 79 +++++++++++++++++++++++++++++++++++++
>  kernel/trace/trace_functions.c      | 58 +++++++++++++++++++++++++++
>  7 files changed, 167 insertions(+)
> 
> diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
> index 185c39fea2a0..5dc8ec658678 100644
> --- a/Documentation/trace/ftrace.txt
> +++ b/Documentation/trace/ftrace.txt
> @@ -2549,6 +2549,11 @@ The following commands are supported:
>    command, it only prints out the contents of the ring buffer for the
>    CPU that executed the function that triggered the dump.
>  
> +- ptoff
> +  When the function is hit, disable the Intel PT trace. The Intel PT
> +  trace has to be set up earlier with perf record -a -e intel_pt// ...
> +  This disables the trace on the current CPU only.
> +
>  trace_pipe
>  ----------
>  
> diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
> index c5047b8f777b..cf15881da9a5 100644
> --- a/arch/x86/events/intel/pt.c
> +++ b/arch/x86/events/intel/pt.c
> @@ -1455,3 +1455,19 @@ static __init int pt_init(void)
>  	return ret;
>  }
>  arch_initcall(pt_init);
> +
> +/*
> + * Disable the PT trace for debugging purposes.
> + */
> +void pt_disable(void)
> +{
> +	u64 val;
> +
> +	if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
> +		return;
> +
> +	rdmsrl_safe(MSR_IA32_RTIT_CTL, &val);
> +	val &= ~RTIT_CTL_TRACEEN;
> +	wrmsrl_safe(MSR_IA32_RTIT_CTL, val);
> +}
> +EXPORT_SYMBOL(pt_disable);
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 4741ecdb9817..a408d288298b 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1387,4 +1387,6 @@ int perf_event_exit_cpu(unsigned int cpu);
>  #define perf_event_exit_cpu	NULL
>  #endif
>  
> +void pt_disable(void);
> +
>  #endif /* _LINUX_PERF_EVENT_H */
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index be007610ceb0..4d2d4a1b738e 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -391,6 +391,7 @@ enum event_trigger_type {
>  	ETT_EVENT_ENABLE	= (1 << 3),
>  	ETT_EVENT_HIST		= (1 << 4),
>  	ETT_HIST_ENABLE		= (1 << 5),
> +	ETT_PTOFF		= (1 << 6),
>  };
>  
>  extern int filter_match_preds(struct event_filter *filter, void *rec);
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 8696ce6bf2f6..e55405dce821 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -4082,6 +4082,9 @@ static const char readme_msg[] =
>  #endif
>  	"\t\t      dump\n"
>  	"\t\t      cpudump\n"
> +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
> +	"\t\t      ptoff\n"
> +#endif
>  	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
>  	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
>  	"\t     The first one will disable tracing every time do_fault is hit\n"
> @@ -4175,6 +4178,9 @@ static const char readme_msg[] =
>  #ifdef CONFIG_HIST_TRIGGERS
>  	"\t\t    hist (see below)\n"
>  #endif
> +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
> +	"\t\t    ptoff\t\t- Disable PT trace on current CPU\n"
> +#endif
>  	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
>  	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
>  	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
> diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
> index a26ff1345784..b4ec8c417c12 100644
> --- a/kernel/trace/trace_events_trigger.c
> +++ b/kernel/trace/trace_events_trigger.c
> @@ -22,6 +22,7 @@
>  #include <linux/ctype.h>
>  #include <linux/mutex.h>
>  #include <linux/slab.h>
> +#include <linux/perf_event.h>
>  
>  #include "trace.h"
>  
> @@ -1044,6 +1045,83 @@ static struct event_command trigger_traceoff_cmd = {
>  	.set_filter		= set_trigger_filter,
>  };
>  
> +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
> +
> +static void
> +ptoff_trigger(struct event_trigger_data *data, void *rec)
> +{
> +	pt_disable();
> +}
> +
> +static void
> +ptoff_count_trigger(struct event_trigger_data *data, void *rec)
> +{
> +	if (!data->count)
> +		return;
> +
> +	if (data->count != -1)
> +		(data->count)--;
> +
> +	ptoff_trigger(data, rec);
> +}
> +
> +static int
> +ptoff_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
> +		       struct event_trigger_data *data)
> +{
> +	return event_trigger_print("ptoff", m, (void *)data->count,
> +				   data->filter_str);
> +}
> +
> +static struct event_trigger_ops ptoff_trigger_ops = {
> +	.func			= ptoff_trigger,
> +	.print			= ptoff_trigger_print,
> +	.init			= event_trigger_init,
> +	.free			= event_trigger_free,
> +};
> +
> +static struct event_trigger_ops ptoff_count_trigger_ops = {
> +	.func			= ptoff_count_trigger,
> +	.print			= ptoff_trigger_print,
> +	.init			= event_trigger_init,
> +	.free			= event_trigger_free,
> +};
> +
> +static struct event_trigger_ops *
> +ptoff_get_trigger_ops(char *cmd, char *param)
> +{
> +	return param ? &ptoff_count_trigger_ops : &ptoff_trigger_ops;
> +}
> +
> +static struct event_command trigger_ptoff_cmd = {
> +	.name			= "ptoff",
> +	.trigger_type		= ETT_PTOFF,
> +	.func			= event_trigger_callback,
> +	.reg			= register_trigger,
> +	.unreg			= unregister_trigger,
> +	.get_trigger_ops	= ptoff_get_trigger_ops,
> +	.set_filter		= set_trigger_filter,
> +};
> +
> +static __init int register_trigger_ptoff_cmd(void)
> +{
> +	int ret;
> +
> +	if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
> +		return 0;
> +
> +	ret = register_event_command(&trigger_ptoff_cmd);
> +	WARN_ON(ret < 0);
> +
> +	return ret;
> +}
> +
> +#else
> +
> +static inline int register_trigger_ptoff_cmd(void) { return 0; }
> +
> +#endif
> +
>  #ifdef CONFIG_TRACER_SNAPSHOT
>  static void
>  snapshot_trigger(struct event_trigger_data *data, void *rec)
> @@ -1609,6 +1687,7 @@ __init int register_trigger_cmds(void)
>  	register_trigger_enable_disable_cmds();
>  	register_trigger_hist_enable_disable_cmds();
>  	register_trigger_hist_cmd();
> +	register_trigger_ptoff_cmd();
>  
>  	return 0;
>  }
> diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
> index 0efa00d80623..80867e3166f7 100644
> --- a/kernel/trace/trace_functions.c
> +++ b/kernel/trace/trace_functions.c
> @@ -15,6 +15,7 @@
>  #include <linux/ftrace.h>
>  #include <linux/slab.h>
>  #include <linux/fs.h>
> +#include <linux/perf_event.h>
>  
>  #include "trace.h"
>  
> @@ -643,6 +644,57 @@ static struct ftrace_func_command ftrace_cpudump_cmd = {
>  	.func			= ftrace_cpudump_callback,
>  };
>  
> +#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
> +static void
> +ftrace_ptoff_probe(unsigned long ip, unsigned long parent_ip, void **data)
> +{
> +	if (update_count(data))
> +		pt_disable();
> +}
> +
> +static int
> +ftrace_ptoff_print(struct seq_file *m, unsigned long ip,
> +			struct ftrace_probe_ops *ops, void *data)
> +{
> +	return ftrace_probe_print("ptoff", m, ip, data);
> +}
> +
> +static struct ftrace_probe_ops ptoff_probe_ops = {
> +	.func			= ftrace_ptoff_probe,
> +	.print			= ftrace_ptoff_print,
> +};
> +
> +static int
> +ftrace_ptoff_callback(struct ftrace_hash *hash,
> +			   char *glob, char *cmd, char *param, int enable)
> +{
> +	struct ftrace_probe_ops *ops;
> +
> +	ops = &ptoff_probe_ops;
> +
> +	/* Only disable the PT trace once. */
> +	return ftrace_trace_probe_callback(ops, hash, glob, cmd,
> +					   "1", enable);
> +}
> +
> +static struct ftrace_func_command ftrace_ptoff_cmd = {
> +	.name			= "ptoff",
> +	.func			= ftrace_ptoff_callback,
> +};
> +
> +static int register_ptoff_command(void)
> +{
> +	if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
> +		return 0;
> +	return register_ftrace_command(&ftrace_ptoff_cmd);
> +}
> +
> +#else
> +
> +static inline int register_ptoff_command(void) { return 0; }
> +
> +#endif
> +
>  static int __init init_func_cmd_traceon(void)
>  {
>  	int ret;
> @@ -667,8 +719,14 @@ static int __init init_func_cmd_traceon(void)
>  	if (ret)
>  		goto out_free_dump;
>  
> +	ret = register_ptoff_command();
> +	if (ret)
> +		goto out_free_cpudump;
> +
>  	return 0;
>  
> + out_free_cpudump:
> +	unregister_ftrace_command(&ftrace_cpudump_cmd);
>   out_free_dump:
>  	unregister_ftrace_command(&ftrace_dump_cmd);
>   out_free_stacktrace:
