lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1479488124-17034-1-git-send-email-andi@firstfloor.org>
Date:   Fri, 18 Nov 2016 08:55:24 -0800
From:   Andi Kleen <andi@...stfloor.org>
To:     linux-kernel@...r.kernel.org
Cc:     Andi Kleen <ak@...ux.intel.com>, tom.zanussi@...ux.intel.com,
        rostedt@...dmis.org, peterz@...radead.org,
        alexander.shishkin@...el.com
Subject: [PATCH] Add support for disabling Intel PT trace in ftrace

From: Andi Kleen <ak@...ux.intel.com>

ftrace has powerful trigger functions. Intel PT on modern Intel CPUs
can trace execution flow.

For debugging I found it useful to disable the PT trace from ftrace triggers,
for example when specific kernel functions are hit, which indicate
a problem. Then we can see the exact execution trace up to this point.

This patch adds a "ptoff" ftrace trigger/function that disables the trace
on the current function. The PT trace still has to be set up with perf:

% perf record -e intel_pt// -a ... &
% cd /sys/kernel/debug/tracing
% echo do_page_fault:ptoff > set_ftrace_filter
...
% cd -
% kill %1
% perf script --itrace=i0ns

I only implemented local disabling. Enabling would be much more complicated
and would require a blacklist of functions to avoid recursion. Global
disabling with IPIs would be possible, but would also risk some deadlock
scenarios. Local disabling is very easy and can be done without
accessing any special state, so there are no such problems. It is
usually good enough for debugging purposes. The trace can always be
re-enabled from perf.

This patch adds "ptoff" both as an ftrace trigger and as an ftrace function
command. This makes it work from "set_ftrace_filter" and through the trigger
field of trace points.

The PT driver exports a pt_disable() function for this that can also be
used for manual instrumentation.

Cc: tom.zanussi@...ux.intel.com
Cc: rostedt@...dmis.org
Cc: peterz@...radead.org
Cc: alexander.shishkin@...el.com
Signed-off-by: Andi Kleen <ak@...ux.intel.com>
---
 Documentation/trace/ftrace.txt      |  5 +++
 arch/x86/events/intel/pt.c          | 16 ++++++++
 include/linux/perf_event.h          |  2 +
 include/linux/trace_events.h        |  1 +
 kernel/trace/trace.c                |  6 +++
 kernel/trace/trace_events_trigger.c | 79 +++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_functions.c      | 58 +++++++++++++++++++++++++++
 7 files changed, 167 insertions(+)

diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 185c39fea2a0..5dc8ec658678 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -2549,6 +2549,11 @@ The following commands are supported:
   command, it only prints out the contents of the ring buffer for the
   CPU that executed the function that triggered the dump.
 
+- ptoff
+  When the function is hit, disable the Intel PT trace. The Intel PT
+  trace has to be set up earlier with perf record -a -e intel_pt// ...
+  This disables the trace on the current CPU only.
+
 trace_pipe
 ----------
 
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index c5047b8f777b..cf15881da9a5 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1455,3 +1455,19 @@ static __init int pt_init(void)
 	return ret;
 }
 arch_initcall(pt_init);
+
+/*
+ * Stop PT on this CPU by clearing TraceEn; no-op if RTIT_CTL is unreadable.
+ */
+void pt_disable(void)
+{
+	u64 val;
+
+	if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
+		return;
+
+	if (rdmsrl_safe(MSR_IA32_RTIT_CTL, &val))
+		return;	/* read failed: do not write back a bogus RTIT_CTL */
+	wrmsrl_safe(MSR_IA32_RTIT_CTL, val & ~RTIT_CTL_TRACEEN);
+}
+EXPORT_SYMBOL(pt_disable);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4741ecdb9817..a408d288298b 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1387,4 +1387,6 @@ int perf_event_exit_cpu(unsigned int cpu);
 #define perf_event_exit_cpu	NULL
 #endif
 
+void pt_disable(void);
+
 #endif /* _LINUX_PERF_EVENT_H */
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index be007610ceb0..4d2d4a1b738e 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -391,6 +391,7 @@ enum event_trigger_type {
 	ETT_EVENT_ENABLE	= (1 << 3),
 	ETT_EVENT_HIST		= (1 << 4),
 	ETT_HIST_ENABLE		= (1 << 5),
+	ETT_PTOFF		= (1 << 6),
 };
 
 extern int filter_match_preds(struct event_filter *filter, void *rec);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8696ce6bf2f6..e55405dce821 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4082,6 +4082,9 @@ static const char readme_msg[] =
 #endif
 	"\t\t      dump\n"
 	"\t\t      cpudump\n"
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
+	"\t\t      ptoff\n"
+#endif
 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
 	"\t     The first one will disable tracing every time do_fault is hit\n"
@@ -4175,6 +4178,9 @@ static const char readme_msg[] =
 #ifdef CONFIG_HIST_TRIGGERS
 	"\t\t    hist (see below)\n"
 #endif
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
+	"\t\t	ptoff\t\t- Disable PT trace on current CPU\n"
+#endif
 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index a26ff1345784..b4ec8c417c12 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -22,6 +22,7 @@
 #include <linux/ctype.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/perf_event.h>
 
 #include "trace.h"
 
@@ -1044,6 +1045,83 @@ static struct event_command trigger_traceoff_cmd = {
 	.set_filter		= set_trigger_filter,
 };
 
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
+
+static void
+ptoff_trigger(struct event_trigger_data *data, void *rec)
+{
+	pt_disable();
+}
+
+static void
+ptoff_count_trigger(struct event_trigger_data *data, void *rec)
+{
+	if (!data->count)
+		return;
+
+	if (data->count != -1)
+		(data->count)--;
+
+	ptoff_trigger(data, rec);
+}
+
+static int
+ptoff_trigger_print(struct seq_file *m, struct event_trigger_ops *ops,
+		       struct event_trigger_data *data)
+{
+	return event_trigger_print("ptoff", m, (void *)data->count,
+				   data->filter_str);
+}
+
+static struct event_trigger_ops ptoff_trigger_ops = {
+	.func			= ptoff_trigger,
+	.print			= ptoff_trigger_print,
+	.init			= event_trigger_init,
+	.free			= event_trigger_free,
+};
+
+static struct event_trigger_ops ptoff_count_trigger_ops = {
+	.func			= ptoff_count_trigger,
+	.print			= ptoff_trigger_print,
+	.init			= event_trigger_init,
+	.free			= event_trigger_free,
+};
+
+static struct event_trigger_ops *
+ptoff_get_trigger_ops(char *cmd, char *param)
+{
+	return param ? &ptoff_count_trigger_ops : &ptoff_trigger_ops;
+}
+
+static struct event_command trigger_ptoff_cmd = {
+	.name			= "ptoff",
+	.trigger_type		= ETT_PTOFF,
+	.func			= event_trigger_callback,
+	.reg			= register_trigger,
+	.unreg			= unregister_trigger,
+	.get_trigger_ops	= ptoff_get_trigger_ops,
+	.set_filter		= set_trigger_filter,
+};
+
+static __init int register_trigger_ptoff_cmd(void)
+{
+	int ret;
+
+	if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
+		return 0;
+
+	ret = register_event_command(&trigger_ptoff_cmd);
+	WARN_ON(ret < 0);
+
+	return ret;
+}
+
+#else
+
+static inline int register_trigger_ptoff_cmd(void) { return 0; }
+
+#endif
+
 #ifdef CONFIG_TRACER_SNAPSHOT
 static void
 snapshot_trigger(struct event_trigger_data *data, void *rec)
@@ -1609,6 +1687,7 @@ __init int register_trigger_cmds(void)
 	register_trigger_enable_disable_cmds();
 	register_trigger_hist_enable_disable_cmds();
 	register_trigger_hist_cmd();
+	register_trigger_ptoff_cmd();
 
 	return 0;
 }
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 0efa00d80623..80867e3166f7 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -15,6 +15,7 @@
 #include <linux/ftrace.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
+#include <linux/perf_event.h>
 
 #include "trace.h"
 
@@ -643,6 +644,57 @@ static struct ftrace_func_command ftrace_cpudump_cmd = {
 	.func			= ftrace_cpudump_callback,
 };
 
+#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
+static void
+ftrace_ptoff_probe(unsigned long ip, unsigned long parent_ip, void **data)
+{
+	if (update_count(data))
+		pt_disable();
+}
+
+static int
+ftrace_ptoff_print(struct seq_file *m, unsigned long ip,
+			struct ftrace_probe_ops *ops, void *data)
+{
+	return ftrace_probe_print("ptoff", m, ip, data);
+}
+
+static struct ftrace_probe_ops ptoff_probe_ops = {
+	.func			= ftrace_ptoff_probe,
+	.print			= ftrace_ptoff_print,
+};
+
+static int
+ftrace_ptoff_callback(struct ftrace_hash *hash,
+			   char *glob, char *cmd, char *param, int enable)
+{
+	struct ftrace_probe_ops *ops;
+
+	ops = &ptoff_probe_ops;
+
+	/* Only disable PT once; param is ignored in favor of a fixed "1". */
+	return ftrace_trace_probe_callback(ops, hash, glob, cmd,
+					   "1", enable);
+}
+
+static struct ftrace_func_command ftrace_ptoff_cmd = {
+	.name			= "ptoff",
+	.func			= ftrace_ptoff_callback,
+};
+
+static int __init register_ptoff_command(void)
+{
+	if (!boot_cpu_has(X86_FEATURE_INTEL_PT))
+		return 0;	/* no Intel PT: leave "ptoff" unregistered */
+	return register_ftrace_command(&ftrace_ptoff_cmd);
+}
+
+#else
+
+static inline int register_ptoff_command(void) { return 0; }
+
+#endif
+
 static int __init init_func_cmd_traceon(void)
 {
 	int ret;
@@ -667,8 +719,14 @@ static int __init init_func_cmd_traceon(void)
 	if (ret)
 		goto out_free_dump;
 
+	ret = register_ptoff_command();
+	if (ret)
+	    goto out_free_cpudump;
+
 	return 0;
 
+ out_free_cpudump:
+	unregister_ftrace_command(&ftrace_cpudump_cmd);
  out_free_dump:
 	unregister_ftrace_command(&ftrace_dump_cmd);
  out_free_stacktrace:
-- 
2.5.5

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ