lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20101117120740.GA24972@elte.hu>
Date:	Wed, 17 Nov 2010 13:07:40 +0100
From:	Ingo Molnar <mingo@...e.hu>
To:	Pekka Enberg <penberg@...nel.org>
Cc:	Thomas Gleixner <tglx@...utronix.de>,
	Peter Zijlstra <peterz@...radead.org>,
	Steven Rostedt <rostedt@...dmis.org>,
	Arjan van de Ven <arjan@...ux.intel.com>,
	Arnaldo Carvalho de Melo <acme@...hat.com>,
	Frederic Weisbecker <fweisbec@...il.com>,
	linux-kernel@...r.kernel.org,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Darren Hart <dvhart@...ux.intel.com>,
	Arjan van de Ven <arjan@...radead.org>
Subject: [patch] trace: Add user-space event tracing/injection


* Pekka Enberg <penberg@...nel.org> wrote:

> (Trimming CC for sanity.)

[ Added back Linus (in case he wants to object to the new prctl()) and lkml. ]

> On 11/16/10 11:04 PM, Thomas Gleixner wrote:
> >'trace' is our shot at improving the situation: it aims at providing a
> >simple to use and straightforward tracing tool based on the perf
> >infrastructure and on the well-known perf profiling workflow:
> >
> >    trace record firefox          # trace firefox and all child tasks,
> >                                    put it into trace.data
> >
> >    trace summary                 # show quick overview of the trace,
> >                                    based on trace.data
> >
> >    trace report                  # detailed traces, based on trace.data
> 
> Nice work guys!
> 
> Does this concept lend itself to tracing latencies in userspace applications that 
> run in virtual machines (e.g. the Java kind)? I'm of course interested in this 
> because of Jato [1] where bunch of interesting things can cause jitter: JIT 
> compilation, GC, kernel, and the actual application doing something (in either 
> native code or JIT'd code). It's important to be able to measure where "slowness" 
> to desktop applications and certain class of server applications comes from to be 
> able to improve things.

Makes quite a bit of sense.

How about the attached patch? It works fine with the simple testcase included in the 
changelog. There's a common-sense limit on the message size - but otherwise it adds 
support for apps to generate a free-form string trace event.

Thanks,

	Ingo

---------------------------------->
Subject: trace: Add user-space event tracing/injection
From: Ingo Molnar <mingo@...e.hu>
Date: Wed Nov 17 10:11:53 CET 2010

This feature (suggested by Darren Hart and Pekka Engberg) allows user-space
programs to print trace events in a very simple and self-contained way:

 #include <sys/prctl.h>
 #include <string.h>

 #define PR_TASK_PERF_USER_TRACE 35

 int main(void)
 {
         char *msg = "Hello World!\n";

         prctl(PR_TASK_PERF_USER_TRACE, msg, strlen(msg));

         return 0;
 }

These show up in 'trace' output as:

 $ trace report
 #
 # trace events of 'sleep 1':
 #
        testit/ 6006 ( 0.002 ms): <"Hello World!">
        testit/ 6006 ( 0.002 ms): <"Hello World!">

Suggested-by: Darren Hart <dvhart@...ux.intel.com>
Suggested-by: Pekka Enberg <penberg@...nel.org>
Signed-off-by: Ingo Molnar <mingo@...e.hu>
---
 include/linux/prctl.h       |    2 ++
 include/trace/events/user.h |   32 ++++++++++++++++++++++++++++++++
 kernel/sys.c                |   23 +++++++++++++++++++++++
 kernel/trace/trace_events.c |   34 +++++++++++-----------------------
 tools/perf/builtin-trace.c  |   41 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 109 insertions(+), 23 deletions(-)

Index: linux/include/linux/prctl.h
===================================================================
--- linux.orig/include/linux/prctl.h
+++ linux/include/linux/prctl.h
@@ -102,4 +102,6 @@
 
 #define PR_MCE_KILL_GET 34
 
+#define PR_TASK_PERF_USER_TRACE			35
+
 #endif /* _LINUX_PRCTL_H */
Index: linux/include/trace/events/user.h
===================================================================
--- /dev/null
+++ linux/include/trace/events/user.h
@@ -0,0 +1,32 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM user
+
+#if !defined(_TRACE_USER_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_USER_H_
+
+#include <linux/tracepoint.h>
+#include <linux/ftrace.h>
+
+TRACE_EVENT(user,
+
+	TP_PROTO(const char *message),
+
+	TP_ARGS(message),
+
+	TP_STRUCT__entry(
+		__string(	message, message);
+	),
+
+	TP_fast_assign(
+		__assign_str(message, message);
+	),
+
+	TP_printk("user %s", __get_str(message))
+);
+
+#undef NO_DEV
+
+#endif /* _TRACE_USER_H_ */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
Index: linux/kernel/sys.c
===================================================================
--- linux.orig/kernel/sys.c
+++ linux/kernel/sys.c
@@ -47,6 +47,11 @@
 #include <asm/io.h>
 #include <asm/unistd.h>
 
+#define MAX_USER_TRACE_SIZE 128
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/user.h>
+
 #ifndef SET_UNALIGN_CTL
 # define SET_UNALIGN_CTL(a,b)	(-EINVAL)
 #endif
@@ -1681,6 +1686,24 @@ SYSCALL_DEFINE5(prctl, int, option, unsi
 		case PR_TASK_PERF_EVENTS_ENABLE:
 			error = perf_event_task_enable();
 			break;
+		/*
+		 * Inject a trace event into the current tracing context:
+		 */
+		case PR_TASK_PERF_USER_TRACE:
+		{
+			void __user *uevent_ptr = (void *)arg2;
+			char kstring[MAX_USER_TRACE_SIZE+1];
+			unsigned long uevent_len = arg3;
+
+			if (uevent_len > MAX_USER_TRACE_SIZE)
+				return -EINVAL;
+			if (copy_from_user(kstring, uevent_ptr, uevent_len))
+				return -EFAULT;
+			kstring[uevent_len] = 0;
+
+			trace_user(kstring);
+			return 0;
+		}
 		case PR_GET_TIMERSLACK:
 			error = current->timer_slack_ns;
 			break;
Index: linux/kernel/trace/trace_events.c
===================================================================
--- linux.orig/kernel/trace/trace_events.c
+++ linux/kernel/trace/trace_events.c
@@ -989,8 +989,7 @@ static struct dentry *event_trace_events
 
 	d_events = debugfs_create_dir("events", d_tracer);
 	if (!d_events)
-		pr_warning("Could not create debugfs "
-			   "'events' directory\n");
+		pr_warning("Could not create debugfs 'events' directory\n");
 
 	return d_events;
 }
@@ -1014,15 +1013,13 @@ event_subsystem_dir(const char *name, st
 	/* need to create new entry */
 	system = kmalloc(sizeof(*system), GFP_KERNEL);
 	if (!system) {
-		pr_warning("No memory to create event subsystem %s\n",
-			   name);
+		pr_warning("No memory to create event subsystem %s\n", name);
 		return d_events;
 	}
 
 	system->entry = debugfs_create_dir(name, d_events);
 	if (!system->entry) {
-		pr_warning("Could not create event subsystem %s\n",
-			   name);
+		pr_warning("Could not create event subsystem %s\n", name);
 		kfree(system);
 		return d_events;
 	}
@@ -1041,8 +1038,7 @@ event_subsystem_dir(const char *name, st
 
 	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
 	if (!system->filter) {
-		pr_warning("Could not allocate filter for subsystem "
-			   "'%s'\n", name);
+		pr_warning("Could not allocate filter for subsystem '%s'\n", name);
 		return system->entry;
 	}
 
@@ -1051,8 +1047,7 @@ event_subsystem_dir(const char *name, st
 	if (!entry) {
 		kfree(system->filter);
 		system->filter = NULL;
-		pr_warning("Could not create debugfs "
-			   "'%s/filter' entry\n", name);
+		pr_warning("Could not create debugfs '%s/filter' entry\n", name);
 	}
 
 	trace_create_file("enable", 0644, system->entry,
@@ -1081,19 +1076,16 @@ event_create_dir(struct ftrace_event_cal
 
 	call->dir = debugfs_create_dir(call->name, d_events);
 	if (!call->dir) {
-		pr_warning("Could not create debugfs "
-			   "'%s' directory\n", call->name);
+		pr_warning("Could not create debugfs '%s' directory\n", call->name);
 		return -1;
 	}
 
 	if (call->class->reg)
-		trace_create_file("enable", 0644, call->dir, call,
-				  enable);
+		trace_create_file("enable", 0644, call->dir, call, enable);
 
 #ifdef CONFIG_PERF_EVENTS
 	if (call->event.type && call->class->reg)
-		trace_create_file("id", 0444, call->dir, call,
-		 		  id);
+		trace_create_file("id", 0444, call->dir, call, id);
 #endif
 
 	/*
@@ -1104,16 +1096,12 @@ event_create_dir(struct ftrace_event_cal
 	if (list_empty(head)) {
 		ret = call->class->define_fields(call);
 		if (ret < 0) {
-			pr_warning("Could not initialize trace point"
-				   " events/%s\n", call->name);
+			pr_warning("Could not initialize trace point events/%s\n", call->name);
 			return ret;
 		}
 	}
-	trace_create_file("filter", 0644, call->dir, call,
-			  filter);
-
-	trace_create_file("format", 0444, call->dir, call,
-			  format);
+	trace_create_file("filter", 0644, call->dir, call, filter);
+	trace_create_file("format", 0444, call->dir, call, format);
 
 	return 0;
 }
Index: linux/tools/perf/builtin-trace.c
===================================================================
--- linux.orig/tools/perf/builtin-trace.c
+++ linux/tools/perf/builtin-trace.c
@@ -719,6 +719,44 @@ static void vfs_getname(void *data,
 	}
 }
 
+static void user_event(void *data,
+			struct event *event __used,
+			int cpu __used,
+			u64 timestamp __used,
+			struct thread *thread __used)
+{
+	struct thread_data *tdata = get_thread_data(thread);
+	unsigned int i;
+	char *message;
+	u64 t = 0;
+
+	if (!tdata->enabled)
+		return;
+	if (time_filtered(timestamp))
+		return;
+	if (cpu_filtered(cpu))
+		return;
+	if (tdata->entry_time) {
+		t = timestamp - tdata->entry_time;
+		if (filter_duration(t))
+			return;
+	} else if (duration_filter)
+		return;
+
+	print_entry_head(thread, timestamp, t, cpu);
+	message = strdup(raw_field_ptr(event, "message", data));
+
+	/* Sanitize the string a bit before printing it: */
+	for (i = 0; i < strlen(message); i++) {
+		if (message[i] == '\n')
+			message[i] = ' ';
+	}
+
+	color_fprintf(stdout, PERF_COLOR_GREEN, "<\"%s\">\n", message);
+
+	free(message);
+}
+
 static int pagefault_preprocess_sample(const event_t *self,
 				       struct addr_location *al,
 				       struct sample_data *data,
@@ -1036,6 +1074,8 @@ process_raw_event(event_t *self, void *d
 		pagefault_exit(data, event, cpu, timestamp, thread);
 	if (!strcmp(event->name, "vfs_getname"))
 		vfs_getname(data, event, cpu, timestamp, thread);
+	if (!strcmp(event->name, "user"))
+		user_event(data, event, cpu, timestamp, thread);
 
 	if (!scheduler_events && !scheduler_all_events)
 		return;
@@ -1226,6 +1266,7 @@ static const char *record_args[] = {
 	"-e", "sched:sched_stat_sleep:r",
 	"-e", "sched:sched_stat_iowait:r",
 	"-e", "sched:sched_stat_runtime:r",
+	"-e", "user:user:r",
 };
 
 static int __cmd_record(int argc, const char **argv)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ