lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250424192613.356969984@goodmis.org>
Date: Thu, 24 Apr 2025 15:25:02 -0400
From: Steven Rostedt <rostedt@...dmis.org>
To: linux-kernel@...r.kernel.org,
 linux-trace-kernel@...r.kernel.org
Cc: Masami Hiramatsu <mhiramat@...nel.org>,
 Mark Rutland <mark.rutland@....com>,
 Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
 Andrew Morton <akpm@...ux-foundation.org>,
 Josh Poimboeuf <jpoimboe@...nel.org>,
 x86@...nel.org,
 Peter Zijlstra <peterz@...radead.org>,
 Ingo Molnar <mingo@...nel.org>,
 Arnaldo Carvalho de Melo <acme@...nel.org>,
 Indu Bhagat <indu.bhagat@...cle.com>,
 Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
 Jiri Olsa <jolsa@...nel.org>,
 Namhyung Kim <namhyung@...nel.org>,
 Ian Rogers <irogers@...gle.com>,
 Adrian Hunter <adrian.hunter@...el.com>,
 linux-perf-users@...r.kernel.org,
 Mark Brown <broonie@...nel.org>,
 linux-toolchains@...r.kernel.org,
 Jordan Rome <jordalgo@...a.com>,
 Sam James <sam@...too.org>,
 Andrii Nakryiko <andrii.nakryiko@...il.com>,
 Jens Remus <jremus@...ux.ibm.com>,
 Florian Weimer <fweimer@...hat.com>,
 Andy Lutomirski <luto@...nel.org>,
 Weinan Liu <wnliu@...gle.com>,
 Blake Jones <blakejones@...gle.com>,
 Beau Belgrave <beaub@...ux.microsoft.com>,
 "Jose E. Marchesi" <jemarch@....org>,
 Alexander Aring <aahringo@...hat.com>
Subject: [PATCH v5 6/9] tracing: Implement deferred user space stacktracing

From: Steven Rostedt <rostedt@...dmis.org>

Use the unwind_deferred_*() interface to be able to trace deferred user
space stacks. This creates two new ftrace events:

  user_unwind_cookie
  user_unwind_stack

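The user_unwind_cookie event records into the ring buffer the cookie
provided by unwind_deferred_request(), and the user_unwind_stack event
records into the ring buffer the user space stack together with the cookie
associated with it, which lets the two events be paired.

Deferred tracing is used when the new "userstacktrace_delay" trace option
is set in addition to "userstacktrace".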

Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>
---
 kernel/trace/trace.c         | 93 ++++++++++++++++++++++++++++++++++++
 kernel/trace/trace.h         | 12 +++++
 kernel/trace/trace_entries.h | 24 ++++++++++
 kernel/trace/trace_export.c  | 23 +++++++++
 kernel/trace/trace_output.c  | 72 ++++++++++++++++++++++++++++
 5 files changed, 224 insertions(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 523e98cd121d..71340207321e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3077,6 +3077,66 @@ EXPORT_SYMBOL_GPL(trace_dump_stack);
 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
 static DEFINE_PER_CPU(int, user_stack_count);
 
+static void trace_user_unwind_callback(struct unwind_work *unwind,
+				       struct unwind_stacktrace *trace,
+				       u64 ctx_cookie)
+{
+	struct trace_array *tr = container_of(unwind, struct trace_array, unwinder);
+	struct trace_buffer *buffer = tr->array_buffer.buffer;
+	struct userunwind_stack_entry *entry;
+	struct ring_buffer_event *event;
+	unsigned int trace_ctx;
+	unsigned long *caller;
+	unsigned int offset;
+	int len;
+	int i;
+
+	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE_DELAY))
+		return;
+
+	len = trace->nr * sizeof(unsigned long) + sizeof(*entry);
+
+	trace_ctx = tracing_gen_ctx();
+	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_UNWIND_STACK,
+					    len, trace_ctx);
+	if (!event)
+		return;
+
+	entry	= ring_buffer_event_data(event);
+
+	entry->cookie = ctx_cookie;
+
+	offset = sizeof(*entry);
+	len = sizeof(unsigned long) * trace->nr;
+
+	entry->__data_loc_stack = offset | (len << 16);
+	caller = (void *)entry + offset;
+
+	for (i = 0; i < trace->nr; i++) {
+		caller[i] = trace->entries[i];
+	}
+
+	__buffer_unlock_commit(buffer, event);
+}
+
+static void
+ftrace_trace_userstack_delay(struct trace_array *tr,
+			     struct trace_buffer *buffer, unsigned int trace_ctx)
+{
+	struct userunwind_cookie_entry *entry;
+	struct ring_buffer_event *event;
+
+	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_UNWIND_COOKIE,
+					    sizeof(*entry), trace_ctx);
+	if (!event)
+		return;
+	entry	= ring_buffer_event_data(event);
+
+	unwind_deferred_request(&tr->unwinder, &entry->cookie);
+
+	__buffer_unlock_commit(buffer, event);
+}
+
 static void
 ftrace_trace_userstack(struct trace_array *tr,
 		       struct trace_buffer *buffer, unsigned int trace_ctx)
@@ -3091,6 +3151,11 @@ ftrace_trace_userstack(struct trace_array *tr,
 	if (current->flags & PF_KTHREAD)
 		return;
 
+	if (tr->trace_flags & TRACE_ITER_USERSTACKTRACE_DELAY) {
+		ftrace_trace_userstack_delay(tr, buffer, trace_ctx);
+		return;
+	}
+
 	/*
 	 * NMIs can not handle page faults, even with fix ups.
 	 * The save user stack can (and often does) fault.
@@ -5189,6 +5254,17 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
 	return 0;
 }
 
+static int update_unwind_deferred(struct trace_array *tr, int enabled)
+{
+	if (enabled) {
+		return unwind_deferred_init(&tr->unwinder,
+					    trace_user_unwind_callback);
+	} else {
+		unwind_deferred_cancel(&tr->unwinder);
+		return 0;
+	}
+}
+
 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
 {
 	if ((mask == TRACE_ITER_RECORD_TGID) ||
@@ -5224,6 +5300,19 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
 		}
 	}
 
+	/*
+	 * Deferred unwinding needs USERSTACKTRACE and USERSTACKTRACE_DELAY.
+	 * Whichever one is changing, fail before the flag itself is updated.
+	 */
+	if ((mask == TRACE_ITER_USERSTACKTRACE &&
+	     (tr->trace_flags & TRACE_ITER_USERSTACKTRACE_DELAY)) ||
+	    (mask == TRACE_ITER_USERSTACKTRACE_DELAY &&
+	     (tr->trace_flags & TRACE_ITER_USERSTACKTRACE))) {
+		int ret = update_unwind_deferred(tr, enabled);
+		if (ret < 0)
+			return ret;
+	}
+
 	if (enabled)
 		tr->trace_flags |= mask;
 	else
@@ -9890,6 +9979,10 @@ static int __remove_instance(struct trace_array *tr)
 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
 		return -EBUSY;
 
+	if ((tr->trace_flags & (TRACE_ITER_USERSTACKTRACE | TRACE_ITER_USERSTACKTRACE_DELAY)) ==
+	    (TRACE_ITER_USERSTACKTRACE | TRACE_ITER_USERSTACKTRACE_DELAY))
+		unwind_deferred_cancel(&tr->unwinder);
+
 	list_del(&tr->list);
 
 	/* Disable all the flags that were enabled coming in */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3c733b9e7b32..3f0941c9215c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/clocksource.h>
 #include <linux/ring_buffer.h>
+#include <linux/unwind_deferred.h>
 #include <linux/mmiotrace.h>
 #include <linux/tracepoint.h>
 #include <linux/ftrace.h>
@@ -49,7 +50,10 @@ enum trace_type {
 	TRACE_GRAPH_ENT,
 	TRACE_GRAPH_RETADDR_ENT,
 	TRACE_USER_STACK,
+	/* trace-cmd manually adds blktrace after USER_STACK */
 	TRACE_BLK,
+	TRACE_USER_UNWIND_STACK,
+	TRACE_USER_UNWIND_COOKIE,
 	TRACE_BPUTS,
 	TRACE_HWLAT,
 	TRACE_OSNOISE,
@@ -92,6 +96,9 @@ enum trace_type {
 #undef __array_desc
 #define __array_desc(type, container, item, size)
 
+#undef __dynamic_array
+#define __dynamic_array(type, item)	u32	__data_loc_##item;
+
 #undef __dynamic_field
 #define __dynamic_field(type, item)	type	item[];
 
@@ -435,6 +442,7 @@ struct trace_array {
 	struct cond_snapshot	*cond_snapshot;
 #endif
 	struct trace_func_repeats	__percpu *last_func_repeats;
+	struct unwind_work	unwinder;
 	/*
 	 * On boot up, the ring buffer is set to the minimum size, so that
 	 * we do not waste memory on systems that are not using tracing.
@@ -526,6 +534,9 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct ctx_switch_entry, 0);	\
 		IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK);	\
 		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
+		IF_ASSIGN(var, ent, struct userunwind_stack_entry, TRACE_USER_UNWIND_STACK);\
+		IF_ASSIGN(var, ent, struct userunwind_cookie_entry, TRACE_USER_UNWIND_COOKIE);\
+		IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
 		IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT);	\
 		IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT);	\
 		IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS);	\
@@ -1356,6 +1367,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
 		C(PRINTK,		"trace_printk"),	\
 		C(ANNOTATE,		"annotate"),		\
 		C(USERSTACKTRACE,	"userstacktrace"),	\
+		C(USERSTACKTRACE_DELAY,	"userstacktrace_delay"),\
 		C(SYM_USEROBJ,		"sym-userobj"),		\
 		C(PRINTK_MSGONLY,	"printk-msg-only"),	\
 		C(CONTEXT_INFO,		"context-info"),   /* Print pid/cpu/time */ \
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 7100d8f86011..752a99296c95 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -249,6 +249,30 @@ FTRACE_ENTRY(user_stack, userstack_entry,
 		 (void *)__entry->caller[6], (void *)__entry->caller[7])
 );
 
+FTRACE_ENTRY(user_unwind_stack, userunwind_stack_entry,
+
+	TRACE_USER_UNWIND_STACK,
+
+	F_STRUCT(
+		__field(		u64,		cookie	)
+		__dynamic_array(	unsigned long,	stack	)
+	),
+
+	F_printk("cookie=%llx\n%s", __entry->cookie,
+		 __print_dynamic_array(stack, sizeof(unsigned long)))
+);
+
+FTRACE_ENTRY(user_unwind_cookie, userunwind_cookie_entry,
+
+	TRACE_USER_UNWIND_COOKIE,
+
+	F_STRUCT(
+		__field(		u64,		cookie	)
+	),
+
+	F_printk("cookie=%llx", __entry->cookie)
+);
+
 /*
  * trace_printk entry:
  */
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d9d41e3ba379..831999f84e2c 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -57,6 +57,9 @@ static int ftrace_event_register(struct trace_event_call *call,
 #undef __array_desc
 #define __array_desc(type, container, item, size)	type item[size];
 
+#undef __dynamic_array
+#define __dynamic_array(type, item)			u32 __data_loc_##item;
+
 #undef __dynamic_field
 #define __dynamic_field(type, item)			type item[];
 
@@ -66,6 +69,16 @@ static int ftrace_event_register(struct trace_event_call *call,
 #undef F_printk
 #define F_printk(fmt, args...) fmt, args
 
+/* Only used for ftrace event format output */
+static inline char * __print_dynamic_array(int array, size_t size)
+{
+	return NULL;
+}
+
+#undef __print_dynamic_array
+#define __print_dynamic_array(array, el_size)				\
+	__print_dynamic_array(__entry->__data_loc_##array, el_size)
+
 #undef FTRACE_ENTRY
 #define FTRACE_ENTRY(name, struct_name, id, tstruct, print)		\
 struct ____ftrace_##name {						\
@@ -74,6 +87,7 @@ struct ____ftrace_##name {						\
 static void __always_unused ____ftrace_check_##name(void)		\
 {									\
 	struct ____ftrace_##name *__entry = NULL;			\
+	struct trace_seq __maybe_unused *p = NULL;			\
 									\
 	/* force compile-time check on F_printk() */			\
 	printk(print);							\
@@ -123,6 +137,12 @@ static void __always_unused ____ftrace_check_##name(void)		\
 #undef __array_desc
 #define __array_desc(_type, _container, _item, _len) __array(_type, _item, _len)
 
+#undef __dynamic_array
+#define __dynamic_array(_type, _item) {					\
+	.type = "__data_loc " #_type "[]", .name = #_item,		\
+	.size = sizeof(u32), .align = __alignof__(u32),			\
+	is_signed_type(_type), .filter_type = FILTER_OTHER },
+
 #undef __dynamic_field
 #define __dynamic_field(_type, _item) {					\
 	.type = #_type "[]", .name = #_item,				\
@@ -161,6 +181,9 @@ static struct trace_event_fields ftrace_event_fields_##name[] = {	\
 #undef __array_desc
 #define __array_desc(type, container, item, len)
 
+#undef __dynamic_array
+#define __dynamic_array(type, item)
+
 #undef __dynamic_field
 #define __dynamic_field(type, item)
 
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index fee40ffbd490..e11911e5f7d0 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1374,6 +1374,58 @@ static struct trace_event trace_stack_event = {
 };
 
 /* TRACE_USER_STACK */
+static enum print_line_t trace_user_unwind_stack_print(struct trace_iterator *iter,
+						int flags, struct trace_event *event)
+{
+	struct userunwind_stack_entry *field;
+	struct trace_seq *s = &iter->seq;
+	unsigned long *caller;
+	unsigned int offset;
+	unsigned int len;
+	unsigned int caller_cnt;
+	unsigned int i;
+
+	trace_assign_type(field, iter->ent);
+
+	trace_seq_puts(s, "<user stack unwind>\n");
+
+	trace_seq_printf(s, "cookie=%llx\n", field->cookie);
+
+	/* The stack field is a dynamic pointer */
+	offset = field->__data_loc_stack;
+	len = offset >> 16;
+	offset = offset & 0xffff;
+	caller_cnt = len / sizeof(*caller);
+
+	caller = (void *)iter->ent + offset;
+
+	for (i = 0; i < caller_cnt; i++) {
+		unsigned long ip = caller[i];
+
+		if (!ip || trace_seq_has_overflowed(s))
+			break;
+
+		trace_seq_puts(s, " => ");
+		seq_print_user_ip(s, NULL, ip, flags);
+		trace_seq_putc(s, '\n');
+	}
+
+	return trace_handle_return(s);
+}
+
+static enum print_line_t trace_user_unwind_cookie_print(struct trace_iterator *iter,
+						 int flags, struct trace_event *event)
+{
+	struct userunwind_cookie_entry *field;
+	struct trace_seq *s = &iter->seq;
+
+	trace_assign_type(field, iter->ent);
+
+	trace_seq_printf(s, "cookie=%llx\n", field->cookie);
+
+	return trace_handle_return(s);
+}
+
 static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
 						int flags, struct trace_event *event)
 {
@@ -1417,6 +1469,24 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
 	return trace_handle_return(s);
 }
 
+static struct trace_event_functions trace_userunwind_stack_funcs = {
+	.trace		= trace_user_unwind_stack_print,
+};
+
+static struct trace_event trace_userunwind_stack_event = {
+	.type		= TRACE_USER_UNWIND_STACK,
+	.funcs		= &trace_userunwind_stack_funcs,
+};
+
+static struct trace_event_functions trace_userunwind_cookie_funcs = {
+	.trace		= trace_user_unwind_cookie_print,
+};
+
+static struct trace_event trace_userunwind_cookie_event = {
+	.type		= TRACE_USER_UNWIND_COOKIE,
+	.funcs		= &trace_userunwind_cookie_funcs,
+};
+
 static struct trace_event_functions trace_user_stack_funcs = {
 	.trace		= trace_user_stack_print,
 };
@@ -1816,6 +1886,8 @@ static struct trace_event *events[] __initdata = {
 	&trace_ctx_event,
 	&trace_wake_event,
 	&trace_stack_event,
+	&trace_userunwind_cookie_event,
+	&trace_userunwind_stack_event,
 	&trace_user_stack_event,
 	&trace_bputs_event,
 	&trace_bprint_event,
-- 
2.47.2



Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ