[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250424192613.356969984@goodmis.org>
Date: Thu, 24 Apr 2025 15:25:02 -0400
From: Steven Rostedt <rostedt@...dmis.org>
To: linux-kernel@...r.kernel.org,
linux-trace-kernel@...r.kernel.org
Cc: Masami Hiramatsu <mhiramat@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Andrew Morton <akpm@...ux-foundation.org>,
Josh Poimboeuf <jpoimboe@...nel.org>,
x86@...nel.org,
Peter Zijlstra <peterz@...radead.org>,
Ingo Molnar <mingo@...nel.org>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Indu Bhagat <indu.bhagat@...cle.com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Jiri Olsa <jolsa@...nel.org>,
Namhyung Kim <namhyung@...nel.org>,
Ian Rogers <irogers@...gle.com>,
Adrian Hunter <adrian.hunter@...el.com>,
linux-perf-users@...r.kernel.org,
Mark Brown <broonie@...nel.org>,
linux-toolchains@...r.kernel.org,
Jordan Rome <jordalgo@...a.com>,
Sam James <sam@...too.org>,
Andrii Nakryiko <andrii.nakryiko@...il.com>,
Jens Remus <jremus@...ux.ibm.com>,
Florian Weimer <fweimer@...hat.com>,
Andy Lutomirski <luto@...nel.org>,
Weinan Liu <wnliu@...gle.com>,
Blake Jones <blakejones@...gle.com>,
Beau Belgrave <beaub@...ux.microsoft.com>,
"Jose E. Marchesi" <jemarch@....org>,
Alexander Aring <aahringo@...hat.com>
Subject: [PATCH v5 6/9] tracing: Implement deferred user space stacktracing
From: Steven Rostedt <rostedt@...dmis.org>
Use the unwind_deferred_*() interface to be able to trace deferred user
space stacks. This creates two new ftrace events:
user_unwind_cookie
user_unwind_stack
The user_unwind_cookie will record into the ring buffer the cookie given
from unwind_deferred_request(), and the user_unwind_stack will record into
the ring buffer the user space stack as well as the cookie associated with
it.
Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>
---
kernel/trace/trace.c | 93 ++++++++++++++++++++++++++++++++++++
kernel/trace/trace.h | 12 +++++
kernel/trace/trace_entries.h | 24 ++++++++++
kernel/trace/trace_export.c | 23 +++++++++
kernel/trace/trace_output.c | 72 ++++++++++++++++++++++++++++
5 files changed, 224 insertions(+)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 523e98cd121d..71340207321e 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3077,6 +3077,66 @@ EXPORT_SYMBOL_GPL(trace_dump_stack);
#ifdef CONFIG_USER_STACKTRACE_SUPPORT
static DEFINE_PER_CPU(int, user_stack_count);
+/*
+ * Callback registered with unwind_deferred_init() (see
+ * update_unwind_deferred()). It writes one TRACE_USER_UNWIND_STACK event
+ * into the ring buffer of the trace_array that embeds @unwind.
+ *
+ * @unwind:     the unwind_work embedded in a struct trace_array
+ * @trace:      the user space stack trace; trace->nr entries are copied
+ * @ctx_cookie: cookie stored in the event so it can be matched with the
+ *              user_unwind_cookie event recorded at request time
+ *
+ * Nothing is recorded if the userstacktrace_delay option is not set or if
+ * the ring buffer reservation fails.
+ */
+static void trace_user_unwind_callback(struct unwind_work *unwind,
+				       struct unwind_stacktrace *trace,
+				       u64 ctx_cookie)
+{
+	struct trace_array *tr = container_of(unwind, struct trace_array, unwinder);
+	struct trace_buffer *buffer = tr->array_buffer.buffer;
+	struct userunwind_stack_entry *entry;
+	struct ring_buffer_event *event;
+	unsigned long *caller;
+	int stack_bytes;
+	int total;
+	int i;
+
+	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE_DELAY))
+		return;
+
+	/* The stack entries are stored directly behind the fixed size entry */
+	stack_bytes = trace->nr * sizeof(unsigned long);
+	total = stack_bytes + sizeof(*entry);
+
+	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_UNWIND_STACK,
+					    total, tracing_gen_ctx());
+	if (!event)
+		return;
+
+	entry = ring_buffer_event_data(event);
+	entry->cookie = ctx_cookie;
+
+	/*
+	 * Dynamic array descriptor: the low 16 bits hold the offset of the
+	 * data from the start of the entry, the high 16 bits its length in
+	 * bytes.
+	 */
+	entry->__data_loc_stack = (unsigned int)sizeof(*entry) | (stack_bytes << 16);
+	caller = (void *)entry + sizeof(*entry);
+
+	for (i = 0; i < trace->nr; i++)
+		caller[i] = trace->entries[i];
+
+	__buffer_unlock_commit(buffer, event);
+}
+
+/*
+ * Record a TRACE_USER_UNWIND_COOKIE event in @buffer and request a deferred
+ * user space stack trace for @tr's unwinder. The cookie is written straight
+ * into the reserved event by unwind_deferred_request().
+ *
+ * NOTE(review): the return value of unwind_deferred_request() is not
+ * checked here; confirm that it always initializes the cookie, otherwise
+ * the committed event could carry stale ring buffer contents.
+ */
+static void
+ftrace_trace_userstack_delay(struct trace_array *tr,
+			     struct trace_buffer *buffer, unsigned int trace_ctx)
+{
+	struct userunwind_cookie_entry *cookie_entry;
+	struct ring_buffer_event *rb_event;
+
+	rb_event = __trace_buffer_lock_reserve(buffer, TRACE_USER_UNWIND_COOKIE,
+					       sizeof(*cookie_entry), trace_ctx);
+	if (rb_event) {
+		cookie_entry = ring_buffer_event_data(rb_event);
+
+		unwind_deferred_request(&tr->unwinder, &cookie_entry->cookie);
+
+		__buffer_unlock_commit(buffer, rb_event);
+	}
+}
+
static void
ftrace_trace_userstack(struct trace_array *tr,
struct trace_buffer *buffer, unsigned int trace_ctx)
@@ -3091,6 +3151,11 @@ ftrace_trace_userstack(struct trace_array *tr,
if (current->flags & PF_KTHREAD)
return;
+ if (tr->trace_flags & TRACE_ITER_USERSTACKTRACE_DELAY) {
+ ftrace_trace_userstack_delay(tr, buffer, trace_ctx);
+ return;
+ }
+
/*
* NMIs can not handle page faults, even with fix ups.
* The save user stack can (and often does) fault.
@@ -5189,6 +5254,17 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
return 0;
}
+/*
+ * Register (@enabled != 0) or cancel (@enabled == 0) the deferred unwind
+ * callback of @tr.
+ *
+ * Returns the result of unwind_deferred_init() when enabling, 0 when
+ * disabling.
+ */
+static int update_unwind_deferred(struct trace_array *tr, int enabled)
+{
+	if (!enabled) {
+		unwind_deferred_cancel(&tr->unwinder);
+		return 0;
+	}
+
+	return unwind_deferred_init(&tr->unwinder, trace_user_unwind_callback);
+}
+
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
{
if ((mask == TRACE_ITER_RECORD_TGID) ||
@@ -5224,6 +5300,19 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
}
}
+	/*
+	 * The deferred unwinder is registered only while both the
+	 * userstacktrace and userstacktrace_delay options are set. When one
+	 * of the two is toggled and the other is already set, register or
+	 * cancel the unwinder. tr->trace_flags is updated below, so bail out
+	 * before that if registration fails; otherwise the flags would claim
+	 * an unwinder that was never set up.
+	 */
+	if (mask == TRACE_ITER_USERSTACKTRACE) {
+		if (tr->trace_flags & TRACE_ITER_USERSTACKTRACE_DELAY) {
+			int ret = update_unwind_deferred(tr, enabled);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	if (mask == TRACE_ITER_USERSTACKTRACE_DELAY) {
+		if (tr->trace_flags & TRACE_ITER_USERSTACKTRACE) {
+			int ret = update_unwind_deferred(tr, enabled);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+
if (enabled)
tr->trace_flags |= mask;
else
@@ -9890,6 +9979,10 @@ static int __remove_instance(struct trace_array *tr)
if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
return -EBUSY;
+	/*
+	 * The deferred unwinder is only registered while both the
+	 * userstacktrace and userstacktrace_delay options are set (see
+	 * set_tracer_flag()), so only cancel it in that case. The option
+	 * bits live in tr->trace_flags and must be combined with '|':
+	 * and-ing two distinct bits yields an empty mask, which would make
+	 * this test always true.
+	 */
+	if ((tr->trace_flags & (TRACE_ITER_USERSTACKTRACE | TRACE_ITER_USERSTACKTRACE_DELAY)) ==
+	    (TRACE_ITER_USERSTACKTRACE | TRACE_ITER_USERSTACKTRACE_DELAY))
+		unwind_deferred_cancel(&tr->unwinder);
+
list_del(&tr->list);
/* Disable all the flags that were enabled coming in */
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3c733b9e7b32..3f0941c9215c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -8,6 +8,7 @@
#include <linux/sched.h>
#include <linux/clocksource.h>
#include <linux/ring_buffer.h>
+#include <linux/unwind_deferred.h>
#include <linux/mmiotrace.h>
#include <linux/tracepoint.h>
#include <linux/ftrace.h>
@@ -49,7 +50,10 @@ enum trace_type {
TRACE_GRAPH_ENT,
TRACE_GRAPH_RETADDR_ENT,
TRACE_USER_STACK,
+ /* trace-cmd manually adds blktrace after USER_STACK */
TRACE_BLK,
+ TRACE_USER_UNWIND_STACK,
+ TRACE_USER_UNWIND_COOKIE,
TRACE_BPUTS,
TRACE_HWLAT,
TRACE_OSNOISE,
@@ -92,6 +96,9 @@ enum trace_type {
#undef __array_desc
#define __array_desc(type, container, item, size)
+#undef __dynamic_array
+#define __dynamic_array(type, item) u32 __data_loc_##item;
+
#undef __dynamic_field
#define __dynamic_field(type, item) type item[];
@@ -435,6 +442,7 @@ struct trace_array {
struct cond_snapshot *cond_snapshot;
#endif
struct trace_func_repeats __percpu *last_func_repeats;
+ struct unwind_work unwinder;
/*
* On boot up, the ring buffer is set to the minimum size, so that
* we do not waste memory on systems that are not using tracing.
@@ -526,6 +534,9 @@ extern void __ftrace_bad_type(void);
IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \
IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
+		/* Deferred user space unwind events */			\
+		IF_ASSIGN(var, ent, struct userunwind_stack_entry, TRACE_USER_UNWIND_STACK);\
+		IF_ASSIGN(var, ent, struct userunwind_cookie_entry, TRACE_USER_UNWIND_COOKIE);\
IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \
IF_ASSIGN(var, ent, struct bputs_entry, TRACE_BPUTS); \
@@ -1356,6 +1367,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
C(PRINTK, "trace_printk"), \
C(ANNOTATE, "annotate"), \
C(USERSTACKTRACE, "userstacktrace"), \
+ C(USERSTACKTRACE_DELAY, "userstacktrace_delay"),\
C(SYM_USEROBJ, "sym-userobj"), \
C(PRINTK_MSGONLY, "printk-msg-only"), \
C(CONTEXT_INFO, "context-info"), /* Print pid/cpu/time */ \
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 7100d8f86011..752a99296c95 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -249,6 +249,30 @@ FTRACE_ENTRY(user_stack, userstack_entry,
(void *)__entry->caller[6], (void *)__entry->caller[7])
);
+/*
+ * Deferred user space stack trace entry:
+ *
+ * Holds the cookie of the request followed by a dynamic array with the
+ * user space stack. The cookie is a u64 and is printed in hex so that the
+ * exported format agrees with the "cookie=%llx" output produced by
+ * trace_user_unwind_stack_print().
+ */
+FTRACE_ENTRY(user_unwind_stack, userunwind_stack_entry,
+
+	TRACE_USER_UNWIND_STACK,
+
+	F_STRUCT(
+		__field(	u64,		cookie	)
+		__dynamic_array( unsigned long,	stack	)
+	),
+
+	F_printk("cookie=%llx\n%s", __entry->cookie,
+		 __print_dynamic_array(stack, sizeof(unsigned long)))
+);
+
+/*
+ * Deferred user space stack trace request entry:
+ *
+ * Holds only the cookie of the request. The cookie is a u64 and is printed
+ * in hex so that the exported format agrees with the "cookie=%llx" output
+ * produced by trace_user_unwind_cookie_print().
+ */
+FTRACE_ENTRY(user_unwind_cookie, userunwind_cookie_entry,
+
+	TRACE_USER_UNWIND_COOKIE,
+
+	F_STRUCT(
+		__field(	u64,		cookie	)
+	),
+
+	F_printk("cookie=%llx", __entry->cookie)
+);
+
/*
* trace_printk entry:
*/
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index d9d41e3ba379..831999f84e2c 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -57,6 +57,9 @@ static int ftrace_event_register(struct trace_event_call *call,
#undef __array_desc
#define __array_desc(type, container, item, size) type item[size];
+#undef __dynamic_array
+#define __dynamic_array(type, item) u32 __data_loc_##item;
+
#undef __dynamic_field
#define __dynamic_field(type, item) type item[];
@@ -66,6 +69,16 @@ static int ftrace_event_register(struct trace_event_call *call,
#undef F_printk
#define F_printk(fmt, args...) fmt, args
+/*
+ * Only used for ftrace event format output.
+ *
+ * Stub that always returns NULL; it gives the __print_dynamic_array()
+ * macro defined right below something to expand to.
+ */
+static inline char *__print_dynamic_array(int data_loc, size_t el_size)
+{
+	return NULL;
+}
+
+#undef __print_dynamic_array
+#define __print_dynamic_array(array, el_size) \
+ __print_dynamic_array(__entry->__data_loc_##array, el_size)
+
#undef FTRACE_ENTRY
#define FTRACE_ENTRY(name, struct_name, id, tstruct, print) \
struct ____ftrace_##name { \
@@ -74,6 +87,7 @@ struct ____ftrace_##name { \
static void __always_unused ____ftrace_check_##name(void) \
{ \
struct ____ftrace_##name *__entry = NULL; \
+ struct trace_seq __maybe_unused *p = NULL; \
\
/* force compile-time check on F_printk() */ \
printk(print); \
@@ -123,6 +137,12 @@ static void __always_unused ____ftrace_check_##name(void) \
#undef __array_desc
#define __array_desc(_type, _container, _item, _len) __array(_type, _item, _len)
+#undef __dynamic_array
+#define __dynamic_array(_type, _item) { \
+ .type = "__data_loc " #_type "[]", .name = #_item, \
+ .size = 4, .align = __alignof__(4), \
+ is_signed_type(_type), .filter_type = FILTER_OTHER },
+
#undef __dynamic_field
#define __dynamic_field(_type, _item) { \
.type = #_type "[]", .name = #_item, \
@@ -161,6 +181,9 @@ static struct trace_event_fields ftrace_event_fields_##name[] = { \
#undef __array_desc
#define __array_desc(type, container, item, len)
+#undef __dynamic_array
+#define __dynamic_array(type, item)
+
#undef __dynamic_field
#define __dynamic_field(type, item)
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index fee40ffbd490..e11911e5f7d0 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1374,6 +1374,58 @@ static struct trace_event trace_stack_event = {
};
/* TRACE_USER_STACK */
+/*
+ * Print a TRACE_USER_UNWIND_STACK entry: a header line, the cookie in hex
+ * and then one " => <ip>" line per recorded user space address. Printing
+ * stops at the first zero address or once the trace_seq has overflowed.
+ */
+static enum print_line_t trace_user_unwind_stack_print(struct trace_iterator *iter,
+						 int flags, struct trace_event *event)
+{
+	struct userunwind_stack_entry *field;
+	struct trace_seq *s = &iter->seq;
+	unsigned long *caller;
+	unsigned int data_loc;
+	unsigned int nr_entries;
+	unsigned int idx;
+
+	trace_assign_type(field, iter->ent);
+
+	trace_seq_puts(s, "<user stack unwind>\n");
+	trace_seq_printf(s, "cookie=%llx\n", field->cookie);
+
+	/*
+	 * The stack field is a dynamic pointer: the low 16 bits are the
+	 * offset of the data from the start of the entry, the high 16 bits
+	 * its length in bytes.
+	 */
+	data_loc = field->__data_loc_stack;
+	caller = (void *)iter->ent + (data_loc & 0xffff);
+	nr_entries = (data_loc >> 16) / sizeof(*caller);
+
+	for (idx = 0; idx < nr_entries; idx++) {
+		unsigned long ip = caller[idx];
+
+		if (ip == 0 || trace_seq_has_overflowed(s))
+			break;
+
+		trace_seq_puts(s, " => ");
+		seq_print_user_ip(s, NULL, ip, flags);
+		trace_seq_putc(s, '\n');
+	}
+
+	return trace_handle_return(s);
+}
+
+/*
+ * Print a TRACE_USER_UNWIND_COOKIE entry as a single "cookie=<hex>" line.
+ */
+static enum print_line_t trace_user_unwind_cookie_print(struct trace_iterator *iter,
+						  int flags, struct trace_event *event)
+{
+	struct userunwind_cookie_entry *cookie_ent;
+
+	trace_assign_type(cookie_ent, iter->ent);
+
+	trace_seq_printf(&iter->seq, "cookie=%llx\n", cookie_ent->cookie);
+
+	return trace_handle_return(&iter->seq);
+}
+
static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
int flags, struct trace_event *event)
{
@@ -1417,6 +1469,24 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
return trace_handle_return(s);
}
+/* Output callbacks and event descriptor for TRACE_USER_UNWIND_STACK */
+static struct trace_event_functions trace_userunwind_stack_funcs = {
+	.trace		= trace_user_unwind_stack_print,
+};
+
+static struct trace_event trace_userunwind_stack_event = {
+	.funcs		= &trace_userunwind_stack_funcs,
+	.type		= TRACE_USER_UNWIND_STACK,
+};
+
+/* Output callbacks and event descriptor for TRACE_USER_UNWIND_COOKIE */
+static struct trace_event_functions trace_userunwind_cookie_funcs = {
+	.trace		= trace_user_unwind_cookie_print,
+};
+
+static struct trace_event trace_userunwind_cookie_event = {
+	.funcs		= &trace_userunwind_cookie_funcs,
+	.type		= TRACE_USER_UNWIND_COOKIE,
+};
+
static struct trace_event_functions trace_user_stack_funcs = {
.trace = trace_user_stack_print,
};
@@ -1816,6 +1886,8 @@ static struct trace_event *events[] __initdata = {
&trace_ctx_event,
&trace_wake_event,
&trace_stack_event,
+ &trace_userunwind_cookie_event,
+ &trace_userunwind_stack_event,
&trace_user_stack_event,
&trace_bputs_event,
&trace_bprint_event,
--
2.47.2
Powered by blists - more mailing lists