[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251024145156.GM4068168@noisy.programming.kicks-ass.net>
Date: Fri, 24 Oct 2025 16:51:56 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Jens Remus <jremus@...ux.ibm.com>
Cc: Steven Rostedt <rostedt@...nel.org>, linux-kernel@...r.kernel.org,
linux-trace-kernel@...r.kernel.org, bpf@...r.kernel.org,
x86@...nel.org, Masami Hiramatsu <mhiramat@...nel.org>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Josh Poimboeuf <jpoimboe@...nel.org>,
Ingo Molnar <mingo@...nel.org>, Jiri Olsa <jolsa@...nel.org>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Namhyung Kim <namhyung@...nel.org>,
Thomas Gleixner <tglx@...utronix.de>,
Andrii Nakryiko <andrii@...nel.org>,
Indu Bhagat <indu.bhagat@...cle.com>,
"Jose E. Marchesi" <jemarch@....org>,
Beau Belgrave <beaub@...ux.microsoft.com>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Florian Weimer <fweimer@...hat.com>, Sam James <sam@...too.org>,
Kees Cook <kees@...nel.org>, Carlos O'Donell <codonell@...hat.com>,
Heiko Carstens <hca@...ux.ibm.com>,
Vasily Gorbik <gor@...ux.ibm.com>
Subject: Re: [PATCH v16 0/4] perf: Support the deferred unwinding
infrastructure
On Fri, Oct 24, 2025 at 04:08:15PM +0200, Peter Zijlstra wrote:
> Yeah, I suppose that should work. Let me rework things accordingly.
---
Subject: unwind_user/x86: Teach FP unwind about start of function
From: Peter Zijlstra <peterz@...radead.org>
Date: Fri Oct 24 12:31:10 CEST 2025
When userspace is interrupted at the start of a function, before we
get a chance to complete the frame, unwind will miss one caller.
x86 has a uprobe-specific fixup for this; add bits to the generic
unwinder to support it.
Suggested-by: Jens Remus <jremus@...ux.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
---
arch/x86/events/core.c | 40 -------------------------------------
arch/x86/include/asm/unwind_user.h | 12 +++++++++++
arch/x86/include/asm/uprobes.h | 9 ++++++++
arch/x86/kernel/uprobes.c | 32 +++++++++++++++++++++++++++++
include/linux/unwind_user_types.h | 1
kernel/unwind/user.c | 35 ++++++++++++++++++++++++--------
6 files changed, 80 insertions(+), 49 deletions(-)
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2845,46 +2845,6 @@ static unsigned long get_segment_base(un
return get_desc_base(desc);
}
-#ifdef CONFIG_UPROBES
-/*
- * Heuristic-based check if uprobe is installed at the function entry.
- *
- * Under assumption of user code being compiled with frame pointers,
- * `push %rbp/%ebp` is a good indicator that we indeed are.
- *
- * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern.
- * If we get this wrong, captured stack trace might have one extra bogus
- * entry, but the rest of stack trace will still be meaningful.
- */
-static bool is_uprobe_at_func_entry(struct pt_regs *regs)
-{
- struct arch_uprobe *auprobe;
-
- if (!current->utask)
- return false;
-
- auprobe = current->utask->auprobe;
- if (!auprobe)
- return false;
-
- /* push %rbp/%ebp */
- if (auprobe->insn[0] == 0x55)
- return true;
-
- /* endbr64 (64-bit only) */
- if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn))
- return true;
-
- return false;
-}
-
-#else
-static bool is_uprobe_at_func_entry(struct pt_regs *regs)
-{
- return false;
-}
-#endif /* CONFIG_UPROBES */
-
#ifdef CONFIG_IA32_EMULATION
#include <linux/compat.h>
--- a/arch/x86/include/asm/unwind_user.h
+++ b/arch/x86/include/asm/unwind_user.h
@@ -3,6 +3,7 @@
#define _ASM_X86_UNWIND_USER_H
#include <asm/ptrace.h>
+#include <asm/uprobes.h>
#define ARCH_INIT_USER_FP_FRAME(ws) \
.cfa_off = 2*(ws), \
@@ -10,6 +11,12 @@
.fp_off = -2*(ws), \
.use_fp = true,
+#define ARCH_INIT_USER_FP_ENTRY_FRAME(ws) \
+ .cfa_off = 1*(ws), \
+ .ra_off = -1*(ws), \
+ .fp_off = 0, \
+ .use_fp = false,
+
static inline int unwind_user_word_size(struct pt_regs *regs)
{
/* We can't unwind VM86 stacks */
@@ -22,4 +29,9 @@ static inline int unwind_user_word_size(
return sizeof(long);
}
+static inline bool unwind_user_at_function_start(struct pt_regs *regs)
+{
+ return is_uprobe_at_func_entry(regs);
+}
+
#endif /* _ASM_X86_UNWIND_USER_H */
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -62,4 +62,13 @@ struct arch_uprobe_task {
unsigned int saved_tf;
};
+#ifdef CONFIG_UPROBES
+extern bool is_uprobe_at_func_entry(struct pt_regs *regs);
+#else /* !CONFIG_UPROBES: stub, always reports "not at function entry" */
+static inline bool is_uprobe_at_func_entry(struct pt_regs *regs)
+{
+	return false;
+}
+#endif /* CONFIG_UPROBES */
+
#endif /* _ASM_UPROBES_H */
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -1791,3 +1791,35 @@ bool arch_uretprobe_is_alive(struct retu
else
return regs->sp <= ret->stack;
}
+
+/*
+ * Heuristic-based check if uprobe is installed at the function entry.
+ *
+ * Under assumption of user code being compiled with frame pointers,
+ * `push %rbp/%ebp` is a good indicator that we indeed are.
+ *
+ * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern.
+ * If we get this wrong, captured stack trace might have one extra bogus
+ * entry, but the rest of stack trace will still be meaningful.
+ */
+bool is_uprobe_at_func_entry(struct pt_regs *regs)
+{
+ struct arch_uprobe *auprobe;
+
+ if (!current->utask)
+ return false;
+
+ auprobe = current->utask->auprobe;
+ if (!auprobe)
+ return false;
+
+ /* push %rbp/%ebp */
+ if (auprobe->insn[0] == 0x55)
+ return true;
+
+ /* endbr64 (64-bit only) */
+ if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn))
+ return true;
+
+ return false;
+}
--- a/include/linux/unwind_user_types.h
+++ b/include/linux/unwind_user_types.h
@@ -39,6 +39,7 @@ struct unwind_user_state {
unsigned int ws;
enum unwind_user_type current_type;
unsigned int available_types;
+ bool topmost;
bool done;
};
--- a/kernel/unwind/user.c
+++ b/kernel/unwind/user.c
@@ -26,14 +26,12 @@ get_user_word(unsigned long *word, unsig
return get_user(*word, addr);
}
-static int unwind_user_next_fp(struct unwind_user_state *state)
+static int unwind_user_next_common(struct unwind_user_state *state,
+ const struct unwind_user_frame *frame)
{
- const struct unwind_user_frame frame = {
- ARCH_INIT_USER_FP_FRAME(state->ws)
- };
unsigned long cfa, fp, ra;
- if (frame.use_fp) {
+ if (frame->use_fp) {
if (state->fp < state->sp)
return -EINVAL;
cfa = state->fp;
@@ -42,7 +40,7 @@ static int unwind_user_next_fp(struct un
}
/* Get the Canonical Frame Address (CFA) */
- cfa += frame.cfa_off;
+ cfa += frame->cfa_off;
/* stack going in wrong direction? */
if (cfa <= state->sp)
@@ -53,19 +51,37 @@ static int unwind_user_next_fp(struct un
return -EINVAL;
/* Find the Return Address (RA) */
- if (get_user_word(&ra, cfa, frame.ra_off, state->ws))
+ if (get_user_word(&ra, cfa, frame->ra_off, state->ws))
return -EINVAL;
- if (frame.fp_off && get_user_word(&fp, cfa, frame.fp_off, state->ws))
+ if (frame->fp_off && get_user_word(&fp, cfa, frame->fp_off, state->ws))
return -EINVAL;
state->ip = ra;
state->sp = cfa;
- if (frame.fp_off)
+ if (frame->fp_off)
state->fp = fp;
+ state->topmost = false;
return 0;
}
+static int unwind_user_next_fp(struct unwind_user_state *state)
+{
+ struct pt_regs *regs = task_pt_regs(current);
+
+ const struct unwind_user_frame fp_frame = {
+ ARCH_INIT_USER_FP_FRAME(state->ws)
+ };
+ const struct unwind_user_frame fp_entry_frame = {
+ ARCH_INIT_USER_FP_ENTRY_FRAME(state->ws)
+ };
+
+ if (state->topmost && unwind_user_at_function_start(regs))
+ return unwind_user_next_common(state, &fp_entry_frame);
+
+ return unwind_user_next_common(state, &fp_frame);
+}
+
static int unwind_user_next(struct unwind_user_state *state)
{
unsigned long iter_mask = state->available_types;
@@ -118,6 +134,7 @@ static int unwind_user_start(struct unwi
state->done = true;
return -EINVAL;
}
+ state->topmost = true;
return 0;
}
Powered by blists - more mailing lists