lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251024104119.GJ4068168@noisy.programming.kicks-ass.net>
Date: Fri, 24 Oct 2025 12:41:19 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Steven Rostedt <rostedt@...nel.org>, jremus@...ux.ibm.com
Cc: linux-kernel@...r.kernel.org, linux-trace-kernel@...r.kernel.org,
	bpf@...r.kernel.org, x86@...nel.org,
	Masami Hiramatsu <mhiramat@...nel.org>,
	Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
	Josh Poimboeuf <jpoimboe@...nel.org>,
	Ingo Molnar <mingo@...nel.org>, Jiri Olsa <jolsa@...nel.org>,
	Arnaldo Carvalho de Melo <acme@...nel.org>,
	Namhyung Kim <namhyung@...nel.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Andrii Nakryiko <andrii@...nel.org>,
	Indu Bhagat <indu.bhagat@...cle.com>,
	"Jose E. Marchesi" <jemarch@....org>,
	Beau Belgrave <beaub@...ux.microsoft.com>,
	Jens Remus <jremus@...ux.ibm.com>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Florian Weimer <fweimer@...hat.com>, Sam James <sam@...too.org>,
	Kees Cook <kees@...nel.org>, Carlos O'Donell <codonell@...hat.com>
Subject: Re: [PATCH v16 0/4] perf: Support the deferred unwinding
 infrastructure

On Fri, Oct 24, 2025 at 11:29:26AM +0200, Peter Zijlstra wrote:
> On Thu, Oct 23, 2025 at 05:00:02PM +0200, Peter Zijlstra wrote:
> 
> > Trouble is, pretty much every unwind is 510 entries long -- this cannot
> > be right. I'm sure there's a silly mistake in unwind/user.c but I'm too
> > tired to find it just now. I'll try again tomorrow.
> 
> PEBKAC

Anyway, while staring at this, I noted that the perf userspace unwind
code has a few bits that are missing from the new shiny thing.

How about something like so? This add an optional arch specific unwinder
at the very highest priority (bit 0) and uses that to do a few extra
bits before disabling itself and falling back to whatever lower prio
unwinder to do the actual unwinding.

---
 arch/x86/events/core.c             |   40 ---------------------------
 arch/x86/include/asm/unwind_user.h |    4 ++
 arch/x86/include/asm/uprobes.h     |    9 ++++++
 arch/x86/kernel/unwind_user.c      |   53 +++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/uprobes.c          |   32 ++++++++++++++++++++++
 include/linux/unwind_user_types.h  |    5 ++-
 kernel/unwind/user.c               |    7 ++++
 7 files changed, 109 insertions(+), 41 deletions(-)

--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2845,46 +2845,6 @@ static unsigned long get_segment_base(un
 	return get_desc_base(desc);
 }
 
-#ifdef CONFIG_UPROBES
-/*
- * Heuristic-based check if uprobe is installed at the function entry.
- *
- * Under assumption of user code being compiled with frame pointers,
- * `push %rbp/%ebp` is a good indicator that we indeed are.
- *
- * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern.
- * If we get this wrong, captured stack trace might have one extra bogus
- * entry, but the rest of stack trace will still be meaningful.
- */
-static bool is_uprobe_at_func_entry(struct pt_regs *regs)
-{
-	struct arch_uprobe *auprobe;
-
-	if (!current->utask)
-		return false;
-
-	auprobe = current->utask->auprobe;
-	if (!auprobe)
-		return false;
-
-	/* push %rbp/%ebp */
-	if (auprobe->insn[0] == 0x55)
-		return true;
-
-	/* endbr64 (64-bit only) */
-	if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn))
-		return true;
-
-	return false;
-}
-
-#else
-static bool is_uprobe_at_func_entry(struct pt_regs *regs)
-{
-	return false;
-}
-#endif /* CONFIG_UPROBES */
-
 #ifdef CONFIG_IA32_EMULATION
 
 #include <linux/compat.h>
--- a/arch/x86/include/asm/unwind_user.h
+++ b/arch/x86/include/asm/unwind_user.h
@@ -8,4 +8,8 @@
 	.fp_off		= -2*(ws),			\
 	.use_fp		= true,
 
+#define HAVE_UNWIND_USER_ARCH 1
+
+extern int unwind_user_next_arch(struct unwind_user_state *state);
+
 #endif /* _ASM_X86_UNWIND_USER_H */
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -62,4 +62,13 @@ struct arch_uprobe_task {
 	unsigned int			saved_tf;
 };
 
+#ifdef CONFIG_UPROBES
+extern bool is_uprobe_at_func_entry(struct pt_regs *regs);
+#else
+static bool is_uprobe_at_func_entry(struct pt_regs *regs)
+{
+	return false;
+}
+#endif /* CONFIG_UPROBES */
+
 #endif	/* _ASM_UPROBES_H */
--- /dev/null
+++ b/arch/x86/kernel/unwind_user.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/unwind_user.h>
+#include <linux/uprobes.h>
+#include <linux/uaccess.h>
+#include <linux/sched/task_stack.h>
+#include <asm/processor.h>
+#include <asm/tlbflush.h>
+
+int unwind_user_next_arch(struct unwind_user_state *state)
+{
+	struct pt_regs *regs = task_pt_regs(current);
+
+	/* only once, on the first iteration */
+	state->available_types &= ~UNWIND_USER_TYPE_ARCH;
+
+	/* We don't know how to unwind VM86 stacks. */
+	if (regs->flags & X86_VM_MASK) {
+		state->done = true;
+		return 0;
+	}
+
+	/*
+	 * If we are called from uprobe handler, and we are indeed at the very
+	 * entry to user function (which is normally a `push %rbp` instruction,
+	 * under assumption of application being compiled with frame pointers),
+	 * we should read return address from *regs->sp before proceeding
+	 * to follow frame pointers, otherwise we'll skip immediate caller
+	 * as %rbp is not yet setup.
+	 */
+	if (!is_uprobe_at_func_entry(regs))
+		return -EINVAL;
+
+#ifdef CONFIG_COMPAT
+	if (state->ws == sizeof(int)) {
+		unsigned int retaddr;
+		int ret = get_user(retaddr, (unsigned int __user *)regs->sp);
+		if (ret)
+			return ret;
+
+		state->ip = retaddr;
+		return 0;
+	}
+#endif
+	unsigned long retaddr;
+	int ret = get_user(retaddr, (unsigned long __user *)regs->sp);
+	if (ret)
+		return ret;
+
+	state->ip = retaddr;
+	return 0;
+}
+
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -1791,3 +1791,35 @@ bool arch_uretprobe_is_alive(struct retu
 	else
 		return regs->sp <= ret->stack;
 }
+
+/*
+ * Heuristic-based check if uprobe is installed at the function entry.
+ *
+ * Under assumption of user code being compiled with frame pointers,
+ * `push %rbp/%ebp` is a good indicator that we indeed are.
+ *
+ * Similarly, `endbr64` (assuming 64-bit mode) is also a common pattern.
+ * If we get this wrong, captured stack trace might have one extra bogus
+ * entry, but the rest of stack trace will still be meaningful.
+ */
+bool is_uprobe_at_func_entry(struct pt_regs *regs)
+{
+	struct arch_uprobe *auprobe;
+
+	if (!current->utask)
+		return false;
+
+	auprobe = current->utask->auprobe;
+	if (!auprobe)
+		return false;
+
+	/* push %rbp/%ebp */
+	if (auprobe->insn[0] == 0x55)
+		return true;
+
+	/* endbr64 (64-bit only) */
+	if (user_64bit_mode(regs) && is_endbr((u32 *)auprobe->insn))
+		return true;
+
+	return false;
+}
--- a/include/linux/unwind_user_types.h
+++ b/include/linux/unwind_user_types.h
@@ -3,13 +3,15 @@
 #define _LINUX_UNWIND_USER_TYPES_H
 
 #include <linux/types.h>
+#include <linux/bits.h>
 
 /*
  * Unwind types, listed in priority order: lower numbers are attempted first if
  * available.
  */
 enum unwind_user_type_bits {
-	UNWIND_USER_TYPE_FP_BIT =		0,
+	UNWIND_USER_TYPE_ARCH_BIT = 0,
+	UNWIND_USER_TYPE_FP_BIT,
 
 	NR_UNWIND_USER_TYPE_BITS,
 };
@@ -17,6 +19,7 @@ enum unwind_user_type_bits {
 enum unwind_user_type {
 	/* Type "none" for the start of stack walk iteration. */
 	UNWIND_USER_TYPE_NONE =			0,
+	UNWIND_USER_TYPE_ARCH =			BIT(UNWIND_USER_TYPE_ARCH_BIT),
 	UNWIND_USER_TYPE_FP =			BIT(UNWIND_USER_TYPE_FP_BIT),
 };
 
--- a/kernel/unwind/user.c
+++ b/kernel/unwind/user.c
@@ -79,6 +79,10 @@ static int unwind_user_next(struct unwin
 
 		state->current_type = type;
 		switch (type) {
+		case UNWIND_USER_TYPE_ARCH:
+			if (!unwind_user_next_arch(state))
+				return 0;
+			continue;
 		case UNWIND_USER_TYPE_FP:
 			if (!unwind_user_next_fp(state))
 				return 0;
@@ -107,6 +111,9 @@ static int unwind_user_start(struct unwi
 		return -EINVAL;
 	}
 
+	if (HAVE_UNWIND_USER_ARCH)
+		state->available_types |= UNWIND_USER_TYPE_ARCH;
+
 	if (IS_ENABLED(CONFIG_HAVE_UNWIND_USER_FP))
 		state->available_types |= UNWIND_USER_TYPE_FP;
 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ