lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250714132936.GB4105545@noisy.programming.kicks-ass.net>
Date: Mon, 14 Jul 2025 15:29:36 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Steven Rostedt <rostedt@...nel.org>
Cc: linux-kernel@...r.kernel.org, linux-trace-kernel@...r.kernel.org,
	bpf@...r.kernel.org, x86@...nel.org,
	Masami Hiramatsu <mhiramat@...nel.org>,
	Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
	Josh Poimboeuf <jpoimboe@...nel.org>,
	Ingo Molnar <mingo@...nel.org>, Jiri Olsa <jolsa@...nel.org>,
	Namhyung Kim <namhyung@...nel.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Andrii Nakryiko <andrii@...nel.org>,
	Indu Bhagat <indu.bhagat@...cle.com>,
	"Jose E. Marchesi" <jemarch@....org>,
	Beau Belgrave <beaub@...ux.microsoft.com>,
	Jens Remus <jremus@...ux.ibm.com>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Jens Axboe <axboe@...nel.dk>, Florian Weimer <fweimer@...hat.com>,
	Sam James <sam@...too.org>
Subject: Re: [PATCH v13 07/14] unwind_user/deferred: Make unwind deferral
 requests NMI-safe

On Mon, Jul 07, 2025 at 09:22:46PM -0400, Steven Rostedt wrote:
> From: Steven Rostedt <rostedt@...dmis.org>
> 
> Make unwind_deferred_request() NMI-safe so tracers in NMI context can
> call it and safely request a user space stacktrace when the task exits.
> 
> Note, this is only allowed for architectures that implement a safe
> cmpxchg. If an architecture requests a deferred stack trace from NMI
> context that does not support a safe NMI cmpxchg, it will get an -EINVAL.
> For those architectures, they would need another method (perhaps an
> irqwork), to request a deferred user space stack trace. That can be dealt
> with later if one of theses architectures require this feature.
> 
> Suggested-by: Peter Zijlstra <peterz@...radead.org>

How's this instead?

---
--- a/kernel/unwind/deferred.c
+++ b/kernel/unwind/deferred.c
@@ -12,6 +12,40 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 
+/*
+ * For requesting a deferred user space stack trace from NMI context
+ * the architecture must support a safe cmpxchg in NMI context.
+ * For those architectures that do not have that, then it cannot ask
+ * for a deferred user space stack trace from an NMI context. If it
+ * does, then it will get -EINVAL.
+ */
+#ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
+#define UNWIND_NMI_SAFE 1
+static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt)
+{
+	u32 zero = 0;
+	return try_cmpxchg(&info->id.cnt, &zero, cnt);
+}
+static inline bool test_and_set_pending(struct unwind_task_info *info)
+{
+	return info->pending || cmpxchg_local(&info->pending, 0, 1);
+}
+#else
+#define UNWIND_NMI_SAFE 0
+/* When NMIs are not allowed, this always succeeds */
+static inline bool try_assign_cnt(struct unwind_task_info *info, u32 cnt)
+{
+	info->id.cnt = cnt;
+	return true;
+}
+static inline bool test_and_set_pending(struct unwind_task_info *info)
+{
+	int pending = info->pending;
+	info->pending = 1;
+	return pending;
+}
+#endif /* CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG */
+
 /* Make the cache fit in a 4K page */
 #define UNWIND_MAX_ENTRIES					\
 	((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long))
@@ -41,21 +75,16 @@ static DEFINE_PER_CPU(u32, unwind_ctx_ct
  */
 static u64 get_cookie(struct unwind_task_info *info)
 {
-	u32 cpu_cnt;
-	u32 cnt;
-	u32 old = 0;
+	u32 cnt = 1;
 
 	if (info->id.cpu)
 		return info->id.id;
 
-	cpu_cnt = __this_cpu_read(unwind_ctx_ctr);
-	cpu_cnt += 2;
-	cnt = cpu_cnt | 1; /* Always make non zero */
-
-	if (try_cmpxchg(&info->id.cnt, &old, cnt)) {
-		/* Update the per cpu counter */
-		__this_cpu_write(unwind_ctx_ctr, cpu_cnt);
-	}
+	/* LSB it always set to ensure 0 is an invalid value. */
+	cnt |= __this_cpu_read(unwind_ctx_ctr) + 2;
+	if (try_assign_cnt(info, cnt))
+		__this_cpu_write(unwind_ctx_ctr, cnt);
+
 	/* Interrupts are disabled, the CPU will always be same */
 	info->id.cpu = smp_processor_id() + 1; /* Must be non zero */
 
@@ -174,27 +203,29 @@ int unwind_deferred_request(struct unwin
 
 	*cookie = 0;
 
-	if (WARN_ON_ONCE(in_nmi()))
-		return -EINVAL;
-
 	if ((current->flags & (PF_KTHREAD | PF_EXITING)) ||
 	    !user_mode(task_pt_regs(current)))
 		return -EINVAL;
 
+	/* NMI requires having safe cmpxchg operations */
+	if (WARN_ON_ONCE(!UNWIND_NMI_SAFE && in_nmi()))
+		return -EINVAL;
+
 	guard(irqsave)();
 
 	*cookie = get_cookie(info);
 
 	/* callback already pending? */
-	if (info->pending)
+	if (test_and_set_pending(info))
 		return 1;
 
 	/* The work has been claimed, now schedule it. */
 	ret = task_work_add(current, &info->work, TWA_RESUME);
-	if (WARN_ON_ONCE(ret))
+	if (WARN_ON_ONCE(ret)) {
+		WRITE_ONCE(info->pending, 0);
 		return ret;
+	}
 
-	info->pending = 1;
 	return 0;
 }
 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ