Message-ID: <20250501013506.586292205@goodmis.org>
Date: Wed, 30 Apr 2025 21:32:06 -0400
From: Steven Rostedt <rostedt@...dmis.org>
To: linux-kernel@...r.kernel.org,
 linux-trace-kernel@...r.kernel.org
Cc: Masami Hiramatsu <mhiramat@...nel.org>,
 Mark Rutland <mark.rutland@....com>,
 Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
 Andrew Morton <akpm@...ux-foundation.org>,
 Josh Poimboeuf <jpoimboe@...nel.org>,
 x86@...nel.org,
 Peter Zijlstra <peterz@...radead.org>,
 Ingo Molnar <mingo@...nel.org>,
 Arnaldo Carvalho de Melo <acme@...nel.org>,
 Indu Bhagat <indu.bhagat@...cle.com>,
 Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
 Jiri Olsa <jolsa@...nel.org>,
 Namhyung Kim <namhyung@...nel.org>,
 Ian Rogers <irogers@...gle.com>,
 Adrian Hunter <adrian.hunter@...el.com>,
 linux-perf-users@...r.kernel.org,
 Mark Brown <broonie@...nel.org>,
 linux-toolchains@...r.kernel.org,
 Jordan Rome <jordalgo@...a.com>,
 Sam James <sam@...too.org>,
 Andrii Nakryiko <andrii.nakryiko@...il.com>,
 Jens Remus <jremus@...ux.ibm.com>,
 Florian Weimer <fweimer@...hat.com>,
 Andy Lutomirski <luto@...nel.org>,
 Weinan Liu <wnliu@...gle.com>,
 Blake Jones <blakejones@...gle.com>,
 Beau Belgrave <beaub@...ux.microsoft.com>,
 "Jose E. Marchesi" <jemarch@....org>,
 Alexander Aring <aahringo@...hat.com>
Subject: [PATCH v6 4/5] unwind deferred: Use SRCU in unwind_deferred_task_work()

From: Steven Rostedt <rostedt@...dmis.org>

Instead of using the callback_mutex to protect the linked list of callbacks
in unwind_deferred_task_work(), use SRCU. This function is called every
time a task that has to record a requested stack trace exits. That can
happen for many tasks on several CPUs at the same time, where a mutex
becomes a bottleneck, causing contention and slowing down performance.
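
For reference, a minimal sketch of the SRCU read-side pattern being
adopted here (the demo_* names are illustrative, not the actual kernel
symbols):

	#include <linux/srcu.h>
	#include <linux/rculist.h>

	struct demo_work {
		struct list_head	list;
		void			(*func)(struct demo_work *work);
	};

	static LIST_HEAD(demo_callbacks);
	DEFINE_STATIC_SRCU(demo_srcu);

	static void demo_run_callbacks(void)
	{
		struct demo_work *work;
		int idx;

		/* The SRCU read lock is cheap and may be taken on many CPUs at once */
		idx = srcu_read_lock(&demo_srcu);
		list_for_each_entry_srcu(work, &demo_callbacks, list,
					 srcu_read_lock_held(&demo_srcu)) {
			/* Unlike under regular RCU, the callback may sleep here */
			work->func(work);
		}
		srcu_read_unlock(&demo_srcu, idx);
	}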

As the callbacks themselves are allowed to sleep, regular RCU cannot be
used to protect the list. Use SRCU instead, as it still allows the
callbacks to sleep while letting the list be read without holding the
callback_mutex.
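
The update side, sketched below with the same illustrative demo_* names,
still serializes writers with a mutex and uses synchronize_srcu() to wait
out readers before an entry may go away:

	#include <linux/mutex.h>

	static DEFINE_MUTEX(demo_mutex);

	static void demo_cancel(struct demo_work *work)
	{
		/* Updaters still serialize among themselves with the mutex */
		mutex_lock(&demo_mutex);
		list_del_rcu(&work->list);
		mutex_unlock(&demo_mutex);

		/*
		 * Wait for every SRCU reader that may still be iterating
		 * over 'work' to finish before the entry can be freed or
		 * reused.
		 */
		synchronize_srcu(&demo_srcu);
	}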

Link: https://lore.kernel.org/all/ca9bd83a-6c80-4ee0-a83c-224b9d60b755@efficios.com/

Suggested-by: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>
---
 kernel/unwind/deferred.c | 35 ++++++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c
index 716393dff810..5f98ac5e3a1b 100644
--- a/kernel/unwind/deferred.c
+++ b/kernel/unwind/deferred.c
@@ -23,10 +23,11 @@
  */
 static DEFINE_PER_CPU(u64, unwind_ctx_ctr);
 
-/* Guards adding to and reading the list of callbacks */
+/* Guards adding to or removing from the list of callbacks */
 static DEFINE_MUTEX(callback_mutex);
 static LIST_HEAD(callbacks);
 static unsigned long unwind_mask;
+DEFINE_STATIC_SRCU(unwind_srcu);
 
 /*
  * The context cookie is a unique identifier that is assigned to a user
@@ -137,6 +138,7 @@ static void unwind_deferred_task_work(struct callback_head *head)
 	struct unwind_work *work;
 	struct task_struct *task = current;
 	u64 cookie;
+	int idx;
 
 	if (WARN_ON_ONCE(!info->pending))
 		return;
@@ -155,14 +157,16 @@ static void unwind_deferred_task_work(struct callback_head *head)
 
 	cookie = get_cookie(info);
 
-	guard(mutex)(&callback_mutex);
-	list_for_each_entry(work, &callbacks, list) {
+	idx = srcu_read_lock(&unwind_srcu);
+	list_for_each_entry_srcu(work, &callbacks, list,
+				 srcu_read_lock_held(&unwind_srcu)) {
 		if (task->unwind_mask & (1UL << work->bit)) {
 			work->func(work, &trace, cookie);
 			clear_bit(work->bit, &current->unwind_mask);
 		}
 	}
-	barrier();
+	srcu_read_unlock(&unwind_srcu, idx);
+
 	/* If another task work is pending, reuse the cookie and stack trace */
 	if (!READ_ONCE(info->pending))
 		WRITE_ONCE(info->cookie, 0);
@@ -238,6 +242,7 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
 {
 	struct unwind_task_info *info = &current->unwind_info;
 	int pending;
+	int bit;
 	int ret;
 
 	*cookie = 0;
@@ -249,12 +254,17 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
 	if (in_nmi())
 		return unwind_deferred_request_nmi(work, cookie);
 
+	/* Do not allow cancelled works to request again */
+	bit = READ_ONCE(work->bit);
+	if (WARN_ON_ONCE(bit < 0))
+		return -EINVAL;
+
 	guard(irqsave)();
 
 	*cookie = get_cookie(info);
 
 	/* This is already queued */
-	if (current->unwind_mask & (1UL << work->bit))
+	if (current->unwind_mask & (1UL << bit))
 		return 1;
 
 	/* callback already pending? */
@@ -280,19 +290,26 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
 void unwind_deferred_cancel(struct unwind_work *work)
 {
 	struct task_struct *g, *t;
+	int bit;
 
 	if (!work)
 		return;
 
 	guard(mutex)(&callback_mutex);
-	list_del(&work->list);
+	list_del_rcu(&work->list);
+	bit = work->bit;
+
+	/* Do not allow any more requests and prevent callbacks */
+	work->bit = -1;
+
+	clear_bit(bit, &unwind_mask);
 
-	clear_bit(work->bit, &unwind_mask);
+	synchronize_srcu(&unwind_srcu);
 
 	guard(rcu)();
 	/* Clear this bit from all threads */
 	for_each_process_thread(g, t) {
-		clear_bit(work->bit, &t->unwind_mask);
+		clear_bit(bit, &t->unwind_mask);
 	}
 }
 
@@ -309,7 +326,7 @@ int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func)
 	work->bit = ffz(unwind_mask);
 	unwind_mask |= 1UL << work->bit;
 
-	list_add(&work->list, &callbacks);
+	list_add_rcu(&work->list, &callbacks);
 	work->func = func;
 	return 0;
 }
-- 
2.47.2


