Message-ID: <02339b93-de9a-49e4-8dbb-137d02fc6ea8@efficios.com>
Date: Mon, 28 Apr 2025 12:33:50 -0400
From: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
To: Steven Rostedt <rostedt@...dmis.org>, linux-kernel@...r.kernel.org,
linux-trace-kernel@...r.kernel.org
Cc: Masami Hiramatsu <mhiramat@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Andrew Morton <akpm@...ux-foundation.org>,
Josh Poimboeuf <jpoimboe@...nel.org>, x86@...nel.org,
Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...nel.org>,
Arnaldo Carvalho de Melo <acme@...nel.org>,
Indu Bhagat <indu.bhagat@...cle.com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Jiri Olsa <jolsa@...nel.org>, Namhyung Kim <namhyung@...nel.org>,
Ian Rogers <irogers@...gle.com>, Adrian Hunter <adrian.hunter@...el.com>,
linux-perf-users@...r.kernel.org, Mark Brown <broonie@...nel.org>,
linux-toolchains@...r.kernel.org, Jordan Rome <jordalgo@...a.com>,
Sam James <sam@...too.org>, Andrii Nakryiko <andrii.nakryiko@...il.com>,
Jens Remus <jremus@...ux.ibm.com>, Florian Weimer <fweimer@...hat.com>,
Andy Lutomirski <luto@...nel.org>, Weinan Liu <wnliu@...gle.com>,
Blake Jones <blakejones@...gle.com>,
Beau Belgrave <beaub@...ux.microsoft.com>, "Jose E. Marchesi"
<jemarch@....org>, Alexander Aring <aahringo@...hat.com>
Subject: Re: [PATCH v5 3/9] unwind deferred: Use bitmask to determine which
callbacks to call
On 2025-04-24 15:24, Steven Rostedt wrote:
> From: Steven Rostedt <rostedt@...dmis.org>
>
> In order to know which registered callback requested a stacktrace for when
> the task goes back to user space, add a bitmask for all registered
> tracers. The bitmask is the size of log, which means that on a 32 bit
"size of log" -> "size of long"
> machine, it can have at most 32 registered tracers, and on 64 bit, it can
> have at most 64 registered tracers. This should not be an issue as there
> should not be more than 10 (unless BPF can abuse this?).
>
> When a tracer registers with unwind_deferred_init() it will get a bit
> number assigned to it. When a tracer requests a stacktrace, it will have
> its bit set within the task_struct. When the task returns back to user
> space, it will call the callbacks for all the registered tracers where
> their bits are set in the task's mask.
>
> When a tracer is removed by the unwind_deferred_cancel() all current tasks
> will clear the associated bit, just in case another tracer gets registered
> immediately afterward and then gets their callback called unexpectedly.
>
> Signed-off-by: Steven Rostedt (Google) <rostedt@...dmis.org>
> ---
> include/linux/sched.h | 1 +
> include/linux/unwind_deferred.h | 1 +
> kernel/unwind/deferred.c | 44 ++++++++++++++++++++++++++++++---
> 3 files changed, 42 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index a1e1c07cadfb..d3ee0c5405d6 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1649,6 +1649,7 @@ struct task_struct {
>
> #ifdef CONFIG_UNWIND_USER
> struct unwind_task_info unwind_info;
> + unsigned long unwind_mask;
> #endif
>
> /* CPU-specific state of this task: */
> diff --git a/include/linux/unwind_deferred.h b/include/linux/unwind_deferred.h
> index d36784cae658..719a7cfb3164 100644
> --- a/include/linux/unwind_deferred.h
> +++ b/include/linux/unwind_deferred.h
> @@ -13,6 +13,7 @@ typedef void (*unwind_callback_t)(struct unwind_work *work, struct unwind_stackt
> struct unwind_work {
> struct list_head list;
> unwind_callback_t func;
> + int bit;
int or unsigned int?
Rename "bit" to "requester_id"?
> };
>
> #ifdef CONFIG_UNWIND_USER
> diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c
> index 2afd197da2ef..f505cb1766de 100644
> --- a/kernel/unwind/deferred.c
> +++ b/kernel/unwind/deferred.c
> @@ -26,6 +26,7 @@ static DEFINE_PER_CPU(u64, unwind_ctx_ctr);
> /* Guards adding to and reading the list of callbacks */
> static DEFINE_MUTEX(callback_mutex);
> static LIST_HEAD(callbacks);
> +static unsigned long unwind_mask;
Perhaps "reserved_unwind_mask"?
>
> /*
> * The context cookie is a unique identifier that is assigned to a user
> @@ -135,6 +136,7 @@ static void unwind_deferred_task_work(struct callback_head *head)
> struct unwind_task_info *info = container_of(head, struct unwind_task_info, work);
> struct unwind_stacktrace trace;
> struct unwind_work *work;
> + struct task_struct *task = current;
> u64 cookie;
>
> if (WARN_ON_ONCE(!info->pending))
> @@ -156,7 +158,10 @@ static void unwind_deferred_task_work(struct callback_head *head)
>
> guard(mutex)(&callback_mutex);
> list_for_each_entry(work, &callbacks, list) {
> - work->func(work, &trace, cookie);
> + if (task->unwind_mask & (1UL << work->bit)) {
> + work->func(work, &trace, cookie);
> + clear_bit(work->bit, &current->unwind_mask);
> + }
You could replace this list of callbacks with an array of pointers
indexed by "requester_id".
Then you can do a for_each_set_bit() on task->unwind_mask, and each bit
that is set ends up calling the callback for the matching array index,
as in the sketch below.
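Roughly (an untested sketch; the "unwind_callbacks" array name and its
BITS_PER_LONG sizing are just for illustration):

	static struct unwind_work *unwind_callbacks[BITS_PER_LONG];

	...
	unsigned long bits = READ_ONCE(task->unwind_mask);
	int id;

	for_each_set_bit(id, &bits, BITS_PER_LONG) {
		struct unwind_work *work = unwind_callbacks[id];

		if (!work)
			continue;
		work->func(work, &trace, cookie);
		clear_bit(id, &task->unwind_mask);
	}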
> }
> barrier();
> /* If another task work is pending, reuse the cookie and stack trace */
> @@ -194,9 +199,12 @@ static int unwind_deferred_request_nmi(struct unwind_work *work, u64 *cookie)
> *cookie = info->nmi_cookie;
> }
>
> - if (info->pending)
> + if (current->unwind_mask & (1UL << work->bit))
> return 0;
>
> + if (info->pending)
> + goto out;
> +
> ret = task_work_add(current, &info->work, TWA_NMI_CURRENT);
> if (ret) {
> if (inited_cookie)
> @@ -205,6 +213,8 @@ static int unwind_deferred_request_nmi(struct unwind_work *work, u64 *cookie)
> }
>
> info->pending = 1;
> + out:
> + set_bit(work->bit, &current->unwind_mask);
>
> return 0;
> }
> @@ -244,14 +254,18 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
>
> *cookie = get_cookie(info);
>
> + /* This is already queued */
> + if (current->unwind_mask & (1UL << work->bit))
> + return 0;
> +
> /* callback already pending? */
> pending = READ_ONCE(info->pending);
> if (pending)
> - return 0;
> + goto out;
>
> /* Claim the work unless an NMI just now swooped in to do so. */
> if (!try_cmpxchg(&info->pending, &pending, 1))
Not that it necessarily matters performance-wise here, but could this be a
try_cmpxchg_local() if we're working on the task struct and only expecting
interruption from NMIs?
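i.e. something like (untested, assuming info->pending is only ever raced
against by NMIs hitting the same task):

	if (!try_cmpxchg_local(&info->pending, &pending, 1))
		goto out;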
> - return 0;
> + goto out;
>
> /* The work has been claimed, now schedule it. */
> ret = task_work_add(current, &info->work, TWA_RESUME);
> @@ -260,16 +274,29 @@ int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
> return ret;
> }
>
> + out:
> + set_bit(work->bit, &current->unwind_mask);
> +
> return 0;
> }
>
> void unwind_deferred_cancel(struct unwind_work *work)
> {
> + struct task_struct *g, *t;
> +
> if (!work)
> return;
>
> guard(mutex)(&callback_mutex);
> list_del(&work->list);
> +
> + clear_bit(work->bit, &unwind_mask);
> +
> + guard(rcu)();
> + /* Clear this bit from all threads */
> + for_each_process_thread(g, t) {
> + clear_bit(work->bit, &t->unwind_mask);
> + }
Is it enough to guard with RCU? See syscall_regfunc() from
tracepoint.c where we do:
read_lock(&tasklist_lock);
for_each_process_thread(p, t) {
set_task_syscall_work(t, SYSCALL_TRACEPOINT);
}
read_unlock(&tasklist_lock);
This is done to prevent a concurrent fork from adding threads while we
iterate, which would otherwise open the possibility of missing a clear
due to a concurrent fork + set bit.
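i.e. something along these lines in unwind_deferred_cancel() (untested):

	read_lock(&tasklist_lock);
	/* Clear this bit from all threads */
	for_each_process_thread(g, t)
		clear_bit(work->bit, &t->unwind_mask);
	read_unlock(&tasklist_lock);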
Thanks,
Mathieu
> }
>
> int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func)
> @@ -277,6 +304,14 @@ int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func)
> memset(work, 0, sizeof(*work));
>
> guard(mutex)(&callback_mutex);
> +
> + /* See if there's a bit in the mask available */
> + if (unwind_mask == ~0UL)
> + return -EBUSY;
> +
> + work->bit = ffz(unwind_mask);
> + unwind_mask |= 1UL << work->bit;
> +
> list_add(&work->list, &callbacks);
> work->func = func;
> return 0;
> @@ -288,6 +323,7 @@ void unwind_task_init(struct task_struct *task)
>
> memset(info, 0, sizeof(*info));
> init_task_work(&info->work, unwind_deferred_task_work);
> + task->unwind_mask = 0;
> }
>
> void unwind_task_free(struct task_struct *task)
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com