[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAEf4BzY3xJ=W2qPD8i6UbSB=zNqpiA1gSd+SC3wKxQAJWjeHhA@mail.gmail.com>
Date: Tue, 29 Oct 2024 16:32:46 -0700
From: Andrii Nakryiko <andrii.nakryiko@...il.com>
To: Josh Poimboeuf <jpoimboe@...nel.org>
Cc: x86@...nel.org, Peter Zijlstra <peterz@...radead.org>,
Steven Rostedt <rostedt@...dmis.org>, Ingo Molnar <mingo@...nel.org>,
Arnaldo Carvalho de Melo <acme@...nel.org>, linux-kernel@...r.kernel.org,
Indu Bhagat <indu.bhagat@...cle.com>, Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>, Jiri Olsa <jolsa@...nel.org>,
Namhyung Kim <namhyung@...nel.org>, Ian Rogers <irogers@...gle.com>,
Adrian Hunter <adrian.hunter@...el.com>, linux-perf-users@...r.kernel.org,
Mark Brown <broonie@...nel.org>, linux-toolchains@...r.kernel.org,
Jordan Rome <jordalgo@...a.com>, Sam James <sam@...too.org>, linux-trace-kernel@...r.kernel.org,
Jens Remus <jremus@...ux.ibm.com>, Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Florian Weimer <fweimer@...hat.com>, Andy Lutomirski <luto@...nel.org>
Subject: Re: [PATCH v3 11/19] unwind: Add deferred user space unwinding API
On Mon, Oct 28, 2024 at 2:48 PM Josh Poimboeuf <jpoimboe@...nel.org> wrote:
>
> Add unwind_user_deferred() which allows callers to schedule task work to
> unwind the user space stack before returning to user space. This solves
> several problems for its callers:
>
> - Ensure the unwind happens in task context even if the caller may
> be running in interrupt context.
>
> - Only do the unwind once, even if called multiple times either by the
> same caller or multiple callers.
>
> - Create a "context cookie" which allows trace post-processing
> to correlate kernel unwinds/traces with the user unwind.
>
> Signed-off-by: Josh Poimboeuf <jpoimboe@...nel.org>
> ---
> include/linux/entry-common.h | 3 +
> include/linux/sched.h | 5 +
> include/linux/unwind_user.h | 56 ++++++++++
> kernel/fork.c | 4 +
> kernel/unwind/user.c | 199 +++++++++++++++++++++++++++++++++++
> 5 files changed, 267 insertions(+)
>
> diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
> index 1e50cdb83ae5..efbe8f964f31 100644
> --- a/include/linux/entry-common.h
> +++ b/include/linux/entry-common.h
> @@ -12,6 +12,7 @@
> #include <linux/resume_user_mode.h>
> #include <linux/tick.h>
> #include <linux/kmsan.h>
> +#include <linux/unwind_user.h>
>
> #include <asm/entry-common.h>
>
> @@ -111,6 +112,8 @@ static __always_inline void enter_from_user_mode(struct pt_regs *regs)
> CT_WARN_ON(__ct_state() != CT_STATE_USER);
> user_exit_irqoff();
>
> + unwind_enter_from_user_mode();
> +
> instrumentation_begin();
> kmsan_unpoison_entry_regs(regs);
> trace_hardirqs_off_finish();
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 5007a8e2d640..31b6f1d763ef 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -47,6 +47,7 @@
> #include <linux/livepatch_sched.h>
> #include <linux/uidgid_types.h>
> #include <asm/kmap_size.h>
> +#include <linux/unwind_user.h>
>
> /* task_struct member predeclarations (sorted alphabetically): */
> struct audit_context;
> @@ -1592,6 +1593,10 @@ struct task_struct {
> struct user_event_mm *user_event_mm;
> #endif
>
> +#ifdef CONFIG_UNWIND_USER
> + struct unwind_task_info unwind_task_info;
this is quite a lot of memory to pay on each task, many of which a)
might not have sframe and b) might not need stack unwinding during
their lifetime.
It could be a pointer and allocated in copy_process(), no? Though
ideally this should be initialized lazily, if possible.
> +#endif
> +
> /*
> * New fields for task_struct should be added above here, so that
> * they are included in the randomized portion of task_struct.
[...]
> +static void unwind_user_task_work(struct callback_head *head)
> +{
> + struct unwind_task_info *info = container_of(head, struct unwind_task_info, work);
> + struct task_struct *task = container_of(info, struct task_struct, unwind_task_info);
> + void *privs[UNWIND_MAX_CALLBACKS];
> + struct unwind_stacktrace trace;
> + unsigned long pending;
> + u64 cookie = 0;
> + int i;
> +
> + BUILD_BUG_ON(UNWIND_MAX_CALLBACKS > 32);
> +
> + if (WARN_ON_ONCE(task != current))
> + return;
> +
> + if (WARN_ON_ONCE(!info->ctx_cookie || !info->pending_callbacks))
> + return;
> +
> + scoped_guard(irqsave) {
> + pending = info->pending_callbacks;
> + cookie = info->ctx_cookie;
> +
> + info->pending_callbacks = 0;
> + info->ctx_cookie = 0;
> + memcpy(privs, info->privs, sizeof(void *) * UNWIND_MAX_CALLBACKS);
> + }
> +
> + if (!info->entries) {
> + info->entries = kmalloc(UNWIND_MAX_ENTRIES * sizeof(long),
> + GFP_KERNEL);
> + if (!info->entries)
> + return;
uhm... can we notify callbacks that stack capture failed? otherwise
we'd need some extra timeouts and other complications if we are
*waiting* for this callback to be called
> + }
> +
> + trace.entries = info->entries;
> + trace.nr = 0;
> + unwind_user(&trace, UNWIND_MAX_ENTRIES);
> +
[...]
Powered by blists - more mailing lists