lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250618184620.GT1613376@noisy.programming.kicks-ass.net>
Date: Wed, 18 Jun 2025 20:46:20 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Steven Rostedt <rostedt@...dmis.org>
Cc: linux-kernel@...r.kernel.org, linux-trace-kernel@...r.kernel.org,
	bpf@...r.kernel.org, x86@...nel.org,
	Masami Hiramatsu <mhiramat@...nel.org>,
	Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
	Josh Poimboeuf <jpoimboe@...nel.org>,
	Ingo Molnar <mingo@...nel.org>, Jiri Olsa <jolsa@...nel.org>,
	Namhyung Kim <namhyung@...nel.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Andrii Nakryiko <andrii@...nel.org>,
	Indu Bhagat <indu.bhagat@...cle.com>,
	"Jose E. Marchesi" <jemarch@....org>,
	Beau Belgrave <beaub@...ux.microsoft.com>,
	Jens Remus <jremus@...ux.ibm.com>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Andrew Morton <akpm@...ux-foundation.org>
Subject: Re: [PATCH v10 06/14] unwind_user/deferred: Add deferred unwinding
 interface


> +struct unwind_work;
> +
> +typedef void (*unwind_callback_t)(struct unwind_work *work, struct unwind_stacktrace *trace, u64 timestamp);
> +
> +struct unwind_work {
> +	struct list_head		list;

Does this really need to be a doubly linked list (list_head)? Would a
singly linked list, like callback_head, not be good enough?

> +	unwind_callback_t		func;
> +};
> +
>  #ifdef CONFIG_UNWIND_USER
>  
>  void unwind_task_init(struct task_struct *task);
> @@ -12,10 +22,15 @@ void unwind_task_free(struct task_struct *task);
>  
>  int unwind_deferred_trace(struct unwind_stacktrace *trace);
>  
> +int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func);
> +int unwind_deferred_request(struct unwind_work *work, u64 *timestamp);
> +void unwind_deferred_cancel(struct unwind_work *work);
> +
>  static __always_inline void unwind_exit_to_user_mode(void)
>  {
>  	if (unlikely(current->unwind_info.cache))
>  		current->unwind_info.cache->nr_entries = 0;
> +	current->unwind_info.timestamp = 0;

Surely clearing that timestamp is only relevant when there is a cache
around? Better to not add this unconditional write to the exit path.

>  }
>  
>  #else /* !CONFIG_UNWIND_USER */
> @@ -24,6 +39,9 @@ static inline void unwind_task_init(struct task_struct *task) {}
>  static inline void unwind_task_free(struct task_struct *task) {}
>  
>  static inline int unwind_deferred_trace(struct unwind_stacktrace *trace) { return -ENOSYS; }
> +static inline int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func) { return -ENOSYS; }
> +static inline int unwind_deferred_request(struct unwind_work *work, u64 *timestamp) { return -ENOSYS; }
> +static inline void unwind_deferred_cancel(struct unwind_work *work) {}
>  
>  static inline void unwind_exit_to_user_mode(void) {}
>  
> diff --git a/include/linux/unwind_deferred_types.h b/include/linux/unwind_deferred_types.h
> index db5b54b18828..5df264cf81ad 100644
> --- a/include/linux/unwind_deferred_types.h
> +++ b/include/linux/unwind_deferred_types.h
> @@ -9,6 +9,9 @@ struct unwind_cache {
>  
>  struct unwind_task_info {
>  	struct unwind_cache	*cache;
> +	struct callback_head	work;
> +	u64			timestamp;
> +	int			pending;
>  };
>  
>  #endif /* _LINUX_UNWIND_USER_DEFERRED_TYPES_H */
> diff --git a/kernel/unwind/deferred.c b/kernel/unwind/deferred.c
> index e3913781c8c6..b76c704ddc6d 100644
> --- a/kernel/unwind/deferred.c
> +++ b/kernel/unwind/deferred.c
> @@ -2,13 +2,35 @@
>  /*
>   * Deferred user space unwinding
>   */
> +#include <linux/sched/task_stack.h>
> +#include <linux/unwind_deferred.h>
> +#include <linux/sched/clock.h>
> +#include <linux/task_work.h>
>  #include <linux/kernel.h>
>  #include <linux/sched.h>
>  #include <linux/slab.h>
> -#include <linux/unwind_deferred.h>
> +#include <linux/mm.h>
>  
>  #define UNWIND_MAX_ENTRIES 512
>  
> +/* Guards adding to and reading the list of callbacks */
> +static DEFINE_MUTEX(callback_mutex);
> +static LIST_HEAD(callbacks);

Global state... this smells like failure.

> +/*
> + * Read the task context timestamp, if this is the first caller then
> + * it will set the timestamp.
> + */
> +static u64 get_timestamp(struct unwind_task_info *info)
> +{
> +	lockdep_assert_irqs_disabled();
> +
> +	if (!info->timestamp)
> +		info->timestamp = local_clock();
> +
> +	return info->timestamp;
> +}
> +
>  /**
>   * unwind_deferred_trace - Produce a user stacktrace in faultable context
>   * @trace: The descriptor that will store the user stacktrace
> @@ -59,11 +81,117 @@ int unwind_deferred_trace(struct unwind_stacktrace *trace)
>  	return 0;
>  }
>  
> +static void unwind_deferred_task_work(struct callback_head *head)
> +{
> +	struct unwind_task_info *info = container_of(head, struct unwind_task_info, work);
> +	struct unwind_stacktrace trace;
> +	struct unwind_work *work;
> +	u64 timestamp;
> +
> +	if (WARN_ON_ONCE(!info->pending))
> +		return;
> +
> +	/* Allow work to come in again */
> +	WRITE_ONCE(info->pending, 0);
> +
> +	/*
> +	 * From here on out, the callback must always be called, even if it's
> +	 * just an empty trace.
> +	 */
> +	trace.nr = 0;
> +	trace.entries = NULL;
> +
> +	unwind_deferred_trace(&trace);
> +
> +	timestamp = info->timestamp;
> +
> +	guard(mutex)(&callback_mutex);
> +	list_for_each_entry(work, &callbacks, list) {
> +		work->func(work, &trace, timestamp);
> +	}

So now you're globally serializing all return-to-user instances. How is
that not a problem?

> +}

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ