lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9b606d45-0d3e-4add-9936-380656475b55@efficios.com>
Date: Mon, 25 Aug 2025 15:39:52 -0400
From: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
To: Thomas Gleixner <tglx@...utronix.de>, LKML <linux-kernel@...r.kernel.org>
Cc: Jens Axboe <axboe@...nel.dk>, Peter Zijlstra <peterz@...radead.org>,
 "Paul E. McKenney" <paulmck@...nel.org>, Boqun Feng <boqun.feng@...il.com>,
 Paolo Bonzini <pbonzini@...hat.com>, Sean Christopherson
 <seanjc@...gle.com>, Wei Liu <wei.liu@...nel.org>,
 Dexuan Cui <decui@...rosoft.com>, x86@...nel.org,
 Arnd Bergmann <arnd@...db.de>, Heiko Carstens <hca@...ux.ibm.com>,
 Christian Borntraeger <borntraeger@...ux.ibm.com>,
 Sven Schnelle <svens@...ux.ibm.com>, Huacai Chen <chenhuacai@...nel.org>,
 Paul Walmsley <paul.walmsley@...ive.com>, Palmer Dabbelt <palmer@...belt.com>
Subject: Re: [patch V2 36/37] rseq: Switch to TIF_RSEQ if supported

On 2025-08-23 12:40, Thomas Gleixner wrote:
> TIF_NOTIFY_RESUME is a multiplexing TIF bit, which is suboptimal especially
> with the RSEQ fast path depending on it, but not really handling it.
> 
> Define a separate TIF_RSEQ in the generic TIF space and enable the full
> separation of fast and slow path for architectures which utilize that.
> 
> That avoids the hassle with invocations of resume_user_mode_work() from
> hypervisors, which clear TIF_NOTIFY_RESUME. It makes the therefore required
> re-evaluation at the end of vcpu_run() a NOOP on architectures which
> utilize the generic TIF space and have a separate TIF_RSEQ.
> 
> The hypervisor TIF handling does not include the separate TIF_RSEQ as there
> is no point in doing so. The guest neither knows nor cares about the VMM
> host application's RSEQ state. That state is only relevant when the ioctl()
> returns to user space.
> 
> The fastpath implementation still utilizes TIF_NOTIFY_RESUME for failure
> handling, but this only happens within exit_to_user_mode_loop(), so
> arguably the hypervisor ioctl() code is long done when this happens.
> 
> This allows further optimizations for blocking syscall heavy workloads in a
> subsequent step.
> 
> Signed-off-by: Thomas Gleixner <tglx@...utronix.de>

Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>

> ---
>   include/asm-generic/thread_info_tif.h |    3 +++
>   include/linux/irq-entry-common.h      |    2 +-
>   include/linux/rseq.h                  |   13 ++++++++++---
>   include/linux/rseq_entry.h            |   23 +++++++++++++++++++----
>   include/linux/thread_info.h           |    5 +++++
>   5 files changed, 38 insertions(+), 8 deletions(-)
> 
> --- a/include/asm-generic/thread_info_tif.h
> +++ b/include/asm-generic/thread_info_tif.h
> @@ -45,4 +45,7 @@
>   # define _TIF_RESTORE_SIGMASK	BIT(TIF_RESTORE_SIGMASK)
>   #endif
>   
> +#define TIF_RSEQ		11	// Run RSEQ fast path
> +#define _TIF_RSEQ		BIT(TIF_RSEQ)
> +
>   #endif /* _ASM_GENERIC_THREAD_INFO_TIF_H_ */
> --- a/include/linux/irq-entry-common.h
> +++ b/include/linux/irq-entry-common.h
> @@ -30,7 +30,7 @@
>   #define EXIT_TO_USER_MODE_WORK						\
>   	(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE |		\
>   	 _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY |			\
> -	 _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |			\
> +	 _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | _TIF_RSEQ |		\
>   	 ARCH_EXIT_TO_USER_MODE_WORK)
>   
>   /**
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -40,7 +40,7 @@ static inline void rseq_signal_deliver(s
>   
>   static inline void rseq_raise_notify_resume(struct task_struct *t)
>   {
> -	set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
> +	set_tsk_thread_flag(t, TIF_RSEQ);
>   }
>   
>   /* Invoked from context switch to force evaluation on exit to user */
> @@ -122,7 +122,7 @@ static inline void rseq_force_update(voi
>    */
>   static inline void rseq_virt_userspace_exit(void)
>   {
> -	if (current->rseq_event.sched_switch)
> +	if (!IS_ENABLED(CONFIG_HAVE_GENERIC_TIF_BITS) && current->rseq_event.sched_switch)
>   		rseq_raise_notify_resume(current);
>   }
>   
> @@ -147,9 +147,16 @@ static inline void rseq_fork(struct task
>   		/*
>   		 * If it has rseq, force it into the slow path right away
>   		 * because it is guaranteed to fault.
> +		 *
> +		 * Setting TIF_NOTIFY_RESUME is redundant but harmless for
> +		 * architectures which do not have a seperate TIF_RSEQ, but
> +		 * for those who do it's required to enforce the slow path
> +		 * as the scheduler sets only TIF_RSEQ.
>   		 */
> -		if (t->rseq_event.has_rseq)
> +		if (t->rseq_event.has_rseq) {
>   			t->rseq_event.slowpath = true;
> +			set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
> +		}
>   	}
>   }
>   
> --- a/include/linux/rseq_entry.h
> +++ b/include/linux/rseq_entry.h
> @@ -502,18 +502,33 @@ static __always_inline bool __rseq_exit_
>   	return true;
>   }
>   
> +#ifdef CONFIG_HAVE_GENERIC_TIF_BITS
> +# define CHECK_TIF_RSEQ		_TIF_RSEQ
> +static __always_inline void clear_tif_rseq(void)
> +{
> +	clear_thread_flag(TIF_RSEQ);
> +}
> +#else
> +# define CHECK_TIF_RSEQ		0UL
> +static inline void clear_tif_rseq(void) { }
> +#endif
> +
>   static __always_inline unsigned long
>   rseq_exit_to_user_mode_work(struct pt_regs *regs, unsigned long ti_work, const unsigned long mask)
>   {
>   	/*
>   	 * Check if all work bits have been cleared before handling rseq.
> +	 *
> +	 * In case of a seperate TIF_RSEQ this checks for all other bits to
> +	 * be cleared and TIF_RSEQ to be set.
>   	 */
> -	if ((ti_work & mask) != 0)
> -		return ti_work;
> -
> -	if (likely(!__rseq_exit_to_user_mode_restart(regs)))
> +	if ((ti_work & mask) != CHECK_TIF_RSEQ)
>   		return ti_work;
>   
> +	if (likely(!__rseq_exit_to_user_mode_restart(regs))) {
> +		clear_tif_rseq();
> +		return ti_work & ~CHECK_TIF_RSEQ;
> +	}
>   	return ti_work | _TIF_NOTIFY_RESUME;
>   }
>   
> --- a/include/linux/thread_info.h
> +++ b/include/linux/thread_info.h
> @@ -67,6 +67,11 @@ enum syscall_work_bit {
>   #define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED
>   #endif
>   
> +#ifndef TIF_RSEQ
> +# define TIF_RSEQ	TIF_NOTIFY_RESUME
> +# define _TIF_RSEQ	_TIF_NOTIFY_RESUME
> +#endif
> +
>   #ifdef __KERNEL__
>   
>   #ifndef arch_set_restart_data
> 


-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ