Message-ID: <9b606d45-0d3e-4add-9936-380656475b55@efficios.com>
Date: Mon, 25 Aug 2025 15:39:52 -0400
From: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
To: Thomas Gleixner <tglx@...utronix.de>, LKML <linux-kernel@...r.kernel.org>
Cc: Jens Axboe <axboe@...nel.dk>, Peter Zijlstra <peterz@...radead.org>,
"Paul E. McKenney" <paulmck@...nel.org>, Boqun Feng <boqun.feng@...il.com>,
Paolo Bonzini <pbonzini@...hat.com>, Sean Christopherson
<seanjc@...gle.com>, Wei Liu <wei.liu@...nel.org>,
Dexuan Cui <decui@...rosoft.com>, x86@...nel.org,
Arnd Bergmann <arnd@...db.de>, Heiko Carstens <hca@...ux.ibm.com>,
Christian Borntraeger <borntraeger@...ux.ibm.com>,
Sven Schnelle <svens@...ux.ibm.com>, Huacai Chen <chenhuacai@...nel.org>,
Paul Walmsley <paul.walmsley@...ive.com>, Palmer Dabbelt <palmer@...belt.com>
Subject: Re: [patch V2 36/37] rseq: Switch to TIF_RSEQ if supported
On 2025-08-23 12:40, Thomas Gleixner wrote:
> TIF_NOTIFY_RESUME is a multiplexing TIF bit, which is suboptimal, especially
> with the RSEQ fast path depending on it but not really handling it.
>
> Define a separate TIF_RSEQ in the generic TIF space and enable the full
> separation of fast and slow path for architectures which utilize it.
>
> That avoids the hassle with invocations of resume_user_mode_work() from
> hypervisors, which clear TIF_NOTIFY_RESUME. The re-evaluation that is
> therefore required at the end of vcpu_run() becomes a NOOP on architectures
> which utilize the generic TIF space and have a separate TIF_RSEQ.
>
> The hypervisor TIF handling does not include the separate TIF_RSEQ as there
> is no point in doing so. The guest neither knows nor cares about the VMM
> host application's RSEQ state. That state is only relevant when the ioctl()
> returns to user space.
>
> The fastpath implementation still utilizes TIF_NOTIFY_RESUME for failure
> handling, but that only happens within exit_to_user_mode_loop(), by which
> point the hypervisor ioctl() code is arguably long done.
>
> This allows further optimizations for blocking syscall heavy workloads in a
> subsequent step.
>
> Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
> ---
> include/asm-generic/thread_info_tif.h | 3 +++
> include/linux/irq-entry-common.h | 2 +-
> include/linux/rseq.h | 13 ++++++++++---
> include/linux/rseq_entry.h | 23 +++++++++++++++++++----
> include/linux/thread_info.h | 5 +++++
> 5 files changed, 38 insertions(+), 8 deletions(-)
>
> --- a/include/asm-generic/thread_info_tif.h
> +++ b/include/asm-generic/thread_info_tif.h
> @@ -45,4 +45,7 @@
> # define _TIF_RESTORE_SIGMASK BIT(TIF_RESTORE_SIGMASK)
> #endif
>
> +#define TIF_RSEQ 11 // Run RSEQ fast path
> +#define _TIF_RSEQ BIT(TIF_RSEQ)
> +
> #endif /* _ASM_GENERIC_THREAD_INFO_TIF_H_ */
> --- a/include/linux/irq-entry-common.h
> +++ b/include/linux/irq-entry-common.h
> @@ -30,7 +30,7 @@
> #define EXIT_TO_USER_MODE_WORK \
> (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
> _TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY | \
> - _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
> + _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | _TIF_RSEQ | \
> ARCH_EXIT_TO_USER_MODE_WORK)
>
> /**
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -40,7 +40,7 @@ static inline void rseq_signal_deliver(s
>
> static inline void rseq_raise_notify_resume(struct task_struct *t)
> {
> - set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
> + set_tsk_thread_flag(t, TIF_RSEQ);
> }
>
> /* Invoked from context switch to force evaluation on exit to user */
> @@ -122,7 +122,7 @@ static inline void rseq_force_update(voi
> */
> static inline void rseq_virt_userspace_exit(void)
> {
> - if (current->rseq_event.sched_switch)
> + if (!IS_ENABLED(CONFIG_HAVE_GENERIC_TIF_BITS) && current->rseq_event.sched_switch)
> rseq_raise_notify_resume(current);
> }
>
> @@ -147,9 +147,16 @@ static inline void rseq_fork(struct task
> /*
> * If it has rseq, force it into the slow path right away
> * because it is guaranteed to fault.
> + *
> + * Setting TIF_NOTIFY_RESUME is redundant but harmless for
> + * architectures which do not have a separate TIF_RSEQ, but
> + * for those which do it's required to enforce the slow path
> + * as the scheduler sets only TIF_RSEQ.
> */
> - if (t->rseq_event.has_rseq)
> + if (t->rseq_event.has_rseq) {
> t->rseq_event.slowpath = true;
> + set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
> + }
> }
> }
>
> --- a/include/linux/rseq_entry.h
> +++ b/include/linux/rseq_entry.h
> @@ -502,18 +502,33 @@ static __always_inline bool __rseq_exit_
> return true;
> }
>
> +#ifdef CONFIG_HAVE_GENERIC_TIF_BITS
> +# define CHECK_TIF_RSEQ _TIF_RSEQ
> +static __always_inline void clear_tif_rseq(void)
> +{
> + clear_thread_flag(TIF_RSEQ);
> +}
> +#else
> +# define CHECK_TIF_RSEQ 0UL
> +static inline void clear_tif_rseq(void) { }
> +#endif
> +
> static __always_inline unsigned long
> rseq_exit_to_user_mode_work(struct pt_regs *regs, unsigned long ti_work, const unsigned long mask)
> {
> /*
> * Check if all work bits have been cleared before handling rseq.
> + *
> + * In case of a separate TIF_RSEQ this checks for all other bits to
> + * be cleared and TIF_RSEQ to be set.
> */
> - if ((ti_work & mask) != 0)
> - return ti_work;
> -
> - if (likely(!__rseq_exit_to_user_mode_restart(regs)))
> + if ((ti_work & mask) != CHECK_TIF_RSEQ)
> return ti_work;
>
> + if (likely(!__rseq_exit_to_user_mode_restart(regs))) {
> + clear_tif_rseq();
> + return ti_work & ~CHECK_TIF_RSEQ;
> + }
> return ti_work | _TIF_NOTIFY_RESUME;
> }
>
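As an aside for readers following the bit logic above: here is a minimal
user-space sketch of how the (ti_work & mask) != CHECK_TIF_RSEQ comparison
behaves with and without a separate TIF_RSEQ. Illustration only; the bit
numbers, the mask and the rseq_fastpath_runs() helper are invented for the
demo and are not the kernel's.

#include <stdio.h>

#define BIT(n)              (1UL << (n))
#define _TIF_SIGPENDING     BIT(0)
#define _TIF_NOTIFY_RESUME  BIT(2)
#define _TIF_RSEQ           BIT(11)

/*
 * The fast path runs when the pending work, masked by the exit work bits,
 * equals CHECK_TIF_RSEQ: _TIF_RSEQ with a separate bit, 0UL without one.
 */
static int rseq_fastpath_runs(unsigned long ti_work, unsigned long mask,
			      unsigned long check_tif_rseq)
{
	return (ti_work & mask) == check_tif_rseq;
}

int main(void)
{
	unsigned long mask = _TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_RSEQ;

	/* Separate TIF_RSEQ: a pending signal still defers the fast path ... */
	printf("%d\n", rseq_fastpath_runs(_TIF_RSEQ | _TIF_SIGPENDING, mask, _TIF_RSEQ)); /* 0 */
	/* ... but with TIF_RSEQ as the only remaining bit it runs. */
	printf("%d\n", rseq_fastpath_runs(_TIF_RSEQ, mask, _TIF_RSEQ));                   /* 1 */

	/* Fallback (TIF_RSEQ aliases TIF_NOTIFY_RESUME, CHECK_TIF_RSEQ == 0UL):
	 * the fast path only runs once every work bit has been cleared. */
	printf("%d\n", rseq_fastpath_runs(_TIF_NOTIFY_RESUME, mask, 0UL));                /* 0 */
	printf("%d\n", rseq_fastpath_runs(0, mask, 0UL));                                 /* 1 */

	return 0;
}

The failure case is not modeled here; as the hunk above shows, it simply ORs
_TIF_NOTIFY_RESUME back into ti_work so exit_to_user_mode_loop() takes the
slow path.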
> --- a/include/linux/thread_info.h
> +++ b/include/linux/thread_info.h
> @@ -67,6 +67,11 @@ enum syscall_work_bit {
> #define _TIF_NEED_RESCHED_LAZY _TIF_NEED_RESCHED
> #endif
>
> +#ifndef TIF_RSEQ
> +# define TIF_RSEQ TIF_NOTIFY_RESUME
> +# define _TIF_RSEQ _TIF_NOTIFY_RESUME
> +#endif
> +
> #ifdef __KERNEL__
>
> #ifndef arch_set_restart_data
>
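One more illustrative note on the thread_info.h fallback: on an architecture
that has not been converted to the generic TIF space, TIF_RSEQ simply aliases
TIF_NOTIFY_RESUME, so rseq_raise_notify_resume() keeps setting the same bit
it always did. A stand-alone sketch, with macro values invented for the demo
rather than taken from any real architecture:

#include <stdio.h>

/* Pretend this comes from an unconverted architecture's thread_info.h ... */
#define TIF_NOTIFY_RESUME	3
#define _TIF_NOTIFY_RESUME	(1UL << TIF_NOTIFY_RESUME)

/* ... and this is the generic fallback from <linux/thread_info.h>. */
#ifndef TIF_RSEQ
# define TIF_RSEQ	TIF_NOTIFY_RESUME
# define _TIF_RSEQ	_TIF_NOTIFY_RESUME
#endif

int main(void)
{
	unsigned long ti_flags = 0;

	/* What rseq_raise_notify_resume() now does: set TIF_RSEQ ... */
	ti_flags |= 1UL << TIF_RSEQ;

	/* ... which on this architecture is still TIF_NOTIFY_RESUME. */
	printf("notify_resume pending: %s\n",
	       (ti_flags & _TIF_NOTIFY_RESUME) ? "yes" : "no");
	return 0;
}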
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com