[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <86b2fc16-16af-44d8-87f2-4b6c14713474@efficios.com>
Date: Tue, 26 Aug 2025 11:08:02 -0400
From: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
To: Thomas Gleixner <tglx@...utronix.de>, LKML <linux-kernel@...r.kernel.org>
Cc: Jens Axboe <axboe@...nel.dk>, Peter Zijlstra <peterz@...radead.org>,
"Paul E. McKenney" <paulmck@...nel.org>, Boqun Feng <boqun.feng@...il.com>,
Paolo Bonzini <pbonzini@...hat.com>, Sean Christopherson
<seanjc@...gle.com>, Wei Liu <wei.liu@...nel.org>,
Dexuan Cui <decui@...rosoft.com>, x86@...nel.org,
Arnd Bergmann <arnd@...db.de>, Heiko Carstens <hca@...ux.ibm.com>,
Christian Borntraeger <borntraeger@...ux.ibm.com>,
Sven Schnelle <svens@...ux.ibm.com>, Huacai Chen <chenhuacai@...nel.org>,
Paul Walmsley <paul.walmsley@...ive.com>, Palmer Dabbelt <palmer@...belt.com>
Subject: Re: [patch V2 24/37] rseq: Seperate the signal delivery path
On 2025-08-23 12:40, Thomas Gleixner wrote:
The patch title should read "rseq: Separate the signal delivery path"
(Seperate -> Separate).
> Completely seperate the signal delivery path from the notify handler as
seperate -> separate
> they have different semantics versus the event handling.
>
> The signal delivery only needs to ensure that the interrupted user context
> was not in a critical section or the section is aborted before it switches
> to the signal frame context. The signal frame context does not have the
> original instruction pointer anymore, so that can't be handled on exit to
> user space.
>
> No point in updating the CPU/CID ids as they might change again before the
> task returns to user space for real.
>
> The fast path optimization, which checks for the 'entry from user via
> interrupt' condition is only available for architectures which use the
> generic entry code.
>
> Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
> ---
> include/linux/rseq.h | 21 ++++++++++++++++-----
> include/linux/rseq_entry.h | 29 +++++++++++++++++++++++++++++
> kernel/rseq.c | 30 ++++++++++++++++++++++--------
> 3 files changed, 67 insertions(+), 13 deletions(-)
>
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -5,22 +5,33 @@
> #ifdef CONFIG_RSEQ
> #include <linux/sched.h>
>
> -void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);
> +void __rseq_handle_notify_resume(struct pt_regs *regs);
>
> static inline void rseq_handle_notify_resume(struct pt_regs *regs)
> {
> if (current->rseq_event.has_rseq)
> - __rseq_handle_notify_resume(NULL, regs);
> + __rseq_handle_notify_resume(regs);
> }
>
> +void __rseq_signal_deliver(int sig, struct pt_regs *regs);
> +
> +/*
> + * Invoked from signal delivery to fixup based on the register context before
> + * switching to the signal delivery context.
> + */
> static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
> {
> - if (current->rseq_event.has_rseq) {
> - current->rseq_event.sched_switch = true;
> - __rseq_handle_notify_resume(ksig, regs);
> + if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
> + /* '&' is intentional to spare one conditional branch */
> + if (current->rseq_event.has_rseq & current->rseq_event.user_irq)
> + __rseq_signal_deliver(ksig->sig, regs);
> + } else {
> + if (current->rseq_event.has_rseq)
> + __rseq_signal_deliver(ksig->sig, regs);
> }
> }
>
> +/* Raised from context switch and exevce to force evaluation on exit to user */
Missing punctuation at the end of the comment.
> static inline void rseq_sched_switch_event(struct task_struct *t)
> {
> if (t->rseq_event.has_rseq) {
> --- a/include/linux/rseq_entry.h
> +++ b/include/linux/rseq_entry.h
> @@ -364,6 +364,35 @@ bool rseq_set_uids_get_csaddr(struct tas
> return false;
> }
>
> +/*
> + * Update user space with new IDs and conditionally check whether the task
> + * is in a critical section.
> + */
> +static rseq_inline bool rseq_update_usr(struct task_struct *t, struct pt_regs *regs,
> + struct rseq_ids *ids, u32 node_id)
This patch introduces rseq_update_usr() without any caller; the callers
only come in follow-up patches. It would be good to state this up front
in the commit message, if this is indeed the intended sequence of changes.
Thanks,
Mathieu
> +{
> + u64 csaddr;
> +
> + if (!rseq_set_uids_get_csaddr(t, ids, node_id, &csaddr))
> + return false;
> +
> + /*
> + * On architectures which utilize the generic entry code this
> + * allows to skip the critical section when the entry was not from
> + * a user space interrupt, unless debug mode is enabled.
> + */
> + if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
> + if (!static_branch_unlikely(&rseq_debug_enabled)) {
> + if (likely(!t->rseq_event.user_irq))
> + return true;
> + }
> + }
> + if (likely(!csaddr))
> + return true;
> + /* Sigh, this really needs to do work */
> + return rseq_update_user_cs(t, regs, csaddr);
> +}
> +
> static __always_inline void rseq_exit_to_user_mode(void)
> {
> struct rseq_event *ev = &current->rseq_event;
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -247,13 +247,12 @@ static bool rseq_handle_cs(struct task_s
> * respect to other threads scheduled on the same CPU, and with respect
> * to signal handlers.
> */
> -void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
> +void __rseq_handle_notify_resume(struct pt_regs *regs)
> {
> struct task_struct *t = current;
> struct rseq_ids ids;
> u32 node_id;
> bool event;
> - int sig;
>
> /*
> * If invoked from hypervisors before entering the guest via
> @@ -272,10 +271,7 @@ void __rseq_handle_notify_resume(struct
> if (unlikely(t->flags & PF_EXITING))
> return;
>
> - if (ksig)
> - rseq_stat_inc(rseq_stats.signal);
> - else
> - rseq_stat_inc(rseq_stats.slowpath);
> + rseq_stat_inc(rseq_stats.slowpath);
>
> /*
> * Read and clear the event pending bit first. If the task
> @@ -314,8 +310,26 @@ void __rseq_handle_notify_resume(struct
> return;
>
> error:
> - sig = ksig ? ksig->sig : 0;
> - force_sigsegv(sig);
> + force_sig(SIGSEGV);
> +}
> +
> +void __rseq_signal_deliver(int sig, struct pt_regs *regs)
> +{
> + rseq_stat_inc(rseq_stats.signal);
> + /*
> + * Don't update IDs, they are handled on exit to user if
> + * necessary. The important thing is to abort a critical section of
> + * the interrupted context as after this point the instruction
> + * pointer in @regs points to the signal handler.
> + */
> + if (unlikely(!rseq_handle_cs(current, regs))) {
> + /*
> + * Clear the errors just in case this might survive
> + * magically, but leave the rest intact.
> + */
> + current->rseq_event.error = 0;
> + force_sigsegv(sig);
> + }
> }
>
> void __rseq_debug_syscall_return(struct pt_regs *regs)
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com
Powered by blists - more mailing lists