Message-ID: <9a94b9d4-708c-476a-bf7d-7deb1c14f1ac@efficios.com>
Date: Mon, 25 Aug 2025 14:11:37 -0400
From: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
To: Thomas Gleixner <tglx@...utronix.de>, LKML <linux-kernel@...r.kernel.org>
Cc: Jens Axboe <axboe@...nel.dk>, Peter Zijlstra <peterz@...radead.org>,
"Paul E. McKenney" <paulmck@...nel.org>, Boqun Feng <boqun.feng@...il.com>,
Paolo Bonzini <pbonzini@...hat.com>, Sean Christopherson
<seanjc@...gle.com>, Wei Liu <wei.liu@...nel.org>,
Dexuan Cui <decui@...rosoft.com>, x86@...nel.org,
Arnd Bergmann <arnd@...db.de>, Heiko Carstens <hca@...ux.ibm.com>,
Christian Borntraeger <borntraeger@...ux.ibm.com>,
Sven Schnelle <svens@...ux.ibm.com>, Huacai Chen <chenhuacai@...nel.org>,
Paul Walmsley <paul.walmsley@...ive.com>, Palmer Dabbelt <palmer@...belt.com>
Subject: Re: [patch V2 09/37] rseq: Introduce struct rseq_event
On 2025-08-23 12:39, Thomas Gleixner wrote:
> In preparation for a major rewrite of this code, provide a data structure
> for event management.
>
> Put the sched_switch event and a indicator for RSEQ on a task into it as a
an indicator
> start. That uses a union, which allows to mask and clear the whole lot
allows masking and clearing
> efficiently.
>
> The indicators are explicitely not a bit field. Bit fields generate abysmal
explicitly
> code.
>
> The boolean members are defined as u8 as that actually guarantees that it
> fits. There seem to be strange architecture ABIs which need more than 8bits
> for a boolean.
>
> The has_rseq member is redudandant vs. task::rseq, but it turns out that
redundant
> boolean operations and quick checks on the union generate better code than
> fiddling with seperate entities and data types.
separate
>
> This struct will be extended over time to carry more information.
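
If I understand the intent correctly, the win is that the common paths
compile to single plain loads/stores on the compound members rather
than separate accesses per boolean. A minimal sketch against the struct
introduced below (illustration only, not the generated code):

	t->rseq_event.all = 0;		/* one u32 store clears events and has_rseq */
	t->rseq_event.events = 0;	/* one u16 store clears only the events */

	if (t->rseq_event.sched_switch)	/* one u8 load, no mask/shift needed */
		set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
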
>
> Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
> ---
> include/linux/rseq.h | 23 ++++++++++++-----------
> include/linux/rseq_types.h | 30 ++++++++++++++++++++++++++++++
> include/linux/sched.h | 7 ++-----
> kernel/rseq.c | 6 ++++--
> 4 files changed, 48 insertions(+), 18 deletions(-)
>
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -9,22 +9,22 @@ void __rseq_handle_notify_resume(struct
>
> static inline void rseq_handle_notify_resume(struct pt_regs *regs)
> {
> - if (current->rseq)
> + if (current->rseq_event.has_rseq)
> __rseq_handle_notify_resume(NULL, regs);
> }
>
> static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
> {
> - if (current->rseq) {
> - current->rseq_event_pending = true;
> + if (current->rseq_event.has_rseq) {
> + current->rseq_event.sched_switch = true;
> __rseq_handle_notify_resume(ksig, regs);
> }
> }
>
> static inline void rseq_sched_switch_event(struct task_struct *t)
> {
> - if (t->rseq) {
> - t->rseq_event_pending = true;
> + if (t->rseq_event.has_rseq) {
> + t->rseq_event.sched_switch = true;
> set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
> }
> }
> @@ -32,8 +32,9 @@ static inline void rseq_sched_switch_eve
> static __always_inline void rseq_exit_to_user_mode(void)
> {
> if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
> - if (WARN_ON_ONCE(current->rseq && current->rseq_event_pending))
> - current->rseq_event_pending = false;
> + if (WARN_ON_ONCE(current->rseq_event.has_rseq &&
> + current->rseq_event.events))
> + current->rseq_event.events = 0;
> }
> }
>
> @@ -49,7 +50,7 @@ static __always_inline void rseq_exit_to
> */
> static inline void rseq_virt_userspace_exit(void)
> {
> - if (current->rseq_event_pending)
> + if (current->rseq_event.sched_switch)
> set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
> }
>
> @@ -63,12 +64,12 @@ static inline void rseq_fork(struct task
> t->rseq = NULL;
> t->rseq_len = 0;
> t->rseq_sig = 0;
> - t->rseq_event_pending = false;
> + t->rseq_event.all = 0;
> } else {
> t->rseq = current->rseq;
> t->rseq_len = current->rseq_len;
> t->rseq_sig = current->rseq_sig;
> - t->rseq_event_pending = current->rseq_event_pending;
> + t->rseq_event = current->rseq_event;
> }
> }
>
> @@ -77,7 +78,7 @@ static inline void rseq_execve(struct ta
> t->rseq = NULL;
> t->rseq_len = 0;
> t->rseq_sig = 0;
> - t->rseq_event_pending = false;
> + t->rseq_event.all = 0;
> }
>
> #else /* CONFIG_RSEQ */
> --- /dev/null
> +++ b/include/linux/rseq_types.h
> @@ -0,0 +1,30 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _LINUX_RSEQ_TYPES_H
> +#define _LINUX_RSEQ_TYPES_H
> +
> +#include <linux/types.h>
> +
> +/*
> + * struct rseq_event - Storage for rseq related event management
> + * @all: Compound to initialize and clear the data efficiently
> + * @events: Compund to access events with a single load/store
Compound
> + * @sched_switch: True if the task was scheduled out
> + * @has_rseq: True if the task has a rseq pointer installed
> + */
> +struct rseq_event {
> + union {
> + u32 all;
> + struct {
> + union {
> + u16 events;
> + struct {
> + u8 sched_switch;
> + };
Is Alpha still supported, or can we assume bytewise loads/stores?

Are those events each meant to consume one byte (which limits us to two
events with a 2-byte "events" / 4-byte "all"), or is the plan to update
them as bit flags with bitwise OR/AND-NOT?
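
To make the two alternatives concrete (with a hypothetical second event
"foo" for illustration, neither "foo" nor the RSEQ_EVENT_* defines
exist in the patch):

	/*
	 * (a) One byte per event: plain stores suffice, but a 2-byte
	 *     "events" only has room for two of them.
	 */
	struct {
		u8	sched_switch;
		u8	foo;
	};

	/*
	 * (b) Bit flags packed into "events": room for up to 16, but
	 *     setting/clearing becomes a read-modify-write.
	 */
	#define RSEQ_EVENT_SCHED_SWITCH	0x0001
	#define RSEQ_EVENT_FOO		0x0002

	t->rseq_event.events |= RSEQ_EVENT_SCHED_SWITCH;	/* set */
	t->rseq_event.events &= ~RSEQ_EVENT_SCHED_SWITCH;	/* clear */
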
Thanks,
Mathieu
> + };
> +
> + u8 has_rseq;
> + };
> + };
> +};
> +
> +#endif
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -41,6 +41,7 @@
> #include <linux/task_io_accounting.h>
> #include <linux/posix-timers_types.h>
> #include <linux/restart_block.h>
> +#include <linux/rseq_types.h>
> #include <uapi/linux/rseq.h>
> #include <linux/seqlock_types.h>
> #include <linux/kcsan.h>
> @@ -1404,11 +1405,7 @@ struct task_struct {
> struct rseq __user *rseq;
> u32 rseq_len;
> u32 rseq_sig;
> - /*
> - * RmW on rseq_event_pending must be performed atomically
> - * with respect to preemption.
> - */
> - bool rseq_event_pending;
> + struct rseq_event rseq_event;
> # ifdef CONFIG_DEBUG_RSEQ
> /*
> * This is a place holder to save a copy of the rseq fields for
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -460,8 +460,8 @@ void __rseq_handle_notify_resume(struct
> * inconsistencies.
> */
> scoped_guard(RSEQ_EVENT_GUARD) {
> - event = t->rseq_event_pending;
> - t->rseq_event_pending = false;
> + event = t->rseq_event.sched_switch;
> + t->rseq_event.sched_switch = false;
> }
>
> if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
> @@ -523,6 +523,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
> current->rseq = NULL;
> current->rseq_sig = 0;
> current->rseq_len = 0;
> + current->rseq_event.all = 0;
> return 0;
> }
>
> @@ -595,6 +596,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
> * registered, ensure the cpu_id_start and cpu_id fields
> * are updated before returning to user-space.
> */
> + current->rseq_event.has_rseq = true;
> rseq_sched_switch_event(current);
>
> return 0;
>
--
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com