lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9a94b9d4-708c-476a-bf7d-7deb1c14f1ac@efficios.com>
Date: Mon, 25 Aug 2025 14:11:37 -0400
From: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
To: Thomas Gleixner <tglx@...utronix.de>, LKML <linux-kernel@...r.kernel.org>
Cc: Jens Axboe <axboe@...nel.dk>, Peter Zijlstra <peterz@...radead.org>,
 "Paul E. McKenney" <paulmck@...nel.org>, Boqun Feng <boqun.feng@...il.com>,
 Paolo Bonzini <pbonzini@...hat.com>, Sean Christopherson
 <seanjc@...gle.com>, Wei Liu <wei.liu@...nel.org>,
 Dexuan Cui <decui@...rosoft.com>, x86@...nel.org,
 Arnd Bergmann <arnd@...db.de>, Heiko Carstens <hca@...ux.ibm.com>,
 Christian Borntraeger <borntraeger@...ux.ibm.com>,
 Sven Schnelle <svens@...ux.ibm.com>, Huacai Chen <chenhuacai@...nel.org>,
 Paul Walmsley <paul.walmsley@...ive.com>, Palmer Dabbelt <palmer@...belt.com>
Subject: Re: [patch V2 09/37] rseq: Introduce struct rseq_event

On 2025-08-23 12:39, Thomas Gleixner wrote:
> In preparation for a major rewrite of this code, provide a data structure
> for event management.
> 
> Put the sched_switch event and a indicator for RSEQ on a task into it as a
> start. That uses a union, which allows to mask and clear the whole lot
> efficiently.
> 
> The indicators are explicitely not a bit field. Bit fields generate abysmal

explicitly

> code.
> 
> The boolean members are defined as u8 as that actually guarantees that it
> fits. There seem to be strange architecture ABIs which need more than 8bits
> for a boolean.
> 
> The has_rseq member is redudandant vs. task::rseq, but it turns out that

redundant

> boolean operations and quick checks on the union generate better code than
> fiddling with seperate entities and data types.

separate

> 
> This struct will be extended over time to carry more information.
> 
> Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
> ---
>   include/linux/rseq.h       |   23 ++++++++++++-----------
>   include/linux/rseq_types.h |   30 ++++++++++++++++++++++++++++++
>   include/linux/sched.h      |    7 ++-----
>   kernel/rseq.c              |    6 ++++--
>   4 files changed, 48 insertions(+), 18 deletions(-)
> 
> --- a/include/linux/rseq.h
> +++ b/include/linux/rseq.h
> @@ -9,22 +9,22 @@ void __rseq_handle_notify_resume(struct
>   
>   static inline void rseq_handle_notify_resume(struct pt_regs *regs)
>   {
> -	if (current->rseq)
> +	if (current->rseq_event.has_rseq)
>   		__rseq_handle_notify_resume(NULL, regs);
>   }
>   
>   static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
>   {
> -	if (current->rseq) {
> -		current->rseq_event_pending = true;
> +	if (current->rseq_event.has_rseq) {
> +		current->rseq_event.sched_switch = true;
>   		__rseq_handle_notify_resume(ksig, regs);
>   	}
>   }
>   
>   static inline void rseq_sched_switch_event(struct task_struct *t)
>   {
> -	if (t->rseq) {
> -		t->rseq_event_pending = true;
> +	if (t->rseq_event.has_rseq) {
> +		t->rseq_event.sched_switch = true;
>   		set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
>   	}
>   }
> @@ -32,8 +32,9 @@ static inline void rseq_sched_switch_eve
>   static __always_inline void rseq_exit_to_user_mode(void)
>   {
>   	if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
> -		if (WARN_ON_ONCE(current->rseq && current->rseq_event_pending))
> -			current->rseq_event_pending = false;
> +		if (WARN_ON_ONCE(current->rseq_event.has_rseq &&
> +				 current->rseq_event.events))
> +			current->rseq_event.events = 0;
>   	}
>   }
>   
> @@ -49,7 +50,7 @@ static __always_inline void rseq_exit_to
>    */
>   static inline void rseq_virt_userspace_exit(void)
>   {
> -	if (current->rseq_event_pending)
> +	if (current->rseq_event.sched_switch)
>   		set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
>   }
>   
> @@ -63,12 +64,12 @@ static inline void rseq_fork(struct task
>   		t->rseq = NULL;
>   		t->rseq_len = 0;
>   		t->rseq_sig = 0;
> -		t->rseq_event_pending = false;
> +		t->rseq_event.all = 0;
>   	} else {
>   		t->rseq = current->rseq;
>   		t->rseq_len = current->rseq_len;
>   		t->rseq_sig = current->rseq_sig;
> -		t->rseq_event_pending = current->rseq_event_pending;
> +		t->rseq_event = current->rseq_event;
>   	}
>   }
>   
> @@ -77,7 +78,7 @@ static inline void rseq_execve(struct ta
>   	t->rseq = NULL;
>   	t->rseq_len = 0;
>   	t->rseq_sig = 0;
> -	t->rseq_event_pending = false;
> +	t->rseq_event.all = 0;
>   }
>   
>   #else /* CONFIG_RSEQ */
> --- /dev/null
> +++ b/include/linux/rseq_types.h
> @@ -0,0 +1,30 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _LINUX_RSEQ_TYPES_H
> +#define _LINUX_RSEQ_TYPES_H
> +
> +#include <linux/types.h>
> +
> +/*
> + * struct rseq_event - Storage for rseq related event management
> + * @all:		Compound to initialize and clear the data efficiently
> + * @events:		Compund to access events with a single load/store

Compound

> + * @sched_switch:	True if the task was scheduled out
> + * @has_rseq:		True if the task has a rseq pointer installed
> + */
> +struct rseq_event {
> +	union {
> +		u32				all;
> +		struct {
> +			union {
> +				u16		events;
> +				struct {
> +					u8	sched_switch;
> +				};

Is alpha still supported, or can we assume bytewise loads/stores ?

Are those events meant to each consume 1 byte (which limits us to 2
events for a 2-byte "events"/4-byte "all"), or is the plan to update
them with bitwise or/~ and ?

Thanks,

Mathieu

> +			};
> +
> +			u8			has_rseq;
> +		};
> +	};
> +};
> +
> +#endif
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -41,6 +41,7 @@
>   #include <linux/task_io_accounting.h>
>   #include <linux/posix-timers_types.h>
>   #include <linux/restart_block.h>
> +#include <linux/rseq_types.h>
>   #include <uapi/linux/rseq.h>
>   #include <linux/seqlock_types.h>
>   #include <linux/kcsan.h>
> @@ -1404,11 +1405,7 @@ struct task_struct {
>   	struct rseq __user		*rseq;
>   	u32				rseq_len;
>   	u32				rseq_sig;
> -	/*
> -	 * RmW on rseq_event_pending must be performed atomically
> -	 * with respect to preemption.
> -	 */
> -	bool				rseq_event_pending;
> +	struct rseq_event		rseq_event;
>   # ifdef CONFIG_DEBUG_RSEQ
>   	/*
>   	 * This is a place holder to save a copy of the rseq fields for
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -460,8 +460,8 @@ void __rseq_handle_notify_resume(struct
>   	 * inconsistencies.
>   	 */
>   	scoped_guard(RSEQ_EVENT_GUARD) {
> -		event = t->rseq_event_pending;
> -		t->rseq_event_pending = false;
> +		event = t->rseq_event.sched_switch;
> +		t->rseq_event.sched_switch = false;
>   	}
>   
>   	if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
> @@ -523,6 +523,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
>   		current->rseq = NULL;
>   		current->rseq_sig = 0;
>   		current->rseq_len = 0;
> +		current->rseq_event.all = 0;
>   		return 0;
>   	}
>   
> @@ -595,6 +596,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
>   	 * registered, ensure the cpu_id_start and cpu_id fields
>   	 * are updated before returning to user-space.
>   	 */
> +	current->rseq_event.has_rseq = true;
>   	rseq_sched_switch_event(current);
>   
>   	return 0;
> 


-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ