Message-ID: <20250823161653.843757955@linutronix.de>
Date: Sat, 23 Aug 2025 18:39:27 +0200 (CEST)
From: Thomas Gleixner <tglx@...utronix.de>
To: LKML <linux-kernel@...r.kernel.org>
Cc: Jens Axboe <axboe@...nel.dk>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Peter Zijlstra <peterz@...radead.org>,
"Paul E. McKenney" <paulmck@...nel.org>,
Boqun Feng <boqun.feng@...il.com>,
Paolo Bonzini <pbonzini@...hat.com>,
Sean Christopherson <seanjc@...gle.com>,
Wei Liu <wei.liu@...nel.org>,
Dexuan Cui <decui@...rosoft.com>,
x86@...nel.org,
Arnd Bergmann <arnd@...db.de>,
Heiko Carstens <hca@...ux.ibm.com>,
Christian Borntraeger <borntraeger@...ux.ibm.com>,
Sven Schnelle <svens@...ux.ibm.com>,
Huacai Chen <chenhuacai@...nel.org>,
Paul Walmsley <paul.walmsley@...ive.com>,
Palmer Dabbelt <palmer@...belt.com>
Subject: [patch V2 09/37] rseq: Introduce struct rseq_event
In preparation for a major rewrite of this code, provide a data structure
for event management.
Put the sched_switch event and an indicator for RSEQ on a task into it as
a start. That uses a union, which allows masking and clearing the whole
lot efficiently.
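For illustration only (not part of the patch), a standalone sketch of the
pattern, with stdint typedefs standing in for the kernel's u8/u16/u32:

	#include <stdint.h>

	typedef uint8_t  u8;
	typedef uint16_t u16;
	typedef uint32_t u32;

	struct rseq_event {
		union {
			u32 all;			/* clears everything in one store */
			struct {
				union {
					u16 events;	/* tests all events in one load */
					struct {
						u8 sched_switch;
					};
				};
				u8 has_rseq;
			};
		};
	};

	static inline void clear_event(struct rseq_event *ev)
	{
		/* A single 32-bit store instead of per-member writes */
		ev->all = 0;
	}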
The indicators are explicitly not a bit field. Bit fields generate abysmal
code.
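For comparison, a hypothetical bit field variant (reusing the typedefs
from the sketch above):

	struct rseq_event_bitfield {		/* NOT the chosen layout */
		u32 sched_switch : 1;
		u32 has_rseq     : 1;
	};

	/*
	 * Writing ev->sched_switch = 1 on the bit field forces the
	 * compiler to load the containing word, OR in the bit and
	 * store the word back. The u8 member in the sketch above is
	 * a plain single-byte store.
	 */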
The boolean members are defined as u8 as that actually guarantees the
size. There seem to be strange architecture ABIs which need more than
8 bits for a boolean.
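A build-time check on the sketch above makes the layout guarantee
explicit (illustrative only, not in the patch):

	_Static_assert(sizeof(struct rseq_event) == sizeof(u32),
		       "rseq_event must remain a single 32-bit word");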
The has_rseq member is redundant vs. task::rseq, but it turns out that
boolean operations and quick checks on the union generate better code
than fiddling with separate entities and data types.
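Sketched as a hypothetical helper on the struct above:

	static inline int rseq_needs_notify(const struct rseq_event *ev)
	{
		/*
		 * has_rseq and sched_switch live in the same 32-bit
		 * word, so the compiler can combine this into a single
		 * load plus mask instead of two independent accesses
		 * to separate task_struct members.
		 */
		return ev->has_rseq && ev->sched_switch;
	}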
This struct will be extended over time to carry more information.
Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
---
include/linux/rseq.h | 23 ++++++++++++-----------
include/linux/rseq_types.h | 30 ++++++++++++++++++++++++++++++
include/linux/sched.h | 7 ++-----
kernel/rseq.c | 6 ++++--
4 files changed, 48 insertions(+), 18 deletions(-)
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -9,22 +9,22 @@ void __rseq_handle_notify_resume(struct
static inline void rseq_handle_notify_resume(struct pt_regs *regs)
{
- if (current->rseq)
+ if (current->rseq_event.has_rseq)
__rseq_handle_notify_resume(NULL, regs);
}
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs)
{
- if (current->rseq) {
- current->rseq_event_pending = true;
+ if (current->rseq_event.has_rseq) {
+ current->rseq_event.sched_switch = true;
__rseq_handle_notify_resume(ksig, regs);
}
}
static inline void rseq_sched_switch_event(struct task_struct *t)
{
- if (t->rseq) {
- t->rseq_event_pending = true;
+ if (t->rseq_event.has_rseq) {
+ t->rseq_event.sched_switch = true;
set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
}
}
@@ -32,8 +32,9 @@ static inline void rseq_sched_switch_eve
static __always_inline void rseq_exit_to_user_mode(void)
{
if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
- if (WARN_ON_ONCE(current->rseq && current->rseq_event_pending))
- current->rseq_event_pending = false;
+ if (WARN_ON_ONCE(current->rseq_event.has_rseq &&
+ current->rseq_event.events))
+ current->rseq_event.events = 0;
}
}
@@ -49,7 +50,7 @@ static __always_inline void rseq_exit_to
*/
static inline void rseq_virt_userspace_exit(void)
{
- if (current->rseq_event_pending)
+ if (current->rseq_event.sched_switch)
set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
}
@@ -63,12 +64,12 @@ static inline void rseq_fork(struct task
t->rseq = NULL;
t->rseq_len = 0;
t->rseq_sig = 0;
- t->rseq_event_pending = false;
+ t->rseq_event.all = 0;
} else {
t->rseq = current->rseq;
t->rseq_len = current->rseq_len;
t->rseq_sig = current->rseq_sig;
- t->rseq_event_pending = current->rseq_event_pending;
+ t->rseq_event = current->rseq_event;
}
}
@@ -77,7 +78,7 @@ static inline void rseq_execve(struct ta
t->rseq = NULL;
t->rseq_len = 0;
t->rseq_sig = 0;
- t->rseq_event_pending = false;
+ t->rseq_event.all = 0;
}
#else /* CONFIG_RSEQ */
--- /dev/null
+++ b/include/linux/rseq_types.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_RSEQ_TYPES_H
+#define _LINUX_RSEQ_TYPES_H
+
+#include <linux/types.h>
+
+/*
+ * struct rseq_event - Storage for rseq related event management
+ * @all: Compound to initialize and clear the data efficiently
+ * @events: Compound to access events with a single load/store
+ * @sched_switch: True if the task was scheduled out
+ * @has_rseq: True if the task has a rseq pointer installed
+ */
+struct rseq_event {
+ union {
+ u32 all;
+ struct {
+ union {
+ u16 events;
+ struct {
+ u8 sched_switch;
+ };
+ };
+
+ u8 has_rseq;
+ };
+ };
+};
+
+#endif
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -41,6 +41,7 @@
#include <linux/task_io_accounting.h>
#include <linux/posix-timers_types.h>
#include <linux/restart_block.h>
+#include <linux/rseq_types.h>
#include <uapi/linux/rseq.h>
#include <linux/seqlock_types.h>
#include <linux/kcsan.h>
@@ -1404,11 +1405,7 @@ struct task_struct {
struct rseq __user *rseq;
u32 rseq_len;
u32 rseq_sig;
- /*
- * RmW on rseq_event_pending must be performed atomically
- * with respect to preemption.
- */
- bool rseq_event_pending;
+ struct rseq_event rseq_event;
# ifdef CONFIG_DEBUG_RSEQ
/*
* This is a place holder to save a copy of the rseq fields for
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -460,8 +460,8 @@ void __rseq_handle_notify_resume(struct
* inconsistencies.
*/
scoped_guard(RSEQ_EVENT_GUARD) {
- event = t->rseq_event_pending;
- t->rseq_event_pending = false;
+ event = t->rseq_event.sched_switch;
+ t->rseq_event.sched_switch = false;
}
if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
@@ -523,6 +523,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
current->rseq = NULL;
current->rseq_sig = 0;
current->rseq_len = 0;
+ current->rseq_event.all = 0;
return 0;
}
@@ -595,6 +596,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
* registered, ensure the cpu_id_start and cpu_id fields
* are updated before returning to user-space.
*/
+ current->rseq_event.has_rseq = true;
rseq_sched_switch_event(current);
return 0;