Message-ID: <20250813162824.036832236@linutronix.de>
Date: Wed, 13 Aug 2025 18:29:22 +0200 (CEST)
From: Thomas Gleixner <tglx@...utronix.de>
To: LKML <linux-kernel@...r.kernel.org>
Cc: Michael Jeanson <mjeanson@...icios.com>,
Peter Zijlstra <peterz@...radead.org>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
"Paul E. McKenney" <paulmck@...nel.org>,
Boqun Feng <boqun.feng@...il.com>,
Wei Liu <wei.liu@...nel.org>,
Jens Axboe <axboe@...nel.dk>
Subject: [patch 04/11] rseq: Replace the pointless event mask bit fiddling
Since commit 0190e4198e47 ("rseq: Deprecate RSEQ_CS_FLAG_NO_RESTART_ON_*
flags") the individual bits in task::rseq_event_mask carry no meaning;
setting them one by one is just pointless extra work.
Collapse them all into a single boolean, which simplifies the code a lot.
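As a rough before/after sketch (the authoritative hunks are below): each
per-event setter, e.g. rseq_preempt(), fiddled with one bit of the mask
and then raised TIF_NOTIFY_RESUME:

	/* Old scheme: one bit per event type */
	static inline void rseq_preempt(struct task_struct *t)
	{
		__set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
		rseq_set_notify_resume(t);
	}

With the bits gone, all of them collapse into a single helper which only
records that some event happened:

	/* New scheme: a single boolean */
	static inline void rseq_notify_event(struct task_struct *t)
	{
		if (t->rseq) {
			t->rseq_event_pending = true;
			set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
		}
	}
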
Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
Cc: "Paul E. McKenney" <paulmck@...nel.org>
Cc: Boqun Feng <boqun.feng@...il.com>
---
include/linux/rseq.h | 59 +++++++++++-----------------------------------
include/linux/sched.h | 10 +++----
include/uapi/linux/rseq.h | 21 +++++-----------
kernel/rseq.c | 26 ++++++++++++--------
kernel/sched/core.c | 8 +++---
kernel/sched/membarrier.c | 8 +++---
6 files changed, 51 insertions(+), 81 deletions(-)
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -7,28 +7,6 @@
#include <linux/preempt.h>
#include <linux/sched.h>
-#ifdef CONFIG_MEMBARRIER
-# define RSEQ_EVENT_GUARD irq
-#else
-# define RSEQ_EVENT_GUARD preempt
-#endif
-
-/*
- * Map the event mask on the user-space ABI enum rseq_cs_flags
- * for direct mask checks.
- */
-enum rseq_event_mask_bits {
- RSEQ_EVENT_PREEMPT_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
- RSEQ_EVENT_SIGNAL_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
- RSEQ_EVENT_MIGRATE_BIT = RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
-};
-
-enum rseq_event_mask {
- RSEQ_EVENT_PREEMPT = (1U << RSEQ_EVENT_PREEMPT_BIT),
- RSEQ_EVENT_SIGNAL = (1U << RSEQ_EVENT_SIGNAL_BIT),
- RSEQ_EVENT_MIGRATE = (1U << RSEQ_EVENT_MIGRATE_BIT),
-};
-
static inline void rseq_set_notify_resume(struct task_struct *t)
{
if (t->rseq)
@@ -47,30 +25,25 @@ static inline void rseq_handle_notify_re
static inline void rseq_signal_deliver(struct ksignal *ksig,
struct pt_regs *regs)
{
- scoped_guard(RSEQ_EVENT_GUARD)
- __set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
- rseq_handle_notify_resume(ksig, regs);
-}
-
-/* rseq_preempt() requires preemption to be disabled. */
-static inline void rseq_preempt(struct task_struct *t)
-{
- __set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
- rseq_set_notify_resume(t);
+ if (current->rseq) {
+ current->rseq_event_pending = true;
+ __rseq_handle_notify_resume(ksig, regs);
+ }
}
-/* rseq_migrate() requires preemption to be disabled. */
-static inline void rseq_migrate(struct task_struct *t)
+static inline void rseq_notify_event(struct task_struct *t)
{
- __set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
- rseq_set_notify_resume(t);
+ if (t->rseq) {
+ t->rseq_event_pending = true;
+ set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+ }
}
static __always_inline void rseq_exit_to_user_mode(void)
{
if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
- if (WARN_ON_ONCE(current->rseq && current->rseq_event_mask))
- current->rseq_event_mask = 0;
+ if (WARN_ON_ONCE(current->rseq && current->rseq_event_pending))
+ current->rseq_event_pending = false;
}
}
@@ -84,12 +57,12 @@ static inline void rseq_fork(struct task
t->rseq = NULL;
t->rseq_len = 0;
t->rseq_sig = 0;
- t->rseq_event_mask = 0;
+ t->rseq_event_pending = false;
} else {
t->rseq = current->rseq;
t->rseq_len = current->rseq_len;
t->rseq_sig = current->rseq_sig;
- t->rseq_event_mask = current->rseq_event_mask;
+ t->rseq_event_pending = current->rseq_event_pending;
}
}
@@ -98,17 +71,15 @@ static inline void rseq_execve(struct ta
t->rseq = NULL;
t->rseq_len = 0;
t->rseq_sig = 0;
- t->rseq_event_mask = 0;
+ t->rseq_event_pending = false;
}
#else /* CONFIG_RSEQ */
static inline void rseq_set_notify_resume(struct task_struct *t) { }
static inline void rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) { }
static inline void rseq_signal_deliver(struct ksignal *ksig, struct pt_regs *regs) { }
-static inline void rseq_preempt(struct task_struct *t) { }
-static inline void rseq_migrate(struct task_struct *t) { }
+static inline void rseq_notify_event(struct task_struct *t) { }
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags) { }
-static inline void rseq_execve(struct task_struct *t) { }
static inline void rseq_exit_to_user_mode(void) { }
#endif /* !CONFIG_RSEQ */
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1401,14 +1401,14 @@ struct task_struct {
#endif /* CONFIG_NUMA_BALANCING */
#ifdef CONFIG_RSEQ
- struct rseq __user *rseq;
- u32 rseq_len;
- u32 rseq_sig;
+ struct rseq __user *rseq;
+ u32 rseq_len;
+ u32 rseq_sig;
/*
- * RmW on rseq_event_mask must be performed atomically
+ * RmW on rseq_event_pending must be performed atomically
* with respect to preemption.
*/
- unsigned long rseq_event_mask;
+ bool rseq_event_pending;
# ifdef CONFIG_DEBUG_RSEQ
/*
* This is a place holder to save a copy of the rseq fields for
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -114,20 +114,13 @@ struct rseq {
/*
* Restartable sequences flags field.
*
- * This field should only be updated by the thread which
- * registered this data structure. Read by the kernel.
- * Mainly used for single-stepping through rseq critical sections
- * with debuggers.
- *
- * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
- * Inhibit instruction sequence block restart on preemption
- * for this thread.
- * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
- * Inhibit instruction sequence block restart on signal
- * delivery for this thread.
- * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
- * Inhibit instruction sequence block restart on migration for
- * this thread.
+ * This field was initially intended to allow event masking for
+ * single-stepping through rseq critical sections with debuggers.
+ * The kernel does not support this anymore and the relevant bits
+ * are checked for being always false:
+ * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
+ * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
+ * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
*/
__u32 flags;
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -19,6 +19,12 @@
#define CREATE_TRACE_POINTS
#include <trace/events/rseq.h>
+#ifdef CONFIG_MEMBARRIER
+# define RSEQ_EVENT_GUARD irq
+#else
+# define RSEQ_EVENT_GUARD preempt
+#endif
+
/* The original rseq structure size (including padding) is 32 bytes. */
#define ORIG_RSEQ_SIZE 32
@@ -432,11 +438,11 @@ void __rseq_handle_notify_resume(struct
*/
if (regs) {
/*
- * Read and clear the event mask first. If the task was not
- * preempted or migrated or a signal is on the way, there
- * is no point in doing any of the heavy lifting here on
- * production kernels. In that case TIF_NOTIFY_RESUME was
- * raised by some other functionality.
+ * Read and clear the event pending bit first. If the task
+ * was not preempted or migrated or a signal is on the way,
+ * there is no point in doing any of the heavy lifting here
+ * on production kernels. In that case TIF_NOTIFY_RESUME
+ * was raised by some other functionality.
*
* This is correct because the read/clear operation is
* guarded against scheduler preemption, which makes it CPU
@@ -449,15 +455,15 @@ void __rseq_handle_notify_resume(struct
* with the result handed in to allow the detection of
* inconsistencies.
*/
- u32 event_mask;
+ bool event;
scoped_guard(RSEQ_EVENT_GUARD) {
- event_mask = t->rseq_event_mask;
- t->rseq_event_mask = 0;
+ event = t->rseq_event_pending;
+ t->rseq_event_pending = false;
}
- if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event_mask) {
- ret = rseq_ip_fixup(regs, !!event_mask);
+ if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || event) {
+ ret = rseq_ip_fixup(regs, event);
if (unlikely(ret < 0))
goto error;
}
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3364,7 +3364,7 @@ void set_task_cpu(struct task_struct *p,
if (p->sched_class->migrate_task_rq)
p->sched_class->migrate_task_rq(p, new_cpu);
p->se.nr_migrations++;
- rseq_migrate(p);
+ rseq_notify_event(p);
sched_mm_cid_migrate_from(p);
perf_event_task_migrate(p);
}
@@ -4795,7 +4795,7 @@ int sched_cgroup_fork(struct task_struct
p->sched_task_group = tg;
}
#endif
- rseq_migrate(p);
+ rseq_notify_event(p);
/*
* We're setting the CPU for the first time, we don't migrate,
* so use __set_task_cpu().
@@ -4859,7 +4859,7 @@ void wake_up_new_task(struct task_struct
* as we're not fully set-up yet.
*/
p->recent_used_cpu = task_cpu(p);
- rseq_migrate(p);
+ rseq_notify_event(p);
__set_task_cpu(p, select_task_rq(p, task_cpu(p), &wake_flags));
rq = __task_rq_lock(p, &rf);
update_rq_clock(rq);
@@ -5153,7 +5153,7 @@ prepare_task_switch(struct rq *rq, struc
kcov_prepare_switch(prev);
sched_info_switch(rq, prev, next);
perf_event_task_sched_out(prev, next);
- rseq_preempt(prev);
+ rseq_notify_event(prev);
fire_sched_out_preempt_notifiers(prev, next);
kmap_local_sched_out();
prepare_task(next);
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -199,7 +199,7 @@ static void ipi_rseq(void *info)
* is negligible.
*/
smp_mb();
- rseq_preempt(current);
+ rseq_notify_event(current);
}
static void ipi_sync_rq_state(void *info)
@@ -407,9 +407,9 @@ static int membarrier_private_expedited(
* membarrier, we will end up with some thread in the mm
* running without a core sync.
*
- * For RSEQ, don't rseq_preempt() the caller. User code
- * is not supposed to issue syscalls at all from inside an
- * rseq critical section.
+ * For RSEQ, don't invoke rseq_notify_event() on the
+ * caller. User code is not supposed to issue syscalls at
+ * all from inside an rseq critical section.
*/
if (flags != MEMBARRIER_FLAG_SYNC_CORE) {
preempt_disable();