Message-ID: <20250813162824.164609663@linutronix.de>
Date: Wed, 13 Aug 2025 18:29:27 +0200 (CEST)
From: Thomas Gleixner <tglx@...utronix.de>
To: LKML <linux-kernel@...r.kernel.org>
Cc: Michael Jeanson <mjeanson@...icios.com>,
Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
Peter Zijlstra <peterz@...radead.org>,
"Paul E. McKenney" <paulmck@...nel.org>,
Boqun Feng <boqun.feng@...il.com>,
Wei Liu <wei.liu@...nel.org>,
Jens Axboe <axboe@...nel.dk>
Subject: [patch 06/11] rseq: Optimize exit to user space further
Now that the event pending bit management is consistent, the invocation of
__rseq_handle_notify_resume() can be avoided if the event pending bit is
not set.
This is correct because of the following order:
  1) if (TIF_NOTIFY_RESUME)
  2)     clear(TIF_NOTIFY_RESUME);
         smp_mb__after_atomic();
  3)     if (event_pending)
  4)         __rseq_handle_notify_resume()
  5)             guard()
  6)             work = check_and_clear_pending();
Any new event, which hits between #1 and #2, will be visible in #3. Any new
event, which hits after #2, will either be visible in #3 and therefore
consumed in #6, or missed in #3. The latter is not a problem as the new
event also re-raises TIF_NOTIFY_RESUME, which causes the exit-to-user work
loop to take another round.
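
For illustration only and not part of the patch, a minimal sketch of the
resulting exit path; the surrounding loop structure and the helper name are
assumptions modeled on the generic entry code, with the step numbers
matching the list above:

    #include <linux/rseq.h>
    #include <linux/sched.h>
    #include <linux/thread_info.h>

    /* Illustrative sketch only, not part of the patch */
    static void rseq_exit_to_user_sketch(struct pt_regs *regs)
    {
    	while (test_thread_flag(TIF_NOTIFY_RESUME)) {		/* #1 */
    		clear_thread_flag(TIF_NOTIFY_RESUME);		/* #2 */
    		smp_mb__after_atomic();

    		/* #3: quick check, bypassed when CONFIG_DEBUG_RSEQ is enabled */
    		if (IS_ENABLED(CONFIG_DEBUG_RSEQ) ||
    		    READ_ONCE(current->rseq_event_pending))
    			__rseq_handle_notify_resume(NULL, regs); /* #4 - #6 */

    		/*
    		 * An event raised after #2 and missed in #3 re-raises
    		 * TIF_NOTIFY_RESUME, so the loop takes another round.
    		 */
    	}
    }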
The quick check #3 optimizes for the common case, where event_pending is
false. Ignore the quick check when CONFIG_DEBUG_RSEQ is enabled to widen
the test coverage.
Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
Cc: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: "Paul E. McKenney" <paulmck@...nel.org>
Cc: Boqun Feng <boqun.feng@...il.com>
---
include/linux/rseq.h |  8 +++++---
kernel/rseq.c        | 17 +++++++++++++----
2 files changed, 18 insertions(+), 7 deletions(-)
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -17,7 +17,7 @@ void __rseq_handle_notify_resume(struct
static inline void rseq_handle_notify_resume(struct pt_regs *regs)
{
- if (current->rseq)
+ if (IS_ENABLED(CONFIG_DEBUG_RSEQ) || READ_ONCE(current->rseq_event_pending))
__rseq_handle_notify_resume(NULL, regs);
}
@@ -30,8 +30,10 @@ static inline void rseq_signal_deliver(s
static inline void rseq_notify_event(struct task_struct *t)
{
+ lockdep_assert_irqs_disabled();
+
if (t->rseq) {
- t->rseq_event_pending = true;
+ WRITE_ONCE(t->rseq_event_pending, true);
set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
}
}
@@ -59,7 +61,7 @@ static inline void rseq_fork(struct task
t->rseq = current->rseq;
t->rseq_len = current->rseq_len;
t->rseq_sig = current->rseq_sig;
- t->rseq_event_pending = current->rseq_event_pending;
+ t->rseq_event_pending = READ_ONCE(current->rseq_event_pending);
}
}
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -524,9 +524,17 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
ret = rseq_reset_rseq_cpu_node_id(current);
if (ret)
return ret;
- current->rseq = NULL;
- current->rseq_sig = 0;
- current->rseq_len = 0;
+
+ /*
+ * Ensure consistency of tsk::rseq and tsk::rseq_event_pending
+ * vs. the scheduler and the RSEQ IPIs.
+ */
+ scoped_guard(RSEQ_EVENT_GUARD) {
+ current->rseq = NULL;
+ current->rseq_sig = 0;
+ current->rseq_len = 0;
+ current->rseq_event_pending = false;
+ }
return 0;
}
@@ -601,7 +609,8 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
* registered, ensure the cpu_id_start and cpu_id fields
* are updated before returning to user-space.
*/
- rseq_set_notify_resume(current);
+ scoped_guard(irq)
+ rseq_notify_event(current);
return 0;
}
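
For illustration only, not part of the patch: a hypothetical caller that is
not already running with interrupts disabled would wrap rseq_notify_event()
the same way the registration path above does, satisfying the new lockdep
assertion:

    /* Hypothetical helper, for illustration only */
    static void raise_rseq_event(struct task_struct *t)
    {
    	/* rseq_notify_event() asserts that interrupts are disabled */
    	scoped_guard(irq)
    		rseq_notify_event(t);
    }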