Message-ID: <20251029144538.5eb5c772@gandalf.local.home>
Date: Wed, 29 Oct 2025 14:45:38 -0400
From: Steven Rostedt <rostedt@...dmis.org>
To: Thomas Gleixner <tglx@...utronix.de>
Cc: LKML <linux-kernel@...r.kernel.org>, Peter Zijlstra
<peterz@...radead.org>, Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
"Paul E. McKenney" <paulmck@...nel.org>, Boqun Feng <boqun.feng@...il.com>,
Jonathan Corbet <corbet@....net>, Prakash Sangappa
<prakash.sangappa@...cle.com>, Madadi Vineeth Reddy
<vineethr@...ux.ibm.com>, K Prateek Nayak <kprateek.nayak@....com>,
Sebastian Andrzej Siewior <bigeasy@...utronix.de>, Arnd Bergmann
<arnd@...db.de>, linux-arch@...r.kernel.org
Subject: Re: [patch V3 08/12] rseq: Implement time slice extension
enforcement timer
On Wed, 29 Oct 2025 14:22:26 +0100 (CET)
Thomas Gleixner <tglx@...utronix.de> wrote:
> --- a/include/linux/rseq_entry.h
> +++ b/include/linux/rseq_entry.h
> @@ -86,8 +86,24 @@ static __always_inline bool rseq_slice_e
> {
> return static_branch_likely(&rseq_slice_extension_key);
> }
> +
> +extern unsigned int rseq_slice_ext_nsecs;
> +bool __rseq_arm_slice_extension_timer(void);
> +
> +static __always_inline bool rseq_arm_slice_extension_timer(void)
> +{
> + if (!rseq_slice_extension_enabled())
> + return false;
> +
> + if (likely(!current->rseq.slice.state.granted))
> + return false;
> +
> + return __rseq_arm_slice_extension_timer();
> +}
> +
> #else /* CONFIG_RSEQ_SLICE_EXTENSION */
> static inline bool rseq_slice_extension_enabled(void) { return false; }
> +static inline bool rseq_arm_slice_extension_timer(void) { return false; }
> #endif /* !CONFIG_RSEQ_SLICE_EXTENSION */
>
> bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);
> @@ -542,17 +558,19 @@ static __always_inline void clear_tif_rs
> static __always_inline bool
> rseq_exit_to_user_mode_restart(struct pt_regs *regs, unsigned long ti_work)
> {
> - if (likely(!test_tif_rseq(ti_work)))
> - return false;
> -
> - if (unlikely(__rseq_exit_to_user_mode_restart(regs))) {
> - current->rseq.event.slowpath = true;
> - set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
> - return true;
> + if (unlikely(test_tif_rseq(ti_work))) {
> + if (unlikely(__rseq_exit_to_user_mode_restart(regs))) {
> + current->rseq.event.slowpath = true;
> + set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
> + return true;
Just to make sure I understand this: by setting TIF_NOTIFY_RESUME and
returning true, it can still come back and arm the timer?
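My (possibly wrong) mental model of the caller is something like:

	/* Simplified sketch from memory, not the literal entry code */
	do {
		ti_work = exit_to_user_mode_loop(regs, ti_work);
	} while (rseq_exit_to_user_mode_restart(regs, ti_work));

i.e. returning true just buys another trip through the work loop before
the timer arming is attempted again.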
I guess this also raises the question of whether user space can use
restartable sequences at the same time as requesting an extended time
slice?
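Something like this is what I have in mind (user space pseudo code; the
field and flag names here are made up for illustration, not the actual
ABI from this series):

	/* Hypothetical: request an extension, run an rseq critical
	 * section, then yield if the grant was actually given. */
	rs->slice_ctrl = RSEQ_SLICE_EXT_REQUEST;

	/* ... rseq critical section runs here ... */

	if (rs->slice_ctrl & RSEQ_SLICE_EXT_GRANTED)
		syscall(__NR_rseq_slice_yield);

Is that combination expected to work, or do the two features step on
each other?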
> + }
> + clear_tif_rseq();
> }
> -
> - clear_tif_rseq();
> - return false;
> + /*
> + * Arm the slice extension timer if nothing to do anymore and the
> + * task really goes out to user space.
> + */
> + return rseq_arm_slice_extension_timer();
> }
>
> #endif /* CONFIG_GENERIC_ENTRY */
> --- a/include/linux/rseq_types.h
> +++ b/include/linux/rseq_types.h
> @@ -89,9 +89,11 @@ union rseq_slice_state {
> /**
> * struct rseq_slice - Status information for rseq time slice extension
> * @state: Time slice extension state
> + * @expires: The time when a grant expires
> */
> struct rseq_slice {
> union rseq_slice_state state;
> + u64 expires;
> };
>
> /**
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -71,6 +71,8 @@
> #define RSEQ_BUILD_SLOW_PATH
>
> #include <linux/debugfs.h>
> +#include <linux/hrtimer.h>
> +#include <linux/percpu.h>
> #include <linux/prctl.h>
> #include <linux/ratelimit.h>
> #include <linux/rseq_entry.h>
> @@ -499,8 +501,78 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
> }
>
> #ifdef CONFIG_RSEQ_SLICE_EXTENSION
> +struct slice_timer {
> + struct hrtimer timer;
> + void *cookie;
> +};
> +
> +unsigned int rseq_slice_ext_nsecs __read_mostly = 30 * NSEC_PER_USEC;
> +static DEFINE_PER_CPU(struct slice_timer, slice_timer);
> DEFINE_STATIC_KEY_TRUE(rseq_slice_extension_key);
>
> +static enum hrtimer_restart rseq_slice_expired(struct hrtimer *tmr)
> +{
> + struct slice_timer *st = container_of(tmr, struct slice_timer, timer);
> +
> + if (st->cookie == current && current->rseq.slice.state.granted) {
> + rseq_stat_inc(rseq_stats.s_expired);
> + set_need_resched_current();
> + }
> + return HRTIMER_NORESTART;
> +}
> +
> +bool __rseq_arm_slice_extension_timer(void)
> +{
> + struct slice_timer *st = this_cpu_ptr(&slice_timer);
> + struct task_struct *curr = current;
> +
> + lockdep_assert_irqs_disabled();
> +
> + /*
> + * This check prevents that a granted time slice extension exceeds
This check prevents a granted time slice ...
> + * the maximum scheduling latency when the grant expired before
> + * going out to user space. Don't bother to clear the grant here,
> + * it will be cleaned up automatically before going out to user
> + * space.
> + */
> + if ((unlikely(curr->rseq.slice.expires < ktime_get_mono_fast_ns()))) {
> + set_need_resched_current();
> + return true;
> + }
> +
> + /*
> + * Store the task pointer as a cookie for comparison in the timer
> + * function. This is safe as the timer is CPU local and cannot be
> + * in the expiry function at this point.
> + */
I'm just curious about this scenario:
1) Task A requests an extension and is granted.
   st->cookie = Task A
   hrtimer_start();
2) Before getting back to user space, an RT kernel thread wakes up and
preempts Task A. Does this clear the timer?
3) The RT kernel thread finishes, and Task B is scheduled in before the
original timer expires.
4) Task B requests an extension (assuming its time slice was short
enough that this happens before the original timer expires).
I guess it doesn't matter that st->cookie = Task B, as Task A was already
scheduled out. But would calling hrtimer_start() on an already queued
timer cause any issue?
I guess it doesn't, as it looks like hrtimer_start() does indeed remove
a still queued timer before re-arming it.
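From memory it does roughly this (paraphrased, not verbatim from
kernel/time/hrtimer.c):

	/* __hrtimer_start_range_ns() dequeues a queued timer first */
	remove_hrtimer(timer, base, true, force_local);
	...
	enqueue_hrtimer(timer, new_base, mode);

so re-arming with a new expiry should be safe.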
> + st->cookie = curr;
> + hrtimer_start(&st->timer, curr->rseq.slice.expires, HRTIMER_MODE_ABS_PINNED_HARD);
> + /* Arm the syscall entry work */
> + set_task_syscall_work(curr, SYSCALL_RSEQ_SLICE);
> + return false;
> +}
> +
> +static void rseq_cancel_slice_extension_timer(void)
> +{
> + struct slice_timer *st = this_cpu_ptr(&slice_timer);
> +
> + /*
> + * st->cookie can be safely read as preemption is disabled and the
> + * timer is CPU local.
> + *
> + * As this is most probably the first expiring timer, the cancel is
As this is probably the first ...
> + * expensive as it has to reprogram the hardware, but that's less
> + * expensive than going through a full hrtimer_interrupt() cycle
> + * for nothing.
> + *
> + * hrtimer_try_to_cancel() is sufficient here as the timer is CPU
> + * local and once the hrtimer code disabled interrupts the timer
> + * callback cannot be running.
> + */
> + if (st->cookie == current)
> + hrtimer_try_to_cancel(&st->timer);
If the above scenario did happen, the timer would still go off, as
st->cookie == current would likely be false?
Hmm, if it does go off and the task had scheduled back in, would it get
its need_resched set? This is a very unlikely scenario, so I guess it
doesn't really matter.
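Though I suppose the double check in rseq_slice_expired() covers it:

	/* From the patch above: a stale expiry is a no-op unless the
	 * armed task is still current *and* still holds a grant, and
	 * in that case setting need_resched is correct anyway. */
	if (st->cookie == current && current->rseq.slice.state.granted)
		set_need_resched_current();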
I'm just thinking about corner cases, how they could affect this code,
and whether they could cause noticeable issues.
-- Steve
> +}
> +
> static inline void rseq_slice_set_need_resched(struct task_struct *curr)
> {
> /*
> @@ -558,10 +630,11 @@ void rseq_syscall_enter_work(long syscal
> rseq_stat_inc(rseq_stats.s_yielded);
>
> /*
> - * Required to make set_tsk_need_resched() correct on PREEMPT[RT]
> - * kernels.
> + * Required to stabilize the per CPU timer pointer and to make
> + * set_tsk_need_resched() correct on PREEMPT[RT] kernels.
> */
> scoped_guard(preempt) {
> + rseq_cancel_slice_extension_timer();
> /*
> * Now that preemption is disabled, quickly check whether
> * the task was already rescheduled before arriving here.
> @@ -652,6 +725,31 @@ SYSCALL_DEFINE0(rseq_slice_yield)
> return 0;
> }
>
> +#ifdef CONFIG_SYSCTL
> +static const unsigned int rseq_slice_ext_nsecs_min = 10 * NSEC_PER_USEC;
> +static const unsigned int rseq_slice_ext_nsecs_max = 50 * NSEC_PER_USEC;
> +
> +static const struct ctl_table rseq_slice_ext_sysctl[] = {
> + {
> + .procname = "rseq_slice_extension_nsec",
> + .data = &rseq_slice_ext_nsecs,
> + .maxlen = sizeof(unsigned int),
> + .mode = 0644,
> + .proc_handler = proc_douintvec_minmax,
> + .extra1 = (unsigned int *)&rseq_slice_ext_nsecs_min,
> + .extra2 = (unsigned int *)&rseq_slice_ext_nsecs_max,
> + },
> +};
> +
> +static void rseq_slice_sysctl_init(void)
> +{
> + if (rseq_slice_extension_enabled())
> + register_sysctl_init("kernel", rseq_slice_ext_sysctl);
> +}
> +#else /* CONFIG_SYSCTL */
> +static inline void rseq_slice_sysctl_init(void) { }
> +#endif /* !CONFIG_SYSCTL */
> +
> static int __init rseq_slice_cmdline(char *str)
> {
> bool on;
> @@ -664,4 +762,17 @@ static int __init rseq_slice_cmdline(cha
> return 1;
> }
> __setup("rseq_slice_ext=", rseq_slice_cmdline);
> +
> +static int __init rseq_slice_init(void)
> +{
> + unsigned int cpu;
> +
> + for_each_possible_cpu(cpu) {
> + hrtimer_setup(per_cpu_ptr(&slice_timer.timer, cpu), rseq_slice_expired,
> + CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED_HARD);
> + }
> + rseq_slice_sysctl_init();
> + return 0;
> +}
> +device_initcall(rseq_slice_init);
> #endif /* CONFIG_RSEQ_SLICE_EXTENSION */