lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251027113822.UfDZz0mf@linutronix.de>
Date: Mon, 27 Oct 2025 12:38:22 +0100
From: Sebastian Andrzej Siewior <bigeasy@...utronix.de>
To: Thomas Gleixner <tglx@...utronix.de>
Cc: LKML <linux-kernel@...r.kernel.org>,
	Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
	Peter Zijlstra <peterz@...radead.org>,
	"Paul E. McKenney" <paulmck@...nel.org>,
	Boqun Feng <boqun.feng@...il.com>, Jonathan Corbet <corbet@....net>,
	Prakash Sangappa <prakash.sangappa@...cle.com>,
	Madadi Vineeth Reddy <vineethr@...ux.ibm.com>,
	K Prateek Nayak <kprateek.nayak@....com>,
	Steven Rostedt <rostedt@...dmis.org>, Arnd Bergmann <arnd@...db.de>,
	linux-arch@...r.kernel.org
Subject: Re: [patch V2 08/12] rseq: Implement time slice extension
 enforcement timer

On 2025-10-22 14:57:38 [+0200], Thomas Gleixner wrote:
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c
> @@ -500,8 +502,82 @@ SYSCALL_DEFINE4(rseq, struct rseq __user
>  }
>  
>  #ifdef CONFIG_RSEQ_SLICE_EXTENSION
> +struct slice_timer {
> +	struct hrtimer	timer;
> +	void		*cookie;
> +};
> +
> +unsigned int rseq_slice_ext_nsecs __read_mostly = 30 * NSEC_PER_USEC;
> +static DEFINE_PER_CPU(struct slice_timer, slice_timer);
>  DEFINE_STATIC_KEY_TRUE(rseq_slice_extension_key);
>  
> +static enum hrtimer_restart rseq_slice_expired(struct hrtimer *tmr)
> +{
> +	struct slice_timer *st = container_of(tmr, struct slice_timer, timer);
> +
> +	if (st->cookie == current && current->rseq.slice.state.granted) {
> +		rseq_stat_inc(rseq_stats.s_expired);
> +		set_need_resched_current();
> +	}

You arm the timer while leaving to userland. Once in userland the task
can be migrated to another CPU. Once migrated, this CPU can host another
task while the timer fires and does nothing.

> +	return HRTIMER_NORESTART;
> +}
> +
…
> +static void rseq_cancel_slice_extension_timer(void)
> +{
> +	struct slice_timer *st = this_cpu_ptr(&slice_timer);
> +
> +	/*
> +	 * st->cookie can be safely read as preemption is disabled and the
> +	 * timer is CPU local. The active check can obviously race with the
> +	 * hrtimer interrupt, but that's better than disabling interrupts
> +	 * unconditionally right away.
> +	 *
> +	 * As this is most probably the first expiring timer, the cancel is
> +	 * expensive as it has to reprogram the hardware, but that's less
> +	 * expensive than going through a full hrtimer_interrupt() cycle
> +	 * for nothing.
> +	 *
> +	 * hrtimer_try_to_cancel() is sufficient here as with interrupts
> +	 * disabled the timer callback cannot be running and the timer base
> +	 * is well determined as the timer is pinned on the local CPU.
> +	 */
> +	if (st->cookie == current && hrtimer_active(&st->timer)) {
> +		scoped_guard(irq)
> +			hrtimer_try_to_cancel(&st->timer);

I don't see why hrtimer_active() and IRQ-disable is a benefit here.
Unless you want to avoid a branch to hrtimer_try_to_cancel().

The function has its own hrtimer_active() check and disables interrupts
while accessing the hrtimer_base lock. Since preemption is disabled,
st->cookie remains stable.
It can fire right after the hrtimer_active() here. You could just

	if (st->cookie == current)
		hrtimer_try_to_cancel(&st->timer);

at the expense of a branch to hrtimer_try_to_cancel() if the timer
already expired (no interrupts off/on).

> +}
> +
>  static inline void rseq_slice_set_need_resched(struct task_struct *curr)
>  {
>  	/*
> @@ -654,6 +731,31 @@ SYSCALL_DEFINE0(rseq_slice_yield)
>  	return 0;
>  }
>  
> +#ifdef CONFIG_SYSCTL
> +static const unsigned int rseq_slice_ext_nsecs_min = 10 * NSEC_PER_USEC;
> +static const unsigned int rseq_slice_ext_nsecs_max = 50 * NSEC_PER_USEC;
> +
> +static const struct ctl_table rseq_slice_ext_sysctl[] = {
> +	{
> +		.procname	= "rseq_slice_extension_nsec",
> +		.data		= &rseq_slice_ext_nsecs,
> +		.maxlen		= sizeof(unsigned int),
> +		.mode		= 0644,
> +		.proc_handler	= proc_douintvec_minmax,
> +		.extra1		= (unsigned int *)&rseq_slice_ext_nsecs_min,
> +		.extra2		= (unsigned int *)&rseq_slice_ext_nsecs_max,
…

maybe +

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index f3ee807b5d8b3..ed34d21ed94e4 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1228,6 +1228,12 @@ reboot-cmd (SPARC only)
 ROM/Flash boot loader. Maybe to tell it what to do after
 rebooting. ???
 
+rseq_slice_extension_nsec
+=========================
+
+A task may ask to delay its scheduling if it is in a critical section via the
+prctl(PR_RSEQ_SLICE_EXTENSION_SET) mechanism. This sets the maximum allowed
+extension in nanoseconds before a mandatory scheduling of the task is forced.
 
 sched_energy_aware
 ==================


Sebastian

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ