lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251029120405.569f9376@gandalf.local.home>
Date: Wed, 29 Oct 2025 12:04:05 -0400
From: Steven Rostedt <rostedt@...dmis.org>
To: Thomas Gleixner <tglx@...utronix.de>
Cc: LKML <linux-kernel@...r.kernel.org>, Michael Jeanson
 <mjeanson@...icios.com>, Jens Axboe <axboe@...nel.dk>, Mathieu Desnoyers
 <mathieu.desnoyers@...icios.com>, Peter Zijlstra <peterz@...radead.org>,
 "Paul E. McKenney" <paulmck@...nel.org>, x86@...nel.org, Sean
 Christopherson <seanjc@...gle.com>, Wei Liu <wei.liu@...nel.org>
Subject: Re: [patch V6 19/31] rseq: Provide and use rseq_update_user_cs()

On Mon, 27 Oct 2025 09:44:57 +0100 (CET)
Thomas Gleixner <tglx@...utronix.de> wrote:

>  include/linux/rseq_entry.h |  206 +++++++++++++++++++++++++++++++++++++
>  include/linux/rseq_types.h |   11 +-
>  kernel/rseq.c              |  244 +++++++++++++--------------------------------
>  3 files changed, 290 insertions(+), 171 deletions(-)
> --- a/include/linux/rseq_entry.h
> +++ b/include/linux/rseq_entry.h
> @@ -36,6 +36,7 @@ DECLARE_PER_CPU(struct rseq_stats, rseq_
>  #ifdef CONFIG_RSEQ
>  #include <linux/jump_label.h>
>  #include <linux/rseq.h>
> +#include <linux/uaccess.h>
>  
>  #include <linux/tracepoint-defs.h>
>  
> @@ -67,12 +68,217 @@ static inline void rseq_trace_ip_fixup(u
>  
>  DECLARE_STATIC_KEY_MAYBE(CONFIG_RSEQ_DEBUG_DEFAULT_ENABLE, rseq_debug_enabled);
>  
> +#ifdef RSEQ_BUILD_SLOW_PATH
> +#define rseq_inline
> +#else
> +#define rseq_inline __always_inline
> +#endif
> +
> +bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr);

So rseq_debug_update_user_cs() is always declared.

> +
>  static __always_inline void rseq_note_user_irq_entry(void)
>  {
>  	if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY))
>  		current->rseq.event.user_irq = true;
>  }
>  
> +/*
> + * Check whether there is a valid critical section and whether the
> + * instruction pointer in @regs is inside the critical section.
> + *
> + *  - If the critical section is invalid, terminate the task.
> + *
> + *  - If valid and the instruction pointer is inside, set it to the abort IP
> + *
> + *  - If valid and the instruction pointer is outside, clear the critical
> + *    section address.
> + *
> + * Returns true, if the section was valid and either fixup or clear was
> + * done, false otherwise.
> + *
> + * In the failure case task::rseq_event::fatal is set when an invalid
> + * section was found. It's clear when the failure was an unresolved page
> + * fault.
> + *
> + * If inlined into the exit to user path with interrupts disabled, the
> + * caller has to protect against page faults with pagefault_disable().
> + *
> + * In preemptible task context this would be counterproductive as the page
> + * faults could not be fully resolved. As a consequence unresolved page
> + * faults in task context are fatal too.
> + */
> +
> +#ifdef RSEQ_BUILD_SLOW_PATH

But the function is only defined if RSEQ_BUILD_SLOW_PATH is defined.

> +/*
> + * The debug version is put out of line, but kept here so the code stays
> + * together.
> + *
> + * @csaddr has already been checked by the caller to be in user space
> + */
> +bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs,
> +			       unsigned long csaddr)
> +{
> +	struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr;
> +	u64 start_ip, abort_ip, offset, cs_end, head, tasksize = TASK_SIZE;
> +	unsigned long ip = instruction_pointer(regs);
> +	u64 __user *uc_head = (u64 __user *) ucs;
> +	u32 usig, __user *uc_sig;
> +
> +	scoped_user_rw_access(ucs, efault) {
> +		/*
> +		 * Evaluate the user pile and exit if one of the conditions
> +		 * is not fulfilled.
> +		 */
> +		unsafe_get_user(start_ip, &ucs->start_ip, efault);
> +		if (unlikely(start_ip >= tasksize))
> +			goto die;
> +		/* If outside, just clear the critical section. */
> +		if (ip < start_ip)
> +			goto clear;
> +
> +		unsafe_get_user(offset, &ucs->post_commit_offset, efault);
> +		cs_end = start_ip + offset;
> +		/* Check for overflow and wraparound */
> +		if (unlikely(cs_end >= tasksize || cs_end < start_ip))
> +			goto die;
> +
> +		/* If not inside, clear it. */
> +		if (ip >= cs_end)
> +			goto clear;
> +
> +		unsafe_get_user(abort_ip, &ucs->abort_ip, efault);
> +		/* Ensure it's "valid" */
> +		if (unlikely(abort_ip >= tasksize || abort_ip < sizeof(*uc_sig)))
> +			goto die;
> +		/* Validate that the abort IP is not in the critical section */
> +		if (unlikely(abort_ip - start_ip < offset))
> +			goto die;
> +
> +		/*
> +		 * Check version and flags for 0. No point in emitting
> +		 * deprecated warnings before dying. That could be done in
> +		 * the slow path eventually, but *shrug*.
> +		 */
> +		unsafe_get_user(head, uc_head, efault);
> +		if (unlikely(head))
> +			goto die;
> +
> +		/* abort_ip - 4 is >= 0. See abort_ip check above */
> +		uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig));
> +		unsafe_get_user(usig, uc_sig, efault);
> +		if (unlikely(usig != t->rseq.sig))
> +			goto die;
> +
> +		/* rseq_event.user_irq is only valid if CONFIG_GENERIC_IRQ_ENTRY=y */
> +		if (IS_ENABLED(CONFIG_GENERIC_IRQ_ENTRY)) {
> +			/* If not in interrupt from user context, let it die */
> +			if (unlikely(!t->rseq.event.user_irq))
> +				goto die;
> +		}
> +		unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
> +		instruction_pointer_set(regs, (unsigned long)abort_ip);
> +		rseq_stat_inc(rseq_stats.fixup);
> +		break;
> +	clear:
> +		unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
> +		rseq_stat_inc(rseq_stats.clear);
> +		abort_ip = 0ULL;
> +	}
> +
> +	if (unlikely(abort_ip))
> +		rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
> +	return true;
> +die:
> +	t->rseq.event.fatal = true;
> +efault:
> +	return false;
> +}
> +

There's no:

#else

bool rseq_debug_update_user_cs(struct task_struct *t, struct pt_regs *regs,
			       unsigned long csaddr)
{
	return false;
}

> +#endif /* RSEQ_BUILD_SLOW_PATH */
> +
> +/*
> + * This only ensures that abort_ip is in the user address space and
> + * validates that it is preceded by the signature.
> + *
> + * No other sanity checks are done here, that's what the debug code is for.
> + */
> +static rseq_inline bool
> +rseq_update_user_cs(struct task_struct *t, struct pt_regs *regs, unsigned long csaddr)
> +{
> +	struct rseq_cs __user *ucs = (struct rseq_cs __user *)(unsigned long)csaddr;
> +	unsigned long ip = instruction_pointer(regs);
> +	u64 start_ip, abort_ip, offset;
> +	u32 usig, __user *uc_sig;
> +
> +	rseq_stat_inc(rseq_stats.cs);
> +
> +	if (unlikely(csaddr >= TASK_SIZE)) {
> +		t->rseq.event.fatal = true;
> +		return false;
> +	}
> +
> +	if (static_branch_unlikely(&rseq_debug_enabled))
> +		return rseq_debug_update_user_cs(t, regs, csaddr);

Wouldn't the above reference to rseq_debug_update_user_cs() fail to build
if RSEQ_BUILD_SLOW_PATH is not defined?

Or am I missing something?

I see that it looks like RSEQ_BUILD_SLOW_PATH is always defined, but why
have this conditional logic if it can never be undefined?

-- Steve

> +
> +	scoped_user_rw_access(ucs, efault) {
> +		unsafe_get_user(start_ip, &ucs->start_ip, efault);
> +		unsafe_get_user(offset, &ucs->post_commit_offset, efault);
> +		unsafe_get_user(abort_ip, &ucs->abort_ip, efault);
> +
> +		/*
> +		 * No sanity checks. If user space screwed it up, it can
> +		 * keep the pieces. That's what debug code is for.
> +		 *
> +		 * If outside, just clear the critical section.
> +		 */
> +		if (ip - start_ip >= offset)
> +			goto clear;
> +
> +		/*
> +		 * Two requirements for @abort_ip:
> +		 *   - Must be in user space as x86 IRET would happily return to
> +		 *     the kernel.
> +		 *   - The four bytes preceding the instruction at @abort_ip must
> +		 *     contain the signature.
> +		 *
> +		 * The latter protects against the following attack vector:
> +		 *
> +		 * An attacker with limited abilities to write, creates a critical
> +		 * section descriptor, sets the abort IP to a library function or
> +		 * some other ROP gadget and stores the address of the descriptor
> +		 * in TLS::rseq::rseq_cs. An RSEQ abort would then evade ROP
> +		 * protection.
> +		 */
> +		if (abort_ip >= TASK_SIZE || abort_ip < sizeof(*uc_sig))
> +			goto die;
> +
> +		/* The address is guaranteed to be >= 0 and < TASK_SIZE */
> +		uc_sig = (u32 __user *)(unsigned long)(abort_ip - sizeof(*uc_sig));
> +		unsafe_get_user(usig, uc_sig, efault);
> +		if (unlikely(usig != t->rseq.sig))
> +			goto die;
> +
> +		/* Invalidate the critical section */
> +		unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
> +		/* Update the instruction pointer */
> +		instruction_pointer_set(regs, (unsigned long)abort_ip);
> +		rseq_stat_inc(rseq_stats.fixup);
> +		break;
> +	clear:
> +		unsafe_put_user(0ULL, &t->rseq.usrptr->rseq_cs, efault);
> +		rseq_stat_inc(rseq_stats.clear);
> +		abort_ip = 0ULL;
> +	}
> +
> +	if (unlikely(abort_ip))
> +		rseq_trace_ip_fixup(ip, start_ip, offset, abort_ip);
> +	return true;
> +die:
> +	t->rseq.event.fatal = true;
> +efault:
> +	return false;
> +}

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ