linux-kernel - Re: [PATCH v1 1/1] rseq: Validate read-only fields under DEBUG

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f76b79c0-070a-44d6-ac8b-71d063f2357a@efficios.com>
Date: Tue, 12 Nov 2024 09:27:23 -0500
From: Mathieu Desnoyers <mathieu.desnoyers@...icios.com>
To: Peter Zijlstra <peterz@...radead.org>
Cc: linux-kernel@...r.kernel.org, "Paul E . McKenney" <paulmck@...nel.org>,
 Boqun Feng <boqun.feng@...il.com>, Andrew Morton
 <akpm@...ux-foundation.org>, Andy Lutomirski <luto@...nel.org>,
 Ingo Molnar <mingo@...hat.com>, Peter Oskolkov <posk@...gle.com>,
 Dmitry Vyukov <dvyukov@...gle.com>, Marco Elver <elver@...gle.com>,
 Florian Weimer <fweimer@...hat.com>, Carlos O'Donell <carlos@...hat.com>,
 DJ Delorie <dj@...hat.com>, libc-alpha@...rceware.org
Subject: Re: [PATCH v1 1/1] rseq: Validate read-only fields under DEBUG_RSEQ
 config

On 2024-11-12 05:04, Peter Zijlstra wrote:
> On Mon, Nov 11, 2024 at 02:22:14PM -0500, Mathieu Desnoyers wrote:
> 
> So I'm entirely agreeing with the intent, but perhaps we can argue a
> little on the implementation :-)

Of course, thanks for providing feedback!

> 
>> +#ifdef CONFIG_DEBUG_RSEQ
>> +static struct rseq *rseq_kernel_fields(struct task_struct *t)
>> +{
>> +	return (struct rseq *) t->rseq_fields;
>> +}
>> +
>> +static int rseq_validate_ro_fields(struct task_struct *t)
>> +{
>> +	u32 cpu_id_start, cpu_id, node_id, mm_cid;
>> +	struct rseq __user *rseq = t->rseq;
>> +
>> +	/*
>> +	 * Validate fields which are required to be read-only by
>> +	 * user-space.
>> +	 */
>> +	if (!user_read_access_begin(rseq, t->rseq_len))
>> +		goto efault;
>> +	unsafe_get_user(cpu_id_start, &rseq->cpu_id_start, efault_end);
>> +	unsafe_get_user(cpu_id, &rseq->cpu_id, efault_end);
>> +	unsafe_get_user(node_id, &rseq->node_id, efault_end);
>> +	unsafe_get_user(mm_cid, &rseq->mm_cid, efault_end);
>> +	user_read_access_end();
>> +
>> +	if (cpu_id_start != rseq_kernel_fields(t)->cpu_id_start)
>> +		printk_ratelimited(KERN_WARNING
>> +			"Detected rseq cpu_id_start field corruption. Value: %u, expecting: %u (pid=%d).\n",
>> +			cpu_id_start, rseq_kernel_fields(t)->cpu_id_start, t->pid);
>> +	if (cpu_id != rseq_kernel_fields(t)->cpu_id)
>> +		printk_ratelimited(KERN_WARNING
>> +			"Detected rseq cpu_id field corruption. Value: %u, expecting: %u (pid=%d).\n",
>> +			cpu_id, rseq_kernel_fields(t)->cpu_id, t->pid);
>> +	if (node_id != rseq_kernel_fields(t)->node_id)
>> +		printk_ratelimited(KERN_WARNING
>> +			"Detected rseq node_id field corruption. Value: %u, expecting: %u (pid=%d).\n",
>> +			node_id, rseq_kernel_fields(t)->node_id, t->pid);
>> +	if (mm_cid != rseq_kernel_fields(t)->mm_cid)
>> +		printk_ratelimited(KERN_WARNING
>> +			"Detected rseq mm_cid field corruption. Value: %u, expecting: %u (pid=%d).\n",
>> +			mm_cid, rseq_kernel_fields(t)->mm_cid, t->pid);
> 
> So aside from this just being ugly, this also has the problem of getting
> the ratelimits out of sync and perhaps only showing partial corruption
> for any one task.
> 
> Completely untested hackery below.

Your approach looks indeed better than mine, I'll steal it with your
permission. :)

> 
>> @@ -423,6 +504,17 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
>>   	current->rseq = rseq;
>>   	current->rseq_len = rseq_len;
>>   	current->rseq_sig = sig;
>> +#ifdef CONFIG_DEBUG_RSEQ
>> +	/*
>> +	 * Initialize the in-kernel rseq fields copy for validation of
>> +	 * read-only fields.
>> +	 */
>> +	if (get_user(rseq_kernel_fields(current)->cpu_id_start, &rseq->cpu_id_start) ||
>> +	    get_user(rseq_kernel_fields(current)->cpu_id, &rseq->cpu_id) ||
>> +	    get_user(rseq_kernel_fields(current)->node_id, &rseq->node_id) ||
>> +	    get_user(rseq_kernel_fields(current)->mm_cid, &rseq->mm_cid))
>> +		return -EFAULT;
>> +#endif
> 
> So I didn't change the above, but wouldn't it make more sense to do
> rseq_reset_rseq_cpu_node_id() here, but without the validation?

Indeed we could do this (for both DEBUG_RSEQ={y,n}), but it would add extra
useless stores to those userspace fields on rseq registration, which is
performed on every thread creation starting from glibc 2.35. The
rseq_set_notify_resume() invoked at the end of registration ensures that
those fields get populated before return to userspace.

So I am not against a more robust approach, but I'm reluctant to add redundant
work on thread creation.

> 
> ---
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1364,6 +1364,15 @@ struct task_struct {
>   	 * with respect to preemption.
>   	 */
>   	unsigned long rseq_event_mask;
> +# ifdef CONFIG_DEBUG_RSEQ
> +	/*
> +	 * This is a place holder to save a copy of the rseq fields for
> +	 * validation of read-only fields. The struct rseq has a
> +	 * variable-length array at the end, so it cannot be used
> +	 * directly. Reserve a size large enough for the known fields.
> +	 */
> +	char 				rseq_fields[sizeof(struct rseq)];
> +# endif
>   #endif
>   
>   #ifdef CONFIG_SCHED_MM_CID
> --- a/kernel/rseq.c
> +++ b/kernel/rseq.c

We should #include <linux/ratelimit.h> then.

> @@ -25,6 +25,79 @@
>   				  RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \
>   				  RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE)
>   
> +#ifdef CONFIG_DEBUG_RSEQ
> +static struct rseq *rseq_kernel_fields(struct task_struct *t)
> +{
> +	return (struct rseq *) t->rseq_fields;
> +}
> +
> +static int rseq_validate_ro_fields(struct task_struct *t)
> +{
> +	static DEFINE_RATELIMIT_STATE(_rs,
> +				      DEFAULT_RATELIMIT_INTERVAL,
> +				      DEFAULT_RATELIMIT_BURST);
> +	u32 cpu_id_start, cpu_id, node_id, mm_cid;
> +	struct rseq __user *rseq = t->rseq;
> +
> +	/*
> +	 * Validate fields which are required to be read-only by
> +	 * user-space.
> +	 */
> +	if (!user_read_access_begin(rseq, t->rseq_len))
> +		goto efault;
> +	unsafe_get_user(cpu_id_start, &rseq->cpu_id_start, efault_end);
> +	unsafe_get_user(cpu_id, &rseq->cpu_id, efault_end);
> +	unsafe_get_user(node_id, &rseq->node_id, efault_end);
> +	unsafe_get_user(mm_cid, &rseq->mm_cid, efault_end);
> +	user_read_access_end();
> +
> +	if ((cpu_id_start != rseq_kernel_fields(t)->cpu_id_start ||
> +	     cpu_id != rseq_kernel_fields(t)->cpu_id ||
> +	     node_id != rseq_kernel_fields(t)->node_id ||
> +	     mm_cid != rseq_kernel_fields(t)->mm_cid) && __ratelimit(&_rs)) {
> +
> +		pr_warn("Detected rseq corruption for pid: %d;\n"
> +			"  cpu_id_start: %u ?= %u\n"
> +			"  cpu_id:       %u ?= %u\n"
> +			"  node_id:      %u ?= %u\n"
> +			"  mm_cid:       %u ?= %u\n"
> +			t->pid,
> +			cpu_id_start, rseq_kernel_fields(t)->cpu_id_start,
> +			cpu_id, rseq_kernel_fields(t)->cpu_id,
> +			node_id, rseq_kernel_fields(t)->node_id,
> +			mm_cid, rseq_kernel_fields(t)->mm_cid);
> +	}

It looks better, thanks.

> +
> +	/* For now, only print a console warning on mismatch. */
> +	return 0;
> +
> +efault_end:
> +	user_read_access_end();
> +efault:
> +	return -EFAULT;
> +}
> +
> +static void rseq_set_ro_fields(struct task_struct *t, u32 cpu_id_start, u32 cpu_id,
> +			       u32 node_id, u32 mm_cid)
> +{
> +	rseq_kernel_fields(t)->cpu_id_start = cpu_id;
> +	rseq_kernel_fields(t)->cpu_id = cpu_id;
> +	rseq_kernel_fields(t)->node_id = node_id;
> +	rseq_kernel_fields(t)->mm_cid = mm_cid;
> +}

This too.

Thanks!

Mathieu

-- 
Mathieu Desnoyers
EfficiOS Inc.
https://www.efficios.com