lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <2df50490-a493-4e27-a8b1-15b094ab99d8@suse.com>
Date:   Fri, 13 Oct 2023 15:39:01 +0300
From:   Nikolay Borisov <nik.borisov@...e.com>
To:     Brian Gerst <brgerst@...il.com>, linux-kernel@...r.kernel.org,
        x86@...nel.org, Andy Lutomirski <luto@...nel.org>
Cc:     Ingo Molnar <mingo@...nel.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        Borislav Petkov <bp@...en8.de>,
        "H . Peter Anvin" <hpa@...or.com>
Subject: Re: [PATCH v3 1/3] x86/entry/64: Convert SYSRET validation tests to C



On 12.10.23 г. 1:43 ч., Brian Gerst wrote:
> Signed-off-by: Brian Gerst <brgerst@...il.com>
> ---
>   arch/x86/entry/common.c        | 43 ++++++++++++++++++++++++++-
>   arch/x86/entry/entry_64.S      | 53 ++--------------------------------
>   arch/x86/include/asm/syscall.h |  2 +-
>   3 files changed, 45 insertions(+), 53 deletions(-)
> 
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index 0551bcb197fb..207149a0a9b3 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -71,7 +71,8 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
>   	return false;
>   }
>   
> -__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
> +/* Returns true to return using SYSRET, or false to use IRET */
> +__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
>   {
>   	add_random_kstack_offset();
>   	nr = syscall_enter_from_user_mode(regs, nr);
> @@ -85,6 +86,46 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
>   
>   	instrumentation_end();
>   	syscall_exit_to_user_mode(regs);
> +
> +	/*
> +	 * Check that the register state is valid for using SYSRET to exit
> +	 * to userspace.  Otherwise use the slower but fully capable IRET
> +	 * exit path.
> +	 */
> +
> +	/* XEN PV guests always use IRET path */
> +	if (cpu_feature_enabled(X86_FEATURE_XENPV))
> +		return false;
> +
> +	/* SYSRET requires RCX == RIP and R11 == EFLAGS */
> +	if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
> +		return false;


Under what conditions do we expect this to not be true since we've come 
via the syscall which adheres to this layout? IOW isn't this always 
going to be true and we can simply eliminate it?

> +
> +	/* CS and SS must match the values set in MSR_STAR */
> +	if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
> +		return false;
> +
> +	/*
> +	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
> +	 * in kernel space.  This essentially lets the user take over
> +	 * the kernel, since userspace controls RSP.
> +	 *
> +	 * Change top bits to match most significant bit (47th or 56th bit
> +	 * depending on paging mode) in the address.
> +	 */
> +	if (unlikely(!__is_canonical_address(regs->ip, __VIRTUAL_MASK_SHIFT + 1)))
> +		return false;
> +
> +	/*
> +	 * SYSRET cannot restore RF.  It can restore TF, but unlike IRET,
> +	 * restoring TF results in a trap from userspace immediately after
> +	 * SYSRET.
> +	 */
> +	if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
> +		return false;
> +
> +	/* Use SYSRET to exit to userspace */
> +	return true;
>   }
>   #endif
>   
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index 3bdc22d7e78f..de6469dffe3a 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -126,57 +126,8 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
>   	 * In the Xen PV case we must use iret anyway.
>   	 */
>   
> -	ALTERNATIVE "", "jmp	swapgs_restore_regs_and_return_to_usermode", \
> -		X86_FEATURE_XENPV
> -
> -	movq	RCX(%rsp), %rcx
> -	movq	RIP(%rsp), %r11
> -
> -	cmpq	%rcx, %r11	/* SYSRET requires RCX == RIP */
> -	jne	swapgs_restore_regs_and_return_to_usermode
> -
> -	/*
> -	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
> -	 * in kernel space.  This essentially lets the user take over
> -	 * the kernel, since userspace controls RSP.
> -	 *
> -	 * If width of "canonical tail" ever becomes variable, this will need
> -	 * to be updated to remain correct on both old and new CPUs.
> -	 *
> -	 * Change top bits to match most significant bit (47th or 56th bit
> -	 * depending on paging mode) in the address.
> -	 */
> -#ifdef CONFIG_X86_5LEVEL
> -	ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
> -		"shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
> -#else
> -	shl	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
> -	sar	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
> -#endif
> -
> -	/* If this changed %rcx, it was not canonical */
> -	cmpq	%rcx, %r11
> -	jne	swapgs_restore_regs_and_return_to_usermode
> -
> -	cmpq	$__USER_CS, CS(%rsp)		/* CS must match SYSRET */
> -	jne	swapgs_restore_regs_and_return_to_usermode
> -
> -	movq	R11(%rsp), %r11
> -	cmpq	%r11, EFLAGS(%rsp)		/* R11 == RFLAGS */
> -	jne	swapgs_restore_regs_and_return_to_usermode
> -
> -	/*
> -	 * SYSRET cannot restore RF.  It can restore TF, but unlike IRET,
> -	 * restoring TF results in a trap from userspace immediately after
> -	 * SYSRET.
> -	 */
> -	testq	$(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
> -	jnz	swapgs_restore_regs_and_return_to_usermode
> -
> -	/* nothing to check for RSP */
> -
> -	cmpq	$__USER_DS, SS(%rsp)		/* SS must match SYSRET */
> -	jne	swapgs_restore_regs_and_return_to_usermode
> +	ALTERNATIVE "testb %al, %al; jz swapgs_restore_regs_and_return_to_usermode", \
> +		"jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
>   
>   	/*
>   	 * We win! This label is here just for ease of understanding
> diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
> index c7e25c940f1a..f44e2f9ab65d 100644
> --- a/arch/x86/include/asm/syscall.h
> +++ b/arch/x86/include/asm/syscall.h
> @@ -126,7 +126,7 @@ static inline int syscall_get_arch(struct task_struct *task)
>   		? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
>   }
>   
> -void do_syscall_64(struct pt_regs *regs, int nr);
> +bool do_syscall_64(struct pt_regs *regs, int nr);
>   
>   #endif	/* CONFIG_X86_32 */
>   

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ