[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a7b18c6b-1b27-b9fa-8936-2c0b7f5e5151@redhat.com>
Date: Tue, 18 Jul 2023 17:16:41 +0300
From: Mika Penttilä <mpenttil@...hat.com>
To: Brian Gerst <brgerst@...il.com>, linux-kernel@...r.kernel.org,
x86@...nel.org
Cc: Thomas Gleixner <tglx@...utronix.de>,
Borislav Petkov <bp@...en8.de>,
"H . Peter Anvin" <hpa@...or.com>,
Andy Lutomirski <luto@...nel.org>
Subject: Re: [PATCH 2/6] x86/entry/64: Convert SYSRET validation tests to C
Hi,
On 18.7.2023 16.44, Brian Gerst wrote:
> Signed-off-by: Brian Gerst <brgerst@...il.com>
> ---
> arch/x86/entry/common.c | 50 ++++++++++++++++++++++++++++++-
> arch/x86/entry/entry_64.S | 55 ++--------------------------------
> arch/x86/include/asm/syscall.h | 2 +-
> 3 files changed, 52 insertions(+), 55 deletions(-)
>
> diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
> index 6c2826417b33..afe79c3f1c5b 100644
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -70,8 +70,12 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
> return false;
> }
>
> -__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
> +/* Returns true to return using SYSRET, or false to use IRET */
> +__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
> {
> + long rip;
> + unsigned int shift_rip;
> +
> add_random_kstack_offset();
> nr = syscall_enter_from_user_mode(regs, nr);
>
> @@ -84,6 +88,50 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
>
> instrumentation_end();
> syscall_exit_to_user_mode(regs);
> +
> + /*
> + * Check that the register state is valid for using SYSRET to exit
> + * to userspace. Otherwise use the slower but fully capable IRET
> + * exit path.
> + */
> +
> + /* XEN PV guests always use IRET path */
> + if (cpu_feature_enabled(X86_FEATURE_XENPV))
> + return false;
> +
> + /* SYSRET requires RCX == RIP and R11 == EFLAGS */
> + if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
> + return false;
> +
> + /* CS and SS must match the values set in MSR_STAR */
> + if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
> + return false;
> +
> + /*
> + * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
> + * in kernel space. This essentially lets the user take over
> + * the kernel, since userspace controls RSP.
> + *
> + * Change top bits to match most significant bit (47th or 56th bit
> + * depending on paging mode) in the address.
> + */
> + shift_rip = (64 - __VIRTUAL_MASK_SHIFT + 1);
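Should this be:
shift_rip = (64 - __VIRTUAL_MASK_SHIFT - 1);
? The removed assembly shifts by 64 - (__VIRTUAL_MASK_SHIFT+1), i.e. 16 when
__VIRTUAL_MASK_SHIFT is 47, whereas the expression above yields 18.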
> + rip = (long) regs->ip;
> + rip <<= shift_rip;
> + rip >>= shift_rip;
> + if (unlikely((unsigned long) rip != regs->ip))
> + return false;
> +
> + /*
> + * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
> + * restoring TF results in a trap from userspace immediately after
> + * SYSRET.
> + */
> + if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
> + return false;
> +
> + /* Use SYSRET to exit to userspace */
> + return true;
> }
> #endif
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index c01776a51545..b1288e22cae8 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -123,60 +123,9 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
> * Try to use SYSRET instead of IRET if we're returning to
> * a completely clean 64-bit userspace context. If we're not,
> * go to the slow exit path.
> - * In the Xen PV case we must use iret anyway.
> */
> -
> - ALTERNATIVE "", "jmp swapgs_restore_regs_and_return_to_usermode", \
> - X86_FEATURE_XENPV
> -
> - movq RCX(%rsp), %rcx
> - movq RIP(%rsp), %r11
> -
> - cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
> - jne swapgs_restore_regs_and_return_to_usermode
> -
> - /*
> - * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
> - * in kernel space. This essentially lets the user take over
> - * the kernel, since userspace controls RSP.
> - *
> - * If width of "canonical tail" ever becomes variable, this will need
> - * to be updated to remain correct on both old and new CPUs.
> - *
> - * Change top bits to match most significant bit (47th or 56th bit
> - * depending on paging mode) in the address.
> - */
> -#ifdef CONFIG_X86_5LEVEL
> - ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
> - "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
> -#else
> - shl $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
> - sar $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
> -#endif
> -
> - /* If this changed %rcx, it was not canonical */
> - cmpq %rcx, %r11
> - jne swapgs_restore_regs_and_return_to_usermode
> -
> - cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
> - jne swapgs_restore_regs_and_return_to_usermode
> -
> - movq R11(%rsp), %r11
> - cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
> - jne swapgs_restore_regs_and_return_to_usermode
> -
> - /*
> - * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
> - * restoring TF results in a trap from userspace immediately after
> - * SYSRET.
> - */
> - testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
> - jnz swapgs_restore_regs_and_return_to_usermode
> -
> - /* nothing to check for RSP */
> -
> - cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
> - jne swapgs_restore_regs_and_return_to_usermode
> + testb %al, %al
> + jz swapgs_restore_regs_and_return_to_usermode
>
> /*
> * We win! This label is here just for ease of understanding
> diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
> index 4fb36fba4b5a..be6c5515e0b9 100644
> --- a/arch/x86/include/asm/syscall.h
> +++ b/arch/x86/include/asm/syscall.h
> @@ -126,7 +126,7 @@ static inline int syscall_get_arch(struct task_struct *task)
> ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
> }
>
> -void do_syscall_64(struct pt_regs *regs, int nr);
> +bool do_syscall_64(struct pt_regs *regs, int nr);
>
> #endif /* CONFIG_X86_32 */
>
--Mika
Powered by blists - more mailing lists