Message-ID: <87976f9b-2c36-b4f7-5382-4eba569cc687@oracle.com>
Date:   Thu, 7 May 2020 15:39:20 +0200
From:   Alexandre Chartre <alexandre.chartre@...cle.com>
To:     Thomas Gleixner <tglx@...utronix.de>,
        LKML <linux-kernel@...r.kernel.org>
Cc:     x86@...nel.org, "Paul E. McKenney" <paulmck@...nel.org>,
        Andy Lutomirski <luto@...nel.org>,
        Frederic Weisbecker <frederic@...nel.org>,
        Paolo Bonzini <pbonzini@...hat.com>,
        Sean Christopherson <sean.j.christopherson@...el.com>,
        Masami Hiramatsu <mhiramat@...nel.org>,
        Petr Mladek <pmladek@...e.com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Joel Fernandes <joel@...lfernandes.org>,
        Boris Ostrovsky <boris.ostrovsky@...cle.com>,
        Juergen Gross <jgross@...e.com>,
        Brian Gerst <brgerst@...il.com>,
        Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
        Josh Poimboeuf <jpoimboe@...hat.com>,
        Will Deacon <will@...nel.org>
Subject: Re: [patch V4 part 2 04/18] x86/entry/common: Protect against
 instrumentation


On 5/5/20 3:41 PM, Thomas Gleixner wrote:
> Mark the various syscall entries with noinstr to protect them against
> instrumentation and add the noinstr_begin()/end() annotations to mark the
> parts of the functions which are safe to call out into instrumentable code.
> 
> Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
> ---
>   arch/x86/entry/common.c |  135 ++++++++++++++++++++++++++++++++----------------
>   1 file changed, 90 insertions(+), 45 deletions(-)
> 
> --- a/arch/x86/entry/common.c
> +++ b/arch/x86/entry/common.c
> @@ -41,15 +41,26 @@
>   
>   #ifdef CONFIG_CONTEXT_TRACKING
>   /* Called on entry from user mode with IRQs off. */
> -__visible inline noinstr void enter_from_user_mode(void)
> +__visible noinstr void enter_from_user_mode(void)
>   {
> -	CT_WARN_ON(ct_state() != CONTEXT_USER);
> +	enum ctx_state state = ct_state();
> +
>   	user_exit_irqoff();
> +
> +	instr_begin();
> +	CT_WARN_ON(state != CONTEXT_USER);
> +	instr_end();
>   }
>   #else
>   static inline void enter_from_user_mode(void) {}
>   #endif
>   
> +static noinstr void exit_to_user_mode(void)
> +{
> +	user_enter_irqoff();
> +	mds_user_clear_cpu_buffers();
> +}
> +
>   static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
>   {
>   #ifdef CONFIG_X86_64
> @@ -179,8 +190,7 @@ static void exit_to_usermode_loop(struct
>   	}
>   }
>   
> -/* Called with IRQs disabled. */
> -__visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
> +static void __prepare_exit_to_usermode(struct pt_regs *regs)
>   {
>   	struct thread_info *ti = current_thread_info();
>   	u32 cached_flags;
> @@ -219,10 +229,14 @@ static void exit_to_usermode_loop(struct
>   	 */
>   	ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
>   #endif
> +}
>   
> -	user_enter_irqoff();
> -
> -	mds_user_clear_cpu_buffers();
> +__visible noinstr void prepare_exit_to_usermode(struct pt_regs *regs)
> +{
> +	instr_begin();
> +	__prepare_exit_to_usermode(regs);
> +	instr_end();
> +	exit_to_user_mode();
>   }
>   
>   #define SYSCALL_EXIT_WORK_FLAGS				\
> @@ -251,11 +265,7 @@ static void syscall_slow_exit_work(struc
>   		tracehook_report_syscall_exit(regs, step);
>   }
>   
> -/*
> - * Called with IRQs on and fully valid regs.  Returns with IRQs off in a
> - * state such that we can immediately switch to user mode.
> - */
> -__visible inline void syscall_return_slowpath(struct pt_regs *regs)
> +static void __syscall_return_slowpath(struct pt_regs *regs)
>   {
>   	struct thread_info *ti = current_thread_info();
>   	u32 cached_flags = READ_ONCE(ti->flags);
> @@ -276,15 +286,29 @@ static void syscall_slow_exit_work(struc
>   		syscall_slow_exit_work(regs, cached_flags);
>   
>   	local_irq_disable();
> -	prepare_exit_to_usermode(regs);
> +	__prepare_exit_to_usermode(regs);
> +}
> +
> +/*
> + * Called with IRQs on and fully valid regs.  Returns with IRQs off in a
> + * state such that we can immediately switch to user mode.
> + */
> +__visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
> +{
> +	instr_begin();
> +	__syscall_return_slowpath(regs);
> +	instr_end();
> +	exit_to_user_mode();
>   }
>   
>   #ifdef CONFIG_X86_64
> -__visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
> +__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
>   {
>   	struct thread_info *ti;
>   
>   	enter_from_user_mode();
> +	instr_begin();
> +
>   	local_irq_enable();
>   	ti = current_thread_info();
>   	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
> @@ -301,8 +325,10 @@ static void syscall_slow_exit_work(struc
>   		regs->ax = x32_sys_call_table[nr](regs);
>   #endif
>   	}
> +	__syscall_return_slowpath(regs);
>   
> -	syscall_return_slowpath(regs);
> +	instr_end();
> +	exit_to_user_mode();
>   }
>   #endif
>   
> @@ -310,10 +336,10 @@ static void syscall_slow_exit_work(struc
>   /*
>    * Does a 32-bit syscall.  Called with IRQs on in CONTEXT_KERNEL.  Does
>    * all entry and exit work and returns with IRQs off.  This function is
> - * extremely hot in workloads that use it, and it's usually called from
> + * ex2tremely hot in workloads that use it, and it's usually called from

typo: "ex2tremely"
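
Side note for anyone skimming this series for the first time: the
pattern the patch applies to each entry point is roughly the sketch
below. This is only my illustration of the idea -- example_entry() and
do_instrumentable_work() are made-up names, not functions from the
patch:

	/*
	 * The outer function is noinstr, so tracers/kprobes/KCOV must not
	 * touch it.  The part which is known to be safe (context tracking
	 * has been established) is bracketed with instr_begin()/instr_end()
	 * so it may call out into instrumentable code.
	 */
	__visible noinstr void example_entry(struct pt_regs *regs)
	{
		enter_from_user_mode();		/* no instrumentation allowed yet */

		instr_begin();
		do_instrumentable_work(regs);	/* tracing etc. is fine in here */
		instr_end();

		exit_to_user_mode();		/* back to non-instrumentable code */
	}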

alex.


>    * do_fast_syscall_32, so forcibly inline it to improve performance.
>    */
> -static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
> +static void do_syscall_32_irqs_on(struct pt_regs *regs)
>   {
>   	struct thread_info *ti = current_thread_info();
>   	unsigned int nr = (unsigned int)regs->orig_ax;
> @@ -337,27 +363,62 @@ static __always_inline void do_syscall_3
>   		regs->ax = ia32_sys_call_table[nr](regs);
>   	}
>   
> -	syscall_return_slowpath(regs);
> +	__syscall_return_slowpath(regs);
>   }
>   
>   /* Handles int $0x80 */
> -__visible void do_int80_syscall_32(struct pt_regs *regs)
> +__visible noinstr void do_int80_syscall_32(struct pt_regs *regs)
>   {
>   	enter_from_user_mode();
> +	instr_begin();
> +
>   	local_irq_enable();
>   	do_syscall_32_irqs_on(regs);
> +
> +	instr_end();
> +	exit_to_user_mode();
> +}
> +
> +static bool __do_fast_syscall_32(struct pt_regs *regs)
> +{
> +	int res;
> +
> +	/* Fetch EBP from where the vDSO stashed it. */
> +	if (IS_ENABLED(CONFIG_X86_64)) {
> +		/*
> +		 * Micro-optimization: the pointer we're following is
> +		 * explicitly 32 bits, so it can't be out of range.
> +		 */
> +		res = __get_user(*(u32 *)&regs->bp,
> +			 (u32 __user __force *)(unsigned long)(u32)regs->sp);
> +	} else {
> +		res = get_user(*(u32 *)&regs->bp,
> +		       (u32 __user __force *)(unsigned long)(u32)regs->sp);
> +	}
> +
> +	if (res) {
> +		/* User code screwed up. */
> +		regs->ax = -EFAULT;
> +		local_irq_disable();
> +		__prepare_exit_to_usermode(regs);
> +		return false;
> +	}
> +
> +	/* Now this is just like a normal syscall. */
> +	do_syscall_32_irqs_on(regs);
> +	return true;
>   }
>   
>   /* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */
> -__visible long do_fast_syscall_32(struct pt_regs *regs)
> +__visible noinstr long do_fast_syscall_32(struct pt_regs *regs)
>   {
>   	/*
>   	 * Called using the internal vDSO SYSENTER/SYSCALL32 calling
>   	 * convention.  Adjust regs so it looks like we entered using int80.
>   	 */
> -
>   	unsigned long landing_pad = (unsigned long)current->mm->context.vdso +
> -		vdso_image_32.sym_int80_landing_pad;
> +					vdso_image_32.sym_int80_landing_pad;
> +	bool success;
>   
>   	/*
>   	 * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward
> @@ -367,33 +428,17 @@ static __always_inline void do_syscall_3
>   	regs->ip = landing_pad;
>   
>   	enter_from_user_mode();
> +	instr_begin();
>   
>   	local_irq_enable();
> +	success = __do_fast_syscall_32(regs);
>   
> -	/* Fetch EBP from where the vDSO stashed it. */
> -	if (
> -#ifdef CONFIG_X86_64
> -		/*
> -		 * Micro-optimization: the pointer we're following is explicitly
> -		 * 32 bits, so it can't be out of range.
> -		 */
> -		__get_user(*(u32 *)&regs->bp,
> -			    (u32 __user __force *)(unsigned long)(u32)regs->sp)
> -#else
> -		get_user(*(u32 *)&regs->bp,
> -			 (u32 __user __force *)(unsigned long)(u32)regs->sp)
> -#endif
> -		) {
> -
> -		/* User code screwed up. */
> -		local_irq_disable();
> -		regs->ax = -EFAULT;
> -		prepare_exit_to_usermode(regs);
> -		return 0;	/* Keep it simple: use IRET. */
> -	}
> +	instr_end();
> +	exit_to_user_mode();
>   
> -	/* Now this is just like a normal syscall. */
> -	do_syscall_32_irqs_on(regs);
> +	/* If it failed, keep it simple: use IRET. */
> +	if (!success)
> +		return 0;
>   
>   #ifdef CONFIG_X86_64
>   	/*
> 
