lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 6 May 2020 10:17:06 +0200
From:   Paolo Bonzini <pbonzini@...hat.com>
To:     Thomas Gleixner <tglx@...utronix.de>,
        LKML <linux-kernel@...r.kernel.org>
Cc:     x86@...nel.org, "Paul E. McKenney" <paulmck@...nel.org>,
        Andy Lutomirski <luto@...nel.org>,
        Alexandre Chartre <alexandre.chartre@...cle.com>,
        Frederic Weisbecker <frederic@...nel.org>,
        Sean Christopherson <sean.j.christopherson@...el.com>,
        Masami Hiramatsu <mhiramat@...nel.org>,
        Petr Mladek <pmladek@...e.com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Joel Fernandes <joel@...lfernandes.org>,
        Boris Ostrovsky <boris.ostrovsky@...cle.com>,
        Juergen Gross <jgross@...e.com>,
        Brian Gerst <brgerst@...il.com>,
        Mathieu Desnoyers <mathieu.desnoyers@...icios.com>,
        Josh Poimboeuf <jpoimboe@...hat.com>,
        Will Deacon <will@...nel.org>
Subject: Re: [patch V4 part 2 17/18] x86/kvm/vmx: Move guest enter/exit into
 .noinstr.text

On 05/05/20 15:41, Thomas Gleixner wrote:
> Move the functions which are inside the RCU off region into the
> non-instrumentable text section.
> 
> Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
> Cc: Paolo Bonzini <pbonzini@...hat.com>
> Cc: Sean Christopherson <sean.j.christopherson@...el.com>
> ---
>  arch/x86/include/asm/hardirq.h  |    4 -
>  arch/x86/include/asm/kvm_host.h |    8 +++
>  arch/x86/kvm/vmx/ops.h          |    4 +
>  arch/x86/kvm/vmx/vmenter.S      |    5 +
>  arch/x86/kvm/vmx/vmx.c          |  105 ++++++++++++++++++++++------------------
>  arch/x86/kvm/x86.c              |    2 
>  6 files changed, 79 insertions(+), 49 deletions(-)
> 
> --- a/arch/x86/include/asm/hardirq.h
> +++ b/arch/x86/include/asm/hardirq.h
> @@ -67,12 +67,12 @@ static inline void kvm_set_cpu_l1tf_flus
>  	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
>  }
>  
> -static inline void kvm_clear_cpu_l1tf_flush_l1d(void)
> +static __always_inline void kvm_clear_cpu_l1tf_flush_l1d(void)
>  {
>  	__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0);
>  }
>  
> -static inline bool kvm_get_cpu_l1tf_flush_l1d(void)
> +static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
>  {
>  	return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
>  }
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1601,7 +1601,15 @@ asmlinkage void kvm_spurious_fault(void)
>  	insn "\n\t"							\
>  	"jmp	668f \n\t"						\
>  	"667: \n\t"							\
> +	"1: \n\t"							\
> +	".pushsection .discard.instr_begin \n\t"			\
> +	".long 1b - . \n\t"						\
> +	".popsection \n\t"						\
>  	"call	kvm_spurious_fault \n\t"				\
> +	"1: \n\t"							\
> +	".pushsection .discard.instr_end \n\t"				\
> +	".long 1b - . \n\t"						\
> +	".popsection \n\t"						\
>  	"668: \n\t"							\
>  	_ASM_EXTABLE(666b, 667b)
>  
> --- a/arch/x86/kvm/vmx/ops.h
> +++ b/arch/x86/kvm/vmx/ops.h
> @@ -146,7 +146,9 @@ do {									\
>  			  : : op1 : "cc" : error, fault);		\
>  	return;								\
>  error:									\
> +	instr_begin();							\
>  	insn##_error(error_args);					\
> +	instr_end();							\
>  	return;								\
>  fault:									\
>  	kvm_spurious_fault();						\
> @@ -161,7 +163,9 @@ do {									\
>  			  : : op1, op2 : "cc" : error, fault);		\
>  	return;								\
>  error:									\
> +	instr_begin();							\
>  	insn##_error(error_args);					\
> +	instr_end();							\
>  	return;								\
>  fault:									\
>  	kvm_spurious_fault();						\
> --- a/arch/x86/kvm/vmx/vmenter.S
> +++ b/arch/x86/kvm/vmx/vmenter.S
> @@ -27,7 +27,7 @@
>  #define VCPU_R15	__VCPU_REGS_R15 * WORD_SIZE
>  #endif
>  
> -	.text
> +.section .noinstr.text, "ax"
>  
>  /**
>   * vmx_vmenter - VM-Enter the current loaded VMCS
> @@ -231,6 +231,9 @@ SYM_FUNC_START(__vmx_vcpu_run)
>  	jmp 1b
>  SYM_FUNC_END(__vmx_vcpu_run)
>  
> +
> +.section .text, "ax"
> +
>  /**
>   * vmread_error_trampoline - Trampoline from inline asm to vmread_error()
>   * @field:	VMCS field encoding that failed
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6000,7 +6000,7 @@ static int vmx_handle_exit(struct kvm_vc
>   * information but as all relevant affected CPUs have 32KiB L1D cache size
>   * there is no point in doing so.
>   */
> -static void vmx_l1d_flush(struct kvm_vcpu *vcpu)
> +static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu)
>  {
>  	int size = PAGE_SIZE << L1D_CACHE_ORDER;
>  
> @@ -6033,7 +6033,7 @@ static void vmx_l1d_flush(struct kvm_vcp
>  	vcpu->stat.l1d_flush++;
>  
>  	if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) {
> -		wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
> +		native_wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH);
>  		return;
>  	}
>  
> @@ -6514,7 +6514,7 @@ static void vmx_update_hv_timer(struct k
>  	}
>  }
>  
> -void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
> +void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
>  {
>  	if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) {
>  		vmx->loaded_vmcs->host_state.rsp = host_rsp;
> @@ -6524,6 +6524,61 @@ void vmx_update_host_rsp(struct vcpu_vmx
>  
>  bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
>  
> +static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
> +					struct vcpu_vmx *vmx)
> +{
> +	instr_begin();
> +	/*
> +	 * VMENTER enables interrupts (host state), but the kernel state is
> +	 * interrupts disabled when this is invoked. Also tell RCU about
> +	 * it. This is the same logic as for exit_to_user_mode().
> +	 *
> +	 * 1) Trace interrupts on state
> +	 * 2) Prepare lockdep with RCU on
> +	 * 3) Invoke context tracking if enabled to adjust RCU state
> +	 * 4) Tell lockdep that interrupts are enabled
> +	 */
> +	trace_hardirqs_on_prepare();
> +	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
> +	instr_end();
> +
> +	guest_enter_irqoff();
> +	lockdep_hardirqs_on(CALLER_ADDR0);
> +
> +	/* L1D Flush includes CPU buffer clear to mitigate MDS */
> +	if (static_branch_unlikely(&vmx_l1d_should_flush))
> +		vmx_l1d_flush(vcpu);
> +	else if (static_branch_unlikely(&mds_user_clear))
> +		mds_clear_cpu_buffers();
> +
> +	if (vcpu->arch.cr2 != read_cr2())
> +		write_cr2(vcpu->arch.cr2);
> +
> +	vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
> +				   vmx->loaded_vmcs->launched);
> +
> +	vcpu->arch.cr2 = read_cr2();
> +
> +	/*
> +	 * VMEXIT disables interrupts (host state), but tracing and lockdep
> +	 * have them in state 'on'. Same as enter_from_user_mode().
> +	 *
> +	 * 1) Tell lockdep that interrupts are disabled
> +	 * 2) Invoke context tracking if enabled to reactivate RCU
> +	 * 3) Trace interrupts off state
> +	 *
> +	 * This needs to be done before the below as native_read_msr()
> +	 * contains a tracepoint and x86_spec_ctrl_restore_host() calls
> +	 * into world and some more.
> +	 */
> +	lockdep_hardirqs_off(CALLER_ADDR0);
> +	guest_exit_irqoff();
> +
> +	instr_begin();
> +	trace_hardirqs_off_prepare();
> +	instr_end();
> +}
> +
>  static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -6604,49 +6659,9 @@ static void vmx_vcpu_run(struct kvm_vcpu
>  	x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
>  
>  	/*
> -	 * VMENTER enables interrupts (host state), but the kernel state is
> -	 * interrupts disabled when this is invoked. Also tell RCU about
> -	 * it. This is the same logic as for exit_to_user_mode().
> -	 *
> -	 * 1) Trace interrupts on state
> -	 * 2) Prepare lockdep with RCU on
> -	 * 3) Invoke context tracking if enabled to adjust RCU state
> -	 * 4) Tell lockdep that interrupts are enabled
> +	 * The actual VMENTER/EXIT is in the .noinstr.text section.
>  	 */
> -	trace_hardirqs_on_prepare();
> -	lockdep_hardirqs_on_prepare(CALLER_ADDR0);
> -	guest_enter_irqoff();
> -	lockdep_hardirqs_on(CALLER_ADDR0);
> -
> -	/* L1D Flush includes CPU buffer clear to mitigate MDS */
> -	if (static_branch_unlikely(&vmx_l1d_should_flush))
> -		vmx_l1d_flush(vcpu);
> -	else if (static_branch_unlikely(&mds_user_clear))
> -		mds_clear_cpu_buffers();
> -
> -	if (vcpu->arch.cr2 != read_cr2())
> -		write_cr2(vcpu->arch.cr2);
> -
> -	vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
> -				   vmx->loaded_vmcs->launched);
> -
> -	vcpu->arch.cr2 = read_cr2();
> -
> -	/*
> -	 * VMEXIT disables interrupts (host state), but tracing and lockdep
> -	 * have them in state 'on'. Same as enter_from_user_mode().
> -	 *
> -	 * 1) Tell lockdep that interrupts are disabled
> -	 * 2) Invoke context tracking if enabled to reactivate RCU
> -	 * 3) Trace interrupts off state
> -	 *
> -	 * This needs to be done before the below as native_read_msr()
> -	 * contains a tracepoint and x86_spec_ctrl_restore_host() calls
> -	 * into world and some more.
> -	 */
> -	lockdep_hardirqs_off(CALLER_ADDR0);
> -	guest_exit_irqoff();
> -	trace_hardirqs_off_prepare();
> +	vmx_vcpu_enter_exit(vcpu, vmx);
>  
>  	/*
>  	 * We do not use IBRS in the kernel. If this vCPU has used the
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -381,7 +381,7 @@ int kvm_set_apic_base(struct kvm_vcpu *v
>  }
>  EXPORT_SYMBOL_GPL(kvm_set_apic_base);
>  
> -asmlinkage __visible void kvm_spurious_fault(void)
> +asmlinkage __visible noinstr void kvm_spurious_fault(void)
>  {
>  	/* Fault while not rebooting.  We want the trace. */
>  	BUG_ON(!kvm_rebooting);
> 

Acked-by: Paolo Bonzini <pbonzini@...hat.com>

Powered by blists - more mailing lists