Message-ID: <5bd9a3728298b32c067be9048b328f6768f85ab2.camel@oracle.com>
Date:   Thu, 04 Oct 2018 12:28:54 +0300
From:   Nikita Leshenko <nikita.leshchenko@...cle.com>
To:     Paolo Bonzini <pbonzini@...hat.com>, linux-kernel@...r.kernel.org,
        kvm@...r.kernel.org
Cc:     Sean Christopherson <sean.j.christopherson@...el.com>,
        Liran Alon <liran.alon@...cle.com>,
        Radim Krčmář <rkrcmar@...hat.com>
Subject: Re: [PATCH v2] kvm: nVMX: fix entry with pending interrupt if APICv
 is enabled

On Wed, 2018-10-03 at 17:26 +0200, Paolo Bonzini wrote:
> Commit b5861e5cf2fcf83031ea3e26b0a69d887adf7d21 introduced a check on
> the interrupt-window and NMI-window CPU execution controls in order to
> inject an external interrupt vmexit before the first guest instruction
> executes.  However, when APIC virtualization is enabled the host does not
> need a vmexit in order to inject an interrupt at the next interrupt window;
> instead, it just places the interrupt vector in RVI and the processor will
> inject it as soon as possible.  Therefore, on machines with APICv it is
> not enough to check the CPU execution controls: the same scenario can also
> happen if RVI>0.
> 
> Fixes: b5861e5cf2fcf83031ea3e26b0a69d887adf7d21
> Cc: Nikita Leshchenko <nikita.leshchenko@...cle.com>
> Cc: Sean Christopherson <sean.j.christopherson@...el.com>
> Cc: Liran Alon <liran.alon@...cle.com>
> Cc: Radim Krčmář <rkrcmar@...hat.com>
> Signed-off-by: Paolo Bonzini <pbonzini@...hat.com>

Reviewed-by: Nikita Leshenko <nikita.leshchenko@...cle.com>
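
A small aside for anyone reading along: the check added below relies on the APICv recognition rule that a pending virtual interrupt is delivered only when the priority class of RVI (bits 7:4) is strictly higher than the priority class of the virtual PPR, which is the same comparison vmx_has_apicv_interrupt() performs. A minimal standalone sketch of that comparison follows; apicv_interrupt_recognized() and the example values are made up for illustration and are not part of the patch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustration only: compare the priority classes (bits 7:4) of RVI and VPPR. */
static bool apicv_interrupt_recognized(uint8_t rvi, uint8_t vppr)
{
	return (rvi & 0xf0) > (vppr & 0xf0);
}

int main(void)
{
	/* Vector 0x51 pending (class 5), VPPR 0x30 (class 3): recognized. */
	printf("%d\n", apicv_interrupt_recognized(0x51, 0x30));	/* prints 1 */
	/* Vector 0x35 pending (class 3), same class as VPPR: not yet. */
	printf("%d\n", apicv_interrupt_recognized(0x35, 0x30));	/* prints 0 */
	return 0;
}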

> ---
>  arch/x86/kvm/vmx.c | 38 ++++++++++++++++++++++++++------------
>  1 file changed, 26 insertions(+), 12 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 6ef2d5b139b9..c7ae8ea87bc4 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -6162,6 +6162,11 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
>  	nested_mark_vmcs12_pages_dirty(vcpu);
>  }
>  
> +static u8 vmx_get_rvi(void)
> +{
> +	return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
> +}
> +
>  static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
>  {
>  	struct vcpu_vmx *vmx = to_vmx(vcpu);
> @@ -6174,7 +6179,7 @@ static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
>  		WARN_ON_ONCE(!vmx->nested.virtual_apic_page))
>  		return false;
>  
> -	rvi = vmcs_read16(GUEST_INTR_STATUS) & 0xff;
> +	rvi = vmx_get_rvi();
>  
>  	vapic_page = kmap(vmx->nested.virtual_apic_page);
>  	vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
> @@ -10349,6 +10354,14 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
>  	return max_irr;
>  }
>  
> +static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
> +{
> +	u8 rvi = vmx_get_rvi();
> +	u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
> +
> +	return ((rvi & 0xf0) > (vppr & 0xf0));
> +}
> +
>  static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
>  {
>  	if (!kvm_vcpu_apicv_active(vcpu))
> @@ -12593,10 +12606,13 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
>  	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>  	bool from_vmentry = !!exit_qual;
>  	u32 dummy_exit_qual;
> -	u32 vmcs01_cpu_exec_ctrl;
> +	bool evaluate_pending_interrupts;
>  	int r = 0;
>  
> -	vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
> +	evaluate_pending_interrupts = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
> +		(CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING);
> +	if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
> +		evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
>  
>  	enter_guest_mode(vcpu);
>  
> @@ -12644,16 +12660,14 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
>  	 * to L1 or delivered directly to L2 (e.g. In case L1 don't
>  	 * intercept EXTERNAL_INTERRUPT).
>  	 *
> -	 * Usually this would be handled by L0 requesting a
> -	 * IRQ/NMI window by setting VMCS accordingly. However,
> -	 * this setting was done on VMCS01 and now VMCS02 is active
> -	 * instead. Thus, we force L0 to perform pending event
> -	 * evaluation by requesting a KVM_REQ_EVENT.
> -	 */
> -	if (vmcs01_cpu_exec_ctrl &
> -		(CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
> +	 * Usually this would be handled by the processor noticing an
> +	 * IRQ/NMI window request, or checking RVI during evaluation of
> +	 * pending virtual interrupts.  However, this setting was done
> +	 * on VMCS01 and now VMCS02 is active instead. Thus, we force L0
> +	 * to perform pending event evaluation by requesting a KVM_REQ_EVENT.
> +	 */
> +	if (unlikely(evaluate_pending_interrupts))
>  		kvm_make_request(KVM_REQ_EVENT, vcpu);
> -	}
>  
>  	/*
>  	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point

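If I'm reading the last hunk right: evaluate_pending_interrupts is computed while vmcs01 is still current, so whether the trigger is a pending IRQ/NMI window request or a recognizable APICv interrupt (RVI class above VPPR class), the KVM_REQ_EVENT request is what forces L0 to re-evaluate pending events before the first L2 instruction runs, which is exactly the window the original commit was trying to cover.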