lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 9 Aug 2016 17:32:30 +0800
From:	Yang Zhang <yang.zhang.wz@...il.com>
To:	Radim Krčmář <rkrcmar@...hat.com>,
	linux-kernel@...r.kernel.org, kvm@...r.kernel.org
Cc:	Jim Mattson <jmattson@...gle.com>,
	Wincy Van <fanwenyi0529@...il.com>,
	Paolo Bonzini <pbonzini@...hat.com>,
	Bandan Das <bsd@...hat.com>
Subject: Re: [PATCH 1/2] KVM: nVMX: fix msr bitmaps to prevent L2 from
 accessing L0 x2APIC

On 2016/8/9 2:16, Radim Krčmář wrote:
> msr bitmap can be used to avoid a VM exit (interception) on guest MSR
> accesses.  In some configurations of VMX controls, the guest can even
> directly access host's x2APIC MSRs.  See SDM 29.5 VIRTUALIZING MSR-BASED
> APIC ACCESSES.
>
> L2 could read all L0's x2APIC MSRs and write TPR, EOI, and SELF_IPI.
> To do so, L1 would first trick KVM to disable all possible interceptions
> by enabling APICv features and then would turn those features off;
> nested_vmx_merge_msr_bitmap() only disabled interceptions, so VMX would
> not intercept previously enabled MSRs even though they were not safe
> with the new configuration.
>
> Correctly re-enabling interceptions is not enough as a second bug would
> still allow L1+L2 to access host's MSRs: msr bitmap was shared for all
> VMCSs, so L1 could trigger a race to get the desired combination of msr
> bitmap and VMX controls.
>
> This fix allocates a msr bitmap for every L1 VCPU, allows only safe
> x2APIC MSRs from L1's msr bitmap, and disables msr bitmaps if they would
> have to intercept everything anyway.
>
> Fixes: 3af18d9c5fe9 ("KVM: nVMX: Prepare for using hardware MSR bitmap")
> Reported-by: Jim Mattson <jmattson@...gle.com>
> Suggested-by: Wincy Van <fanwenyi0529@...il.com>
> Signed-off-by: Radim Krčmář <rkrcmar@...hat.com>
> ---
>  arch/x86/kvm/vmx.c | 107 ++++++++++++++++++++++-------------------------------
>  1 file changed, 44 insertions(+), 63 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index a45d8580f91e..c66ac2c70d22 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -435,6 +435,8 @@ struct nested_vmx {
>  	bool pi_pending;
>  	u16 posted_intr_nv;
>
> +	unsigned long *msr_bitmap;
> +
>  	struct hrtimer preemption_timer;
>  	bool preemption_timer_expired;
>
> @@ -924,7 +926,6 @@ static unsigned long *vmx_msr_bitmap_legacy;
>  static unsigned long *vmx_msr_bitmap_longmode;
>  static unsigned long *vmx_msr_bitmap_legacy_x2apic;
>  static unsigned long *vmx_msr_bitmap_longmode_x2apic;
> -static unsigned long *vmx_msr_bitmap_nested;
>  static unsigned long *vmx_vmread_bitmap;
>  static unsigned long *vmx_vmwrite_bitmap;
>
> @@ -2508,7 +2509,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
>  	unsigned long *msr_bitmap;
>
>  	if (is_guest_mode(vcpu))
> -		msr_bitmap = vmx_msr_bitmap_nested;
> +		msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap;
>  	else if (cpu_has_secondary_exec_ctrls() &&
>  		 (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
>  		  SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
> @@ -6363,13 +6364,6 @@ static __init int hardware_setup(void)
>  	if (!vmx_msr_bitmap_longmode_x2apic)
>  		goto out4;
>
> -	if (nested) {
> -		vmx_msr_bitmap_nested =
> -			(unsigned long *)__get_free_page(GFP_KERNEL);
> -		if (!vmx_msr_bitmap_nested)
> -			goto out5;
> -	}
> -
>  	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
>  	if (!vmx_vmread_bitmap)
>  		goto out6;
> @@ -6392,8 +6386,6 @@ static __init int hardware_setup(void)
>
>  	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
>  	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
> -	if (nested)
> -		memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE);
>
>  	if (setup_vmcs_config(&vmcs_config) < 0) {
>  		r = -EIO;
> @@ -6529,9 +6521,6 @@ out8:
>  out7:
>  	free_page((unsigned long)vmx_vmread_bitmap);
>  out6:
> -	if (nested)
> -		free_page((unsigned long)vmx_msr_bitmap_nested);
> -out5:
>  	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
>  out4:
>  	free_page((unsigned long)vmx_msr_bitmap_longmode);
> @@ -6557,8 +6546,6 @@ static __exit void hardware_unsetup(void)
>  	free_page((unsigned long)vmx_io_bitmap_a);
>  	free_page((unsigned long)vmx_vmwrite_bitmap);
>  	free_page((unsigned long)vmx_vmread_bitmap);
> -	if (nested)
> -		free_page((unsigned long)vmx_msr_bitmap_nested);
>
>  	free_kvm_area();
>  }
> @@ -6995,16 +6982,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
>  		return 1;
>  	}
>
> +	if (cpu_has_vmx_msr_bitmap()) {
> +		vmx->nested.msr_bitmap =
> +				(unsigned long *)__get_free_page(GFP_KERNEL);
> +		if (!vmx->nested.msr_bitmap)
> +			goto out_msr_bitmap;
> +	}
> +

We export msr_bitmap to guest even it is not supported by hardware. So 
we need to allocate msr_bitmap for L1 unconditionally.

>  	vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL);
>  	if (!vmx->nested.cached_vmcs12)
> -		return -ENOMEM;
> +		goto out_cached_vmcs12;
>
>  	if (enable_shadow_vmcs) {
>  		shadow_vmcs = alloc_vmcs();
> -		if (!shadow_vmcs) {
> -			kfree(vmx->nested.cached_vmcs12);
> -			return -ENOMEM;
> -		}
> +		if (!shadow_vmcs)
> +			goto out_shadow_vmcs;
>  		/* mark vmcs as shadow */
>  		shadow_vmcs->revision_id |= (1u << 31);
>  		/* init shadow vmcs */
> @@ -7024,6 +7016,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
>  	skip_emulated_instruction(vcpu);
>  	nested_vmx_succeed(vcpu);
>  	return 1;
> +
> +out_shadow_vmcs:
> +	kfree(vmx->nested.cached_vmcs12);
> +
> +out_cached_vmcs12:
> +	free_page((unsigned long)vmx->nested.msr_bitmap);
> +
> +out_msr_bitmap:
> +	return -ENOMEM;
>  }
>
>  /*
> @@ -7098,6 +7099,10 @@ static void free_nested(struct vcpu_vmx *vmx)
>  	vmx->nested.vmxon = false;
>  	free_vpid(vmx->nested.vpid02);
>  	nested_release_vmcs12(vmx);
> +	if (vmx->nested.msr_bitmap) {
> +		free_page((unsigned long)vmx->nested.msr_bitmap);
> +		vmx->nested.msr_bitmap = NULL;
> +	}
>  	if (enable_shadow_vmcs)
>  		free_vmcs(vmx->nested.current_shadow_vmcs);
>  	kfree(vmx->nested.cached_vmcs12);
> @@ -9472,8 +9477,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>  {
>  	int msr;
>  	struct page *page;
> -	unsigned long *msr_bitmap;
> +	unsigned long *msr_bitmap_l1;
> +	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap;
>
> +	/* This shortcut is ok because we support only x2APIC MSRs so far. */
>  	if (!nested_cpu_has_virt_x2apic_mode(vmcs12))
>  		return false;
>
> @@ -9482,63 +9489,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu,
>  		WARN_ON(1);
>  		return false;
>  	}
> -	msr_bitmap = (unsigned long *)kmap(page);
> -	if (!msr_bitmap) {
> +	msr_bitmap_l1 = (unsigned long *)kmap(page);
> +	if (!msr_bitmap_l1) {
>  		nested_release_page_clean(page);
>  		WARN_ON(1);
>  		return false;
>  	}
>
> +	memset(msr_bitmap_l0, 0xff, PAGE_SIZE);
> +
>  	if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
>  		if (nested_cpu_has_apic_reg_virt(vmcs12))
>  			for (msr = 0x800; msr <= 0x8ff; msr++)
>  				nested_vmx_disable_intercept_for_msr(
> -					msr_bitmap,
> -					vmx_msr_bitmap_nested,
> +					msr_bitmap_l1, msr_bitmap_l0,
>  					msr, MSR_TYPE_R);
> -		/* TPR is allowed */
> -		nested_vmx_disable_intercept_for_msr(msr_bitmap,
> -				vmx_msr_bitmap_nested,
> +
> +		nested_vmx_disable_intercept_for_msr(
> +				msr_bitmap_l1, msr_bitmap_l0,
>  				APIC_BASE_MSR + (APIC_TASKPRI >> 4),
>  				MSR_TYPE_R | MSR_TYPE_W);
> +
>  		if (nested_cpu_has_vid(vmcs12)) {
> -			/* EOI and self-IPI are allowed */
>  			nested_vmx_disable_intercept_for_msr(
> -				msr_bitmap,
> -				vmx_msr_bitmap_nested,
> +				msr_bitmap_l1, msr_bitmap_l0,
>  				APIC_BASE_MSR + (APIC_EOI >> 4),
>  				MSR_TYPE_W);
>  			nested_vmx_disable_intercept_for_msr(
> -				msr_bitmap,
> -				vmx_msr_bitmap_nested,
> +				msr_bitmap_l1, msr_bitmap_l0,
>  				APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
>  				MSR_TYPE_W);
>  		}
> -	} else {
> -		/*
> -		 * Enable reading intercept of all the x2apic
> -		 * MSRs. We should not rely on vmcs12 to do any
> -		 * optimizations here, it may have been modified
> -		 * by L1.
> -		 */
> -		for (msr = 0x800; msr <= 0x8ff; msr++)
> -			__vmx_enable_intercept_for_msr(
> -				vmx_msr_bitmap_nested,
> -				msr,
> -				MSR_TYPE_R);
> -
> -		__vmx_enable_intercept_for_msr(
> -				vmx_msr_bitmap_nested,
> -				APIC_BASE_MSR + (APIC_TASKPRI >> 4),
> -				MSR_TYPE_W);
> -		__vmx_enable_intercept_for_msr(
> -				vmx_msr_bitmap_nested,
> -				APIC_BASE_MSR + (APIC_EOI >> 4),
> -				MSR_TYPE_W);
> -		__vmx_enable_intercept_for_msr(
> -				vmx_msr_bitmap_nested,
> -				APIC_BASE_MSR + (APIC_SELF_IPI >> 4),
> -				MSR_TYPE_W);
>  	}
>  	kunmap(page);
>  	nested_release_page_clean(page);
> @@ -9957,10 +9938,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
>  	}
>
>  	if (cpu_has_vmx_msr_bitmap() &&
> -	    exec_control & CPU_BASED_USE_MSR_BITMAPS) {
> -		nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
> -		/* MSR_BITMAP will be set by following vmx_set_efer. */
> -	} else
> +	    exec_control & CPU_BASED_USE_MSR_BITMAPS &&
> +	    nested_vmx_merge_msr_bitmap(vcpu, vmcs12))
> +		; /* MSR_BITMAP will be set by following vmx_set_efer. */
> +	else
>  		exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
>
>  	/*
>


-- 
Yang
Alibaba Cloud Computing

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ