[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <85f58713-67f6-fb06-9426-8d03809cea07@linux.intel.com>
Date: Wed, 10 May 2017 22:48:44 +1200
From: "Huang, Kai" <kai.huang@...ux.intel.com>
To: Bandan Das <bsd@...hat.com>, kvm@...r.kernel.org
Cc: pbonzini@...hat.com, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 2/3] nVMX: Implement emulated Page Modification Logging
On 5/6/2017 7:25 AM, Bandan Das wrote:
> With EPT A/D enabled, processor access to L2 guest
> paging structures will result in a write violation.
> When this happens, write the GUEST_PHYSICAL_ADDRESS
> to the pml buffer provided by L1 if the access is
> write and the dirty bit is being set.
>
> This patch also adds necessary checks during VMEntry if L1
> has enabled PML. If the PML index overflows, we change the
> exit reason and run L1 to simulate a PML full event.
>
> Signed-off-by: Bandan Das <bsd@...hat.com>
> ---
> arch/x86/kvm/vmx.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 79 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 2211697..8b9e942 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -248,6 +248,7 @@ struct __packed vmcs12 {
> u64 xss_exit_bitmap;
> u64 guest_physical_address;
> u64 vmcs_link_pointer;
> + u64 pml_address;
> u64 guest_ia32_debugctl;
> u64 guest_ia32_pat;
> u64 guest_ia32_efer;
> @@ -369,6 +370,7 @@ struct __packed vmcs12 {
> u16 guest_ldtr_selector;
> u16 guest_tr_selector;
> u16 guest_intr_status;
> + u16 guest_pml_index;
> u16 host_es_selector;
> u16 host_cs_selector;
> u16 host_ss_selector;
> @@ -407,6 +409,7 @@ struct nested_vmx {
> /* Has the level1 guest done vmxon? */
> bool vmxon;
> gpa_t vmxon_ptr;
> + bool pml_full;
>
> /* The guest-physical address of the current VMCS L1 keeps for L2 */
> gpa_t current_vmptr;
> @@ -742,6 +745,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
> FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
> FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
> FIELD(GUEST_INTR_STATUS, guest_intr_status),
> + FIELD(GUEST_PML_INDEX, guest_pml_index),
> FIELD(HOST_ES_SELECTOR, host_es_selector),
> FIELD(HOST_CS_SELECTOR, host_cs_selector),
> FIELD(HOST_SS_SELECTOR, host_ss_selector),
> @@ -767,6 +771,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
> FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
> FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
> FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
> + FIELD64(PML_ADDRESS, pml_address),
> FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
> FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
> FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
> @@ -1349,6 +1354,11 @@ static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
> vmx_xsaves_supported();
> }
>
> +static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
> +{
> + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML);
> +}
> +
> static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
> {
> return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
> @@ -9368,13 +9378,20 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
> struct x86_exception *fault)
> {
> struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> u32 exit_reason;
> + unsigned long exit_qualification = vcpu->arch.exit_qualification;
>
> - if (fault->error_code & PFERR_RSVD_MASK)
> + if (vmx->nested.pml_full) {
> + exit_reason = EXIT_REASON_PML_FULL;
> + vmx->nested.pml_full = false;
> + exit_qualification &= INTR_INFO_UNBLOCK_NMI;
Sorry, I cannot recall the details. It would probably be better to add a
comment explaining why INTR_INFO_UNBLOCK_NMI is masked out here.
> + } else if (fault->error_code & PFERR_RSVD_MASK)
> exit_reason = EXIT_REASON_EPT_MISCONFIG;
> else
> exit_reason = EXIT_REASON_EPT_VIOLATION;
> - nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification);
> +
> + nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
> vmcs12->guest_physical_address = fault->address;
> }
>
> @@ -9717,6 +9734,22 @@ static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
> return 0;
> }
>
> +static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
> + struct vmcs12 *vmcs12)
> +{
> + u64 address = vmcs12->pml_address;
> + int maxphyaddr = cpuid_maxphyaddr(vcpu);
> +
> + if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) {
> + if (!nested_cpu_has_ept(vmcs12) ||
> + !IS_ALIGNED(address, 4096) ||
> + address >> maxphyaddr)
> + return -EINVAL;
> + }
Do we also need to check whether EPT A/D has been enabled for vmcs12 in
order for VMEntry to work? I cannot recall the details, but it is probably
not necessary.
> +
> + return 0;
> +}
> +
> static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
> struct vmx_msr_entry *e)
> {
> @@ -10252,6 +10285,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12))
> return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>
> + if (nested_vmx_check_pml_controls(vcpu, vmcs12))
> + return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
> +
> if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
> vmx->nested.nested_vmx_procbased_ctls_low,
> vmx->nested.nested_vmx_procbased_ctls_high) ||
> @@ -11146,6 +11182,46 @@ static void vmx_flush_log_dirty(struct kvm *kvm)
> kvm_flush_pml_buffers(kvm);
> }
>
> +static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
> +{
> + struct vmcs12 *vmcs12;
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + gpa_t gpa;
> + struct page *page = NULL;
> + u64 *pml_address;
> +
> + if (is_guest_mode(vcpu)) {
> + WARN_ON_ONCE(vmx->nested.pml_full);
> +
> + /*
> + * Check if PML is enabled for the nested guest.
> + * Whether eptp bit 6 is set is already checked
> + * as part of A/D emulation.
> + */
> + vmcs12 = get_vmcs12(vcpu);
> + if (!nested_cpu_has_pml(vmcs12))
> + return 0;
Since nested_vmx_check_pml_controls above doesn't check the EPT A/D bit in
L1, it seems we need to add that check here.
> +
> + if (vmcs12->guest_pml_index > PML_ENTITY_NUM) {
> + vmx->nested.pml_full = true;
> + return 1;
> + }
Is the purpose of returning 1 to make the upper-layer code inject a PML
full VMEXIT into L1 in nested_ept_inject_page_fault?
> +
> + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
> +
> + page = nested_get_page(vcpu, vmcs12->pml_address);
> + if (!page)
> + return 0;
If PML is enabled in L1, I think nested_get_page should never return a
NULL PML page (unless L1 does something wrong). Wouldn't it be better to
return 1 rather than 0, and handle the error in
nested_ept_inject_page_fault according to vmcs12->pml_address?
> +
> + pml_address = kmap(page);
> + pml_address[vmcs12->guest_pml_index--] = gpa;
This gpa is the L2 guest's GPA. Do we also need to mark L1's GPA (which
corresponds to the L2 guest's GPA above) into the dirty log? Or has this
already been done?
Thanks,
-Kai
> + kunmap(page);
> + nested_release_page_clean(page);
> + }
> +
> + return 0;
> +}
> +
> static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
> struct kvm_memory_slot *memslot,
> gfn_t offset, unsigned long mask)
> @@ -11505,6 +11581,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
> .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
> .flush_log_dirty = vmx_flush_log_dirty,
> .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
> + .write_log_dirty = vmx_write_pml_buffer,
>
> .pre_block = vmx_pre_block,
> .post_block = vmx_post_block,
>
Powered by blists - more mailing lists