[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <85f58713-67f6-fb06-9426-8d03809cea07@linux.intel.com>
Date: Wed, 10 May 2017 22:48:44 +1200
From: "Huang, Kai" <kai.huang@...ux.intel.com>
To: Bandan Das <bsd@...hat.com>, kvm@...r.kernel.org
Cc: pbonzini@...hat.com, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 2/3] nVMX: Implement emulated Page Modification Logging
On 5/6/2017 7:25 AM, Bandan Das wrote:
> With EPT A/D enabled, processor access to L2 guest
> paging structures will result in a write violation.
> When this happens, write the GUEST_PHYSICAL_ADDRESS
> to the pml buffer provided by L1 if the access is
> write and the dirty bit is being set.
>
> This patch also adds necessary checks during VMEntry if L1
> has enabled PML. If the PML index overflows, we change the
> exit reason and run L1 to simulate a PML full event.
>
> Signed-off-by: Bandan Das <bsd@...hat.com>
> ---
> arch/x86/kvm/vmx.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
> 1 file changed, 79 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 2211697..8b9e942 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -248,6 +248,7 @@ struct __packed vmcs12 {
> u64 xss_exit_bitmap;
> u64 guest_physical_address;
> u64 vmcs_link_pointer;
> + u64 pml_address;
> u64 guest_ia32_debugctl;
> u64 guest_ia32_pat;
> u64 guest_ia32_efer;
> @@ -369,6 +370,7 @@ struct __packed vmcs12 {
> u16 guest_ldtr_selector;
> u16 guest_tr_selector;
> u16 guest_intr_status;
> + u16 guest_pml_index;
> u16 host_es_selector;
> u16 host_cs_selector;
> u16 host_ss_selector;
> @@ -407,6 +409,7 @@ struct nested_vmx {
> /* Has the level1 guest done vmxon? */
> bool vmxon;
> gpa_t vmxon_ptr;
> + bool pml_full;
>
> /* The guest-physical address of the current VMCS L1 keeps for L2 */
> gpa_t current_vmptr;
> @@ -742,6 +745,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
> FIELD(GUEST_LDTR_SELECTOR, guest_ldtr_selector),
> FIELD(GUEST_TR_SELECTOR, guest_tr_selector),
> FIELD(GUEST_INTR_STATUS, guest_intr_status),
> + FIELD(GUEST_PML_INDEX, guest_pml_index),
> FIELD(HOST_ES_SELECTOR, host_es_selector),
> FIELD(HOST_CS_SELECTOR, host_cs_selector),
> FIELD(HOST_SS_SELECTOR, host_ss_selector),
> @@ -767,6 +771,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
> FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
> FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
> FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
> + FIELD64(PML_ADDRESS, pml_address),
> FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
> FIELD64(GUEST_IA32_PAT, guest_ia32_pat),
> FIELD64(GUEST_IA32_EFER, guest_ia32_efer),
> @@ -1349,6 +1354,11 @@ static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
> vmx_xsaves_supported();
> }
>
> +static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
> +{
> + return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML);
> +}
> +
> static inline bool nested_cpu_has_virt_x2apic_mode(struct vmcs12 *vmcs12)
> {
> return nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
> @@ -9368,13 +9378,20 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
> struct x86_exception *fault)
> {
> struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> u32 exit_reason;
> + unsigned long exit_qualification = vcpu->arch.exit_qualification;
>
> - if (fault->error_code & PFERR_RSVD_MASK)
> + if (vmx->nested.pml_full) {
> + exit_reason = EXIT_REASON_PML_FULL;
> + vmx->nested.pml_full = false;
> + exit_qualification &= INTR_INFO_UNBLOCK_NMI;
Sorry, I cannot recall the details. It would probably be better to add a
comment explaining why INTR_INFO_UNBLOCK_NMI is masked out here.
> + } else if (fault->error_code & PFERR_RSVD_MASK)
> exit_reason = EXIT_REASON_EPT_MISCONFIG;
> else
> exit_reason = EXIT_REASON_EPT_VIOLATION;
> - nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification);
> +
> + nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification);
> vmcs12->guest_physical_address = fault->address;
> }
>
> @@ -9717,6 +9734,22 @@ static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
> return 0;
> }
>
> +static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
> + struct vmcs12 *vmcs12)
> +{
> + u64 address = vmcs12->pml_address;
> + int maxphyaddr = cpuid_maxphyaddr(vcpu);
> +
> + if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_PML)) {
> + if (!nested_cpu_has_ept(vmcs12) ||
> + !IS_ALIGNED(address, 4096) ||
> + address >> maxphyaddr)
> + return -EINVAL;
> + }
Do we also need to check whether EPT A/D has been enabled for vmcs12 in
order for VMEntry to work? I cannot recall the details, but it is probably
not necessary.
> +
> + return 0;
> +}
> +
> static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
> struct vmx_msr_entry *e)
> {
> @@ -10252,6 +10285,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
> if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12))
> return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
>
> + if (nested_vmx_check_pml_controls(vcpu, vmcs12))
> + return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
> +
> if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
> vmx->nested.nested_vmx_procbased_ctls_low,
> vmx->nested.nested_vmx_procbased_ctls_high) ||
> @@ -11146,6 +11182,46 @@ static void vmx_flush_log_dirty(struct kvm *kvm)
> kvm_flush_pml_buffers(kvm);
> }
>
> +static int vmx_write_pml_buffer(struct kvm_vcpu *vcpu)
> +{
> + struct vmcs12 *vmcs12;
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
> + gpa_t gpa;
> + struct page *page = NULL;
> + u64 *pml_address;
> +
> + if (is_guest_mode(vcpu)) {
> + WARN_ON_ONCE(vmx->nested.pml_full);
> +
> + /*
> + * Check if PML is enabled for the nested guest.
> + * Whether eptp bit 6 is set is already checked
> + * as part of A/D emulation.
> + */
> + vmcs12 = get_vmcs12(vcpu);
> + if (!nested_cpu_has_pml(vmcs12))
> + return 0;
Since nested_vmx_check_pml_controls above doesn't check the EPT A/D bit in
L1, it seems we need to add that check here.
> +
> + if (vmcs12->guest_pml_index > PML_ENTITY_NUM) {
> + vmx->nested.pml_full = true;
> + return 1;
> + }
Is the purpose of returning 1 to make the upper-layer code inject a PML
full VMEXIT into L1 in nested_ept_inject_page_fault?
> +
> + gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS) & ~0xFFFull;
> +
> + page = nested_get_page(vcpu, vmcs12->pml_address);
> + if (!page)
> + return 0;
If PML is enabled in L1, I think nested_get_page should never return a
NULL PML page (unless L1 does something wrong). Wouldn't it be better to
return 1 rather than 0, and handle the error in
nested_ept_inject_page_fault according to vmcs12->pml_address?
> +
> + pml_address = kmap(page);
> + pml_address[vmcs12->guest_pml_index--] = gpa;
This gpa is the L2 guest's GPA. Do we also need to mark L1's GPA (which
corresponds to the L2 guest's GPA above) into the dirty log? Or has this
already been done?
Thanks,
-Kai
> + kunmap(page);
> + nested_release_page_clean(page);
> + }
> +
> + return 0;
> +}
> +
> static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
> struct kvm_memory_slot *memslot,
> gfn_t offset, unsigned long mask)
> @@ -11505,6 +11581,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
> .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
> .flush_log_dirty = vmx_flush_log_dirty,
> .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
> + .write_log_dirty = vmx_write_pml_buffer,
>
> .pre_block = vmx_pre_block,
> .post_block = vmx_post_block,
>
Powered by blists - more mailing lists