[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CALMp9eRG=L_dQfS_qpYhJ_86B-yyfYYg+pwcixQOfWT4hwCa1Q@mail.gmail.com>
Date: Fri, 24 Apr 2020 10:47:04 -0700
From: Jim Mattson <jmattson@...gle.com>
To: Sean Christopherson <sean.j.christopherson@...el.com>
Cc: Paolo Bonzini <pbonzini@...hat.com>,
Vitaly Kuznetsov <vkuznets@...hat.com>,
Wanpeng Li <wanpengli@...cent.com>,
Joerg Roedel <joro@...tes.org>, kvm list <kvm@...r.kernel.org>,
LKML <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH] KVM: nVMX: Tweak handling of failure code for nested
VM-Enter failure
On Fri, Apr 24, 2020 at 10:19 AM Sean Christopherson
<sean.j.christopherson@...el.com> wrote:
>
> Use an enum for passing around the failure code for a failed VM-Enter
> that results in VM-Exit to provide a level of indirection from the final
> resting place of the failure code, vmcs.EXIT_QUALIFICATION. The exit
> qualification field is an unsigned long, e.g. passing around
> 'u32 exit_qual' throws up red flags as it suggests KVM may be dropping
> bits when reporting errors to L1. This is a red herring because the
> only defined failure codes are 0, 2, 3, and 4, i.e. don't come remotely
> close to overflowing a u32.
>
> Setting vmcs.EXIT_QUALIFICATION on entry failure is further complicated
> by the MSR load list, which returns the (1-based) entry that failed, and
> the number of MSRs to load is a 32-bit VMCS field. At first blush, it
> would appear that overflowing a u32 is possible, but the number of MSRs
> that can be loaded is hard-capped at 4096 (limited by MSR_IA32_VMX_MISC).
>
> In other words, there are two completely disparate types of data that
> eventually get stuffed into vmcs.EXIT_QUALIFICATION, neither of which is
> an 'unsigned long' in nature. This was presumably the reasoning for
> switching to 'u32' when the related code was refactored in commit
> ca0bde28f2ed6 ("kvm: nVMX: Split VMCS checks from nested_vmx_run()").
>
> Using an enum for the failure code addresses the technically-possible-
> but-will-never-happen scenario where Intel defines a failure code that
> doesn't fit in a 32-bit integer. The enum variables and values will
> either be automatically sized (gcc 5.4 behavior) or be subjected to some
> combination of truncation. The former case will simply work, while the
> latter will trigger a compile-time warning unless the compiler is being
> particularly unhelpful.
>
> Separating the failure code from the failed MSR entry allows for
> disassociating both from vmcs.EXIT_QUALIFICATION, which avoids the
> conundrum where KVM has to choose between 'u32 exit_qual' and tracking
> values as 'unsigned long' that have no business being tracked as such.
>
> Opportunistically rename the variables in load_vmcs12_host_state() and
> vmx_set_nested_state() to call out that they're ignored, and add a
> comment in nested_vmx_load_msr() to call out that returning 'i + 1'
> can't wrap.
>
> No functional change intended.
>
> Reported-by: Vitaly Kuznetsov <vkuznets@...hat.com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@...el.com>
> ---
> arch/x86/include/asm/vmx.h | 10 ++++++----
> arch/x86/kvm/vmx/nested.c | 38 +++++++++++++++++++++-----------------
> 2 files changed, 27 insertions(+), 21 deletions(-)
>
> diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
> index 5e090d1f03f8..cd7de4b401fe 100644
> --- a/arch/x86/include/asm/vmx.h
> +++ b/arch/x86/include/asm/vmx.h
> @@ -527,10 +527,12 @@ struct vmx_msr_entry {
> /*
> * Exit Qualifications for entry failure during or after loading guest state
> */
> -#define ENTRY_FAIL_DEFAULT 0
> -#define ENTRY_FAIL_PDPTE 2
> -#define ENTRY_FAIL_NMI 3
> -#define ENTRY_FAIL_VMCS_LINK_PTR 4
> +enum vm_entry_failure_code {
> + ENTRY_FAIL_DEFAULT = 0,
> + ENTRY_FAIL_PDPTE = 2,
> + ENTRY_FAIL_NMI = 3,
> + ENTRY_FAIL_VMCS_LINK_PTR = 4,
> +};
>
> /*
> * Exit Qualifications for EPT Violations
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index b516c24494e3..e66320997910 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -927,6 +927,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
> }
> return 0;
> fail:
> + /* Note, max_msr_list_size is at most 4096, i.e. this can't wrap. */
> return i + 1;
> }
>
> @@ -1122,7 +1123,7 @@ static bool nested_vmx_transition_mmu_sync(struct kvm_vcpu *vcpu)
> * @entry_failure_code.
> */
> static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept,
> - u32 *entry_failure_code)
> + enum vm_entry_failure_code *entry_failure_code)
> {
> if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) {
> if (CC(!nested_cr3_valid(vcpu, cr3))) {
> @@ -2475,7 +2476,7 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
> * is assigned to entry_failure_code on failure.
> */
> static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
> - u32 *entry_failure_code)
> + enum vm_entry_failure_code *entry_failure_code)
> {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
> @@ -2935,11 +2936,11 @@ static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
>
> static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
> struct vmcs12 *vmcs12,
> - u32 *exit_qual)
> + enum vm_entry_failure_code *entry_failure_code)
> {
> bool ia32e;
>
> - *exit_qual = ENTRY_FAIL_DEFAULT;
> + *entry_failure_code = ENTRY_FAIL_DEFAULT;
>
> if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) ||
> CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)))
> @@ -2954,7 +2955,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
> return -EINVAL;
>
> if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
> - *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR;
> + *entry_failure_code = ENTRY_FAIL_VMCS_LINK_PTR;
> return -EINVAL;
> }
>
> @@ -3247,8 +3248,9 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
> bool evaluate_pending_interrupts;
> + enum vm_entry_failure_code entry_failure_code;
> u32 exit_reason = EXIT_REASON_INVALID_STATE;
> - u32 exit_qual;
> + u32 failed_msr;
>
> if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
> kvm_vcpu_flush_tlb_current(vcpu);
> @@ -3296,7 +3298,7 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
> return NVMX_VMENTRY_VMFAIL;
> }
>
> - if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual))
> + if (nested_vmx_check_guest_state(vcpu, vmcs12, &entry_failure_code))
> goto vmentry_fail_vmexit;
> }
>
> @@ -3304,16 +3306,18 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
> if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
> vcpu->arch.tsc_offset += vmcs12->tsc_offset;
>
> - if (prepare_vmcs02(vcpu, vmcs12, &exit_qual))
> + if (prepare_vmcs02(vcpu, vmcs12, &entry_failure_code))
> goto vmentry_fail_vmexit_guest_mode;
>
> if (from_vmentry) {
> exit_reason = EXIT_REASON_MSR_LOAD_FAIL;
> - exit_qual = nested_vmx_load_msr(vcpu,
> - vmcs12->vm_entry_msr_load_addr,
> - vmcs12->vm_entry_msr_load_count);
> - if (exit_qual)
> + failed_msr = nested_vmx_load_msr(vcpu,
> + vmcs12->vm_entry_msr_load_addr,
> + vmcs12->vm_entry_msr_load_count);
> + if (failed_msr) {
> + entry_failure_code = failed_msr;
This assignment is a bit dodgy from a type perspective, and suggests
that perhaps a better type for the local variable is an
undiscriminated union of the enumerated type and a sufficiently large
unsigned integer type. But I won't be a stickler if you add a comment.
:-)
Powered by blists - more mailing lists