[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANRm+Cx85NBnL76VoFV+DNrShp_2o+c4dgQCwNARzrAcmX1KAw@mail.gmail.com>
Date: Wed, 16 Sep 2020 09:31:22 +0800
From: Wanpeng Li <kernellwp@...il.com>
To: Sean Christopherson <sean.j.christopherson@...el.com>
Cc: Paolo Bonzini <pbonzini@...hat.com>,
Vitaly Kuznetsov <vkuznets@...hat.com>,
Wanpeng Li <wanpengli@...cent.com>,
Jim Mattson <jmattson@...gle.com>,
Joerg Roedel <joro@...tes.org>, kvm <kvm@...r.kernel.org>,
LKML <linux-kernel@...r.kernel.org>,
Tom Lendacky <thomas.lendacky@....com>
Subject: Re: [PATCH] KVM: x86: Add kvm_x86_ops hook to short circuit emulation
On Wed, 16 Sep 2020 at 07:29, Sean Christopherson
<sean.j.christopherson@...el.com> wrote:
>
> Replace the existing kvm_x86_ops.need_emulation_on_page_fault() with a
> more generic is_emulatable(), and unconditionally call the new function
> in x86_emulate_instruction().
>
> KVM will use the generic hook to support multiple security related
> technologies that prevent emulation in one way or another. Similar to
> the existing AMD #NPF case where emulation of the current instruction is
> not possible due to lack of information, AMD's SEV-ES and Intel's SGX
> and TDX will introduce scenarios where emulation is impossible due to
> the guest's register state being inaccessible. And again similar to the
> existing #NPF case, emulation can be initiated by kvm_mmu_page_fault(),
> i.e. outside of the control of vendor-specific code.
>
> While the cause and architecturally visible behavior of the various
> cases are different, e.g. SGX will inject a #UD, AMD #NPF is a clean
> resume or complete shutdown, and SEV-ES and TDX "return" an error, the
> impact on the common emulation code is identical: KVM must stop
> emulation immediately and resume the guest.
>
> Query is_emulatable() in handle_ud() as well so that the
> force_emulation_prefix code doesn't incorrectly modify RIP before
> calling emulate_instruction() in the absurdly unlikely scenario that
> KVM encounters forced emulation in conjunction with "do not emulate".
>
> Cc: Tom Lendacky <thomas.lendacky@....com>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@...el.com>
> ---
> arch/x86/include/asm/kvm_host.h | 2 +-
> arch/x86/kvm/mmu/mmu.c | 12 ------------
> arch/x86/kvm/svm/svm.c | 31 ++++++++++++++++++-------------
> arch/x86/kvm/vmx/vmx.c | 12 ++++++------
> arch/x86/kvm/x86.c | 9 ++++++++-
> 5 files changed, 33 insertions(+), 33 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 5303dbc5c9bc..fa89511ed9d6 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1221,7 +1221,7 @@ struct kvm_x86_ops {
>
> int (*get_msr_feature)(struct kvm_msr_entry *entry);
>
> - bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
> + bool (*is_emulatable)(struct kvm_vcpu *vcpu, void *insn, int insn_len);
>
> bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
> int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index a5d0207e7189..f818a46db58c 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -5485,18 +5485,6 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
> if (!mmio_info_in_cache(vcpu, cr2_or_gpa, direct) && !is_guest_mode(vcpu))
> emulation_type |= EMULTYPE_ALLOW_RETRY_PF;
> emulate:
> - /*
> - * On AMD platforms, under certain conditions insn_len may be zero on #NPF.
> - * This can happen if a guest gets a page-fault on data access but the HW
> - * table walker is not able to read the instruction page (e.g instruction
> - * page is not present in memory). In those cases we simply restart the
> - * guest, with the exception of AMD Erratum 1096 which is unrecoverable.
> - */
> - if (unlikely(insn && !insn_len)) {
> - if (!kvm_x86_ops.need_emulation_on_page_fault(vcpu))
> - return 1;
> - }
> -
> return x86_emulate_instruction(vcpu, cr2_or_gpa, emulation_type, insn,
> insn_len);
> }
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 03dd7bac8034..3a55495d985f 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -3933,19 +3933,10 @@ static void enable_smi_window(struct kvm_vcpu *vcpu)
> }
> }
>
> -static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
> +static bool svm_is_emulatable(struct kvm_vcpu *vcpu, void *insn, int insn_len)
> {
> - unsigned long cr4 = kvm_read_cr4(vcpu);
> - bool smep = cr4 & X86_CR4_SMEP;
> - bool smap = cr4 & X86_CR4_SMAP;
> - bool is_user = svm_get_cpl(vcpu) == 3;
> -
> - /*
> - * If RIP is invalid, go ahead with emulation which will cause an
> - * internal error exit.
> - */
> - if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
> - return true;
> + bool smep, smap, is_user;
> + unsigned long cr4;
>
> /*
> * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
> @@ -3987,6 +3978,20 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
> * instruction pointer so we will not able to workaround it. Lets
> * print the error and request to kill the guest.
> */
> + if (likely(!insn || insn_len))
> + return true;
> +
> + /*
> + * If RIP is invalid, go ahead with emulation which will cause an
> + * internal error exit.
> + */
> + if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT))
> + return true;
> +
> + cr4 = kvm_read_cr4(vcpu);
> + smep = cr4 & X86_CR4_SMEP;
> + smap = cr4 & X86_CR4_SMAP;
> + is_user = svm_get_cpl(vcpu) == 3;
> if (smap && (!smep || is_user)) {
> if (!sev_guest(vcpu->kvm))
> return true;
> @@ -4148,7 +4153,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
> .mem_enc_reg_region = svm_register_enc_region,
> .mem_enc_unreg_region = svm_unregister_enc_region,
>
> - .need_emulation_on_page_fault = svm_need_emulation_on_page_fault,
> + .is_emulatable = svm_is_emulatable,
>
> .apic_init_signal_blocked = svm_apic_init_signal_blocked,
> };
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 46ba2e03a892..c92717c54bf9 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1561,6 +1561,11 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data)
> return 0;
> }
>
> +static bool vmx_is_emulatable(struct kvm_vcpu *vcpu, void *insn, int insn_len)
> +{
> + return true;
> +}
> +
> static int skip_emulated_instruction(struct kvm_vcpu *vcpu)
> {
> unsigned long rip, orig_rip;
> @@ -7843,11 +7848,6 @@ static void enable_smi_window(struct kvm_vcpu *vcpu)
> /* RSM will cause a vmexit anyway. */
> }
>
> -static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
> -{
> - return false;
> -}
> -
> static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
> {
> return to_vmx(vcpu)->nested.vmxon;
> @@ -8002,7 +8002,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
> .pre_leave_smm = vmx_pre_leave_smm,
> .enable_smi_window = enable_smi_window,
>
> - .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault,
> + .is_emulatable = vmx_is_emulatable,
> .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
> .migrate_timers = vmx_migrate_timers,
> };
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 539ea1cd6020..5208217049d9 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5707,6 +5707,9 @@ int handle_ud(struct kvm_vcpu *vcpu)
> char sig[5]; /* ud2; .ascii "kvm" */
> struct x86_exception e;
>
> + if (unlikely(!kvm_x86_ops.is_emulatable(vcpu, NULL, 0)))
> + return 1;
> +
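Both the VMX and SVM implementations always fail this check: when called with
insn == NULL and insn_len == 0, is_emulatable() returns true on both, so the
early return here is never taken.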
Wanpeng
Powered by blists - more mailing lists