[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ee123648-3ac4-6e0c-419c-281fb09edaa6@suse.cz>
Date: Thu, 4 Aug 2022 13:32:25 +0200
From: Vlastimil Babka <vbabka@...e.cz>
To: Ashish Kalra <Ashish.Kalra@....com>, x86@...nel.org,
linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
linux-coco@...ts.linux.dev, linux-mm@...ck.org,
linux-crypto@...r.kernel.org
Cc: tglx@...utronix.de, mingo@...hat.com, jroedel@...e.de,
thomas.lendacky@....com, hpa@...or.com, ardb@...nel.org,
pbonzini@...hat.com, seanjc@...gle.com, vkuznets@...hat.com,
jmattson@...gle.com, luto@...nel.org, dave.hansen@...ux.intel.com,
slp@...hat.com, pgonda@...gle.com, peterz@...radead.org,
srinivas.pandruvada@...ux.intel.com, rientjes@...gle.com,
dovmurik@...ux.ibm.com, tobin@....com, bp@...en8.de,
michael.roth@....com, kirill@...temov.name, ak@...ux.intel.com,
tony.luck@...el.com, marcorr@...gle.com,
sathyanarayanan.kuppuswamy@...ux.intel.com, alpergun@...gle.com,
dgilbert@...hat.com, jarkko@...nel.org
Subject: Re: [PATCH Part2 v6 21/49] KVM: SVM: Make AVIC backing, VMSA and VMCB
memory allocation SNP safe
On 6/21/22 01:06, Ashish Kalra wrote:
> From: Brijesh Singh <brijesh.singh@....com>
>
> Implement a workaround for an SNP erratum where the CPU will incorrectly
> signal an RMP violation #PF if a hugepage (2mb or 1gb) collides with the
> RMP entry of a VMCB, VMSA or AVIC backing page.
>
> When SEV-SNP is globally enabled, the CPU marks the VMCB, VMSA, and AVIC
> backing pages as "in-use" in the RMP after a successful VMRUN. This
> is done for _all_ VMs, not just SNP-Active VMs.
>
> If the hypervisor accesses an in-use page through a writable
> translation, the CPU will throw an RMP violation #PF. On early SNP
> hardware, if an in-use page is 2mb aligned and software accesses any
> part of the associated 2mb region with a hugepage, the CPU will
> incorrectly treat the entire 2mb region as in-use and signal a spurious
> RMP violation #PF.
>
> The recommended workaround is to not use a hugepage for the VMCB, VMSA or
> AVIC backing page. Add a generic allocator that will ensure that the
> page returned is not a hugepage (2mb or 1gb) and is safe to be used when
> SEV-SNP is enabled.
>
> Co-developed-by: Marc Orr <marcorr@...gle.com>
> Signed-off-by: Marc Orr <marcorr@...gle.com>
> Signed-off-by: Brijesh Singh <brijesh.singh@....com>
> ---
> arch/x86/include/asm/kvm-x86-ops.h | 1 +
> arch/x86/include/asm/kvm_host.h | 2 ++
> arch/x86/kvm/lapic.c | 5 ++++-
> arch/x86/kvm/svm/sev.c | 35 ++++++++++++++++++++++++++++++
> arch/x86/kvm/svm/svm.c | 16 ++++++++++++--
> arch/x86/kvm/svm/svm.h | 1 +
> 6 files changed, 57 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> index da47f60a4650..a66292dae698 100644
> --- a/arch/x86/include/asm/kvm-x86-ops.h
> +++ b/arch/x86/include/asm/kvm-x86-ops.h
> @@ -128,6 +128,7 @@ KVM_X86_OP(msr_filter_changed)
> KVM_X86_OP(complete_emulated_msr)
> KVM_X86_OP(vcpu_deliver_sipi_vector)
> KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
> +KVM_X86_OP(alloc_apic_backing_page)
>
> #undef KVM_X86_OP
> #undef KVM_X86_OP_OPTIONAL
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index c24a72ddc93b..0205e2944067 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1512,6 +1512,8 @@ struct kvm_x86_ops {
> * Returns vCPU specific APICv inhibit reasons
> */
> unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
> +
> + void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
> };
>
> struct kvm_x86_nested_ops {
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 66b0eb0bda94..7c7fc6c4a7f9 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2506,7 +2506,10 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
>
> vcpu->arch.apic = apic;
>
> - apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
> + if (kvm_x86_ops.alloc_apic_backing_page)
> + apic->regs = static_call(kvm_x86_alloc_apic_backing_page)(vcpu);
> + else
> + apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
> if (!apic->regs) {
> printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
> vcpu->vcpu_id);
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index b49c370d5ae9..93365996bd59 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -3030,3 +3030,38 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
> break;
> }
> }
> +
> +struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
> +{
> + unsigned long pfn;
> + struct page *p;
> +
> + if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> + return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
> +
> + /*
> + * Allocate an SNP safe page to work around the SNP erratum where
> + * the CPU will incorrectly signal an RMP violation #PF if a
> + * hugepage (2mb or 1gb) collides with the RMP entry of VMCB, VMSA
> + * or AVIC backing page. The recommended workaround is to not use the
> + * hugepage.
> + *
> + * Allocate one extra page, use a page which is not 2mb aligned
> + * and free the other.
> + */
> + p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
> + if (!p)
> + return NULL;
> +
> + split_page(p, 1);
> +
> + pfn = page_to_pfn(p);
> + if (IS_ALIGNED(__pfn_to_phys(pfn), PMD_SIZE)) {
I think you could simply use IS_ALIGNED(pfn, PTRS_PER_PMD); there is no need to
expand to the full physical address.
> + pfn++;
> + __free_page(p);
To avoid pfn_to_page(), drop 'pfn++' and use:
__free_page(p++);
> + } else {
> + __free_page(pfn_to_page(pfn + 1));
__free_page(p+1);
> + }
> +
> + return pfn_to_page(pfn);
return p;
> +}
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index efc7623d0f90..b4bd64f94d3a 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1260,7 +1260,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
> svm = to_svm(vcpu);
>
> err = -ENOMEM;
> - vmcb01_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
> + vmcb01_page = snp_safe_alloc_page(vcpu);
> if (!vmcb01_page)
> goto out;
>
> @@ -1269,7 +1269,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
> * SEV-ES guests require a separate VMSA page used to contain
> * the encrypted register state of the guest.
> */
> - vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
> + vmsa_page = snp_safe_alloc_page(vcpu);
> if (!vmsa_page)
> goto error_free_vmcb_page;
>
> @@ -4598,6 +4598,16 @@ static int svm_vm_init(struct kvm *kvm)
> return 0;
> }
>
> +static void *svm_alloc_apic_backing_page(struct kvm_vcpu *vcpu)
> +{
> + struct page *page = snp_safe_alloc_page(vcpu);
> +
> + if (!page)
> + return NULL;
> +
> + return page_address(page);
> +}
> +
> static struct kvm_x86_ops svm_x86_ops __initdata = {
> .name = "kvm_amd",
>
> @@ -4722,6 +4732,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
>
> .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
> .vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
> +
> + .alloc_apic_backing_page = svm_alloc_apic_backing_page,
> };
>
> /*
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index 1f4a8bd09c9e..9672e25a338d 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -659,6 +659,7 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm);
> void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
> void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
> void sev_es_unmap_ghcb(struct vcpu_svm *svm);
> +struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
>
> /* vmenter.S */
>
Powered by blists - more mailing lists