[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20221214194056.161492-36-michael.roth@amd.com>
Date: Wed, 14 Dec 2022 13:40:27 -0600
From: Michael Roth <michael.roth@....com>
To: <kvm@...r.kernel.org>
CC: <linux-coco@...ts.linux.dev>, <linux-mm@...ck.org>,
<linux-crypto@...r.kernel.org>, <x86@...nel.org>,
<linux-kernel@...r.kernel.org>, <tglx@...utronix.de>,
<mingo@...hat.com>, <jroedel@...e.de>, <thomas.lendacky@....com>,
<hpa@...or.com>, <ardb@...nel.org>, <pbonzini@...hat.com>,
<seanjc@...gle.com>, <vkuznets@...hat.com>,
<wanpengli@...cent.com>, <jmattson@...gle.com>, <luto@...nel.org>,
<dave.hansen@...ux.intel.com>, <slp@...hat.com>,
<pgonda@...gle.com>, <peterz@...radead.org>,
<srinivas.pandruvada@...ux.intel.com>, <rientjes@...gle.com>,
<dovmurik@...ux.ibm.com>, <tobin@....com>, <bp@...en8.de>,
<vbabka@...e.cz>, <kirill@...temov.name>, <ak@...ux.intel.com>,
<tony.luck@...el.com>, <marcorr@...gle.com>,
<sathyanarayanan.kuppuswamy@...ux.intel.com>,
<alpergun@...gle.com>, <dgilbert@...hat.com>, <jarkko@...nel.org>,
<ashish.kalra@....com>, <harald@...fian.com>,
Brijesh Singh <brijesh.singh@....com>
Subject: [PATCH RFC v7 35/64] KVM: SVM: Make AVIC backing, VMSA and VMCB memory allocation SNP safe
From: Brijesh Singh <brijesh.singh@....com>
Implement a workaround for an SNP erratum where the CPU will incorrectly
signal an RMP violation #PF if a hugepage (2mb or 1gb) collides with the
RMP entry of a VMCB, VMSA or AVIC backing page.
When SEV-SNP is globally enabled, the CPU marks the VMCB, VMSA, and AVIC
backing pages as "in-use" in the RMP after a successful VMRUN. This
is done for _all_ VMs, not just SNP-Active VMs.
If the hypervisor accesses an in-use page through a writable
translation, the CPU will throw an RMP violation #PF. On early SNP
hardware, if an in-use page is 2mb aligned and software accesses any
part of the associated 2mb region with a hupage, the CPU will
incorrectly treat the entire 2mb region as in-use and signal a spurious
RMP violation #PF.
The recommended is to not use the hugepage for the VMCB, VMSA or
AVIC backing page. Add a generic allocator that will ensure that the
page returns is not hugepage (2mb or 1gb) and is safe to be used when
SEV-SNP is enabled.
Co-developed-by: Marc Orr <marcorr@...gle.com>
Signed-off-by: Marc Orr <marcorr@...gle.com>
Signed-off-by: Brijesh Singh <brijesh.singh@....com>
Signed-off-by: Ashish Kalra <ashish.kalra@....com>
Signed-off-by: Michael Roth <michael.roth@....com>
---
arch/x86/include/asm/kvm-x86-ops.h | 1 +
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/lapic.c | 5 ++++-
arch/x86/kvm/svm/sev.c | 33 ++++++++++++++++++++++++++++++
arch/x86/kvm/svm/svm.c | 15 ++++++++++++--
arch/x86/kvm/svm/svm.h | 1 +
6 files changed, 54 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index c71df44b0f02..e0015926cdf4 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -131,6 +131,7 @@ KVM_X86_OP(msr_filter_changed)
KVM_X86_OP(complete_emulated_msr)
KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
+KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
KVM_X86_OP_OPTIONAL_RET0(private_mem_enabled);
KVM_X86_OP_OPTIONAL_RET0(fault_is_private);
KVM_X86_OP_OPTIONAL_RET0(update_mem_attr)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9ef8d73455d9..e2529415f28b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1722,6 +1722,8 @@ struct kvm_x86_ops {
* Returns vCPU specific APICv inhibit reasons
*/
unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
+
+ void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
};
struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1bb63746e991..8500d1d54664 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2581,7 +2581,10 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
vcpu->arch.apic = apic;
- apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+ if (kvm_x86_ops.alloc_apic_backing_page)
+ apic->regs = static_call(kvm_x86_alloc_apic_backing_page)(vcpu);
+ else
+ apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
if (!apic->regs) {
printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
vcpu->vcpu_id);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 82ff96b4f04a..0e93b536dc34 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -3234,6 +3234,39 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
}
}
+struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
+{
+ unsigned long pfn;
+ struct page *p;
+
+ if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+ return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+
+ /*
+ * Allocate an SNP safe page to workaround the SNP erratum where
+ * the CPU will incorrectly signal an RMP violation #PF if a
+ * hugepage (2mb or 1gb) collides with the RMP entry of VMCB, VMSA
+ * or AVIC backing page. The recommeded workaround is to not use the
+ * hugepage.
+ *
+ * Allocate one extra page, use a page which is not 2mb aligned
+ * and free the other.
+ */
+ p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
+ if (!p)
+ return NULL;
+
+ split_page(p, 1);
+
+ pfn = page_to_pfn(p);
+ if (IS_ALIGNED(pfn, PTRS_PER_PMD))
+ __free_page(p++);
+ else
+ __free_page(p + 1);
+
+ return p;
+}
+
int sev_fault_is_private(struct kvm *kvm, gpa_t gpa, u64 error_code, bool *private_fault)
{
gfn_t gfn = gpa_to_gfn(gpa);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index fc7885869f7e..013f811c733c 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1360,7 +1360,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
svm = to_svm(vcpu);
err = -ENOMEM;
- vmcb01_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ vmcb01_page = snp_safe_alloc_page(vcpu);
if (!vmcb01_page)
goto out;
@@ -1369,7 +1369,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
* SEV-ES guests require a separate VMSA page used to contain
* the encrypted register state of the guest.
*/
- vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ vmsa_page = snp_safe_alloc_page(vcpu);
if (!vmsa_page)
goto error_free_vmcb_page;
@@ -4694,6 +4694,16 @@ static int svm_vm_init(struct kvm *kvm)
return 0;
}
+static void *svm_alloc_apic_backing_page(struct kvm_vcpu *vcpu)
+{
+ struct page *page = snp_safe_alloc_page(vcpu);
+
+ if (!page)
+ return NULL;
+
+ return page_address(page);
+}
+
static int svm_private_mem_enabled(struct kvm *kvm)
{
if (sev_guest(kvm))
@@ -4830,6 +4840,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
.vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
+ .alloc_apic_backing_page = svm_alloc_apic_backing_page,
.fault_is_private = sev_fault_is_private,
};
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 1f3098dff3d5..ea9844546e8a 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -684,6 +684,7 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm);
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
void sev_es_unmap_ghcb(struct vcpu_svm *svm);
+struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
int sev_fault_is_private(struct kvm *kvm, gpa_t gpa, u64 error_code, bool *private_fault);
--
2.25.1
Powered by blists - more mailing lists