lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed,  7 Jul 2021 13:35:56 -0500
From:   Brijesh Singh <brijesh.singh@....com>
To:     x86@...nel.org, linux-kernel@...r.kernel.org, kvm@...r.kernel.org,
        linux-efi@...r.kernel.org, platform-driver-x86@...r.kernel.org,
        linux-coco@...ts.linux.dev, linux-mm@...ck.org,
        linux-crypto@...r.kernel.org
Cc:     Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Joerg Roedel <jroedel@...e.de>,
        Tom Lendacky <thomas.lendacky@....com>,
        "H. Peter Anvin" <hpa@...or.com>, Ard Biesheuvel <ardb@...nel.org>,
        Paolo Bonzini <pbonzini@...hat.com>,
        Sean Christopherson <seanjc@...gle.com>,
        Vitaly Kuznetsov <vkuznets@...hat.com>,
        Wanpeng Li <wanpengli@...cent.com>,
        Jim Mattson <jmattson@...gle.com>,
        Andy Lutomirski <luto@...nel.org>,
        Dave Hansen <dave.hansen@...ux.intel.com>,
        Sergio Lopez <slp@...hat.com>, Peter Gonda <pgonda@...gle.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Srinivas Pandruvada <srinivas.pandruvada@...ux.intel.com>,
        David Rientjes <rientjes@...gle.com>,
        Dov Murik <dovmurik@...ux.ibm.com>,
        Tobin Feldman-Fitzthum <tobin@....com>,
        Borislav Petkov <bp@...en8.de>,
        Michael Roth <michael.roth@....com>,
        Vlastimil Babka <vbabka@...e.cz>, tony.luck@...el.com,
        npmccallum@...hat.com, brijesh.ksingh@...il.com,
        Brijesh Singh <brijesh.singh@....com>
Subject: [PATCH Part2 RFC v4 20/40] KVM: SVM: Make AVIC backing, VMSA and VMCB memory allocation SNP safe

When SEV-SNP is globally enabled on a system, the VMRUN instruction
performs additional security checks on AVIC backing, VMSA, and VMCB page.
On a successful VMRUN, these pages are marked "in-use" by the
hardware in the RMP entry, and any attempt to modify the RMP entry for
these pages will result in page-fault (RMP violation check).

While performing the RMP check, hardware will try to create a 2MB TLB
entry for the large page accesses. When it does this, it first reads
the RMP for the base of 2MB region and verifies that all this memory is
safe. If AVIC backing, VMSA, and VMCB memory happen to be the base of
2MB region, then RMP check will fail because of the "in-use" marking for
the base entry of this 2MB region.

e.g.

1. A VMCB was allocated on 2MB-aligned address.
2. The VMRUN instruction marks this RMP entry as "in-use".
3. Another process allocated some other page of memory that happened to be
   within the same 2MB region.
4. That process tried to write its page using physmap.

If the physmap entry in step #4 uses a large (1G/2M) page, then the
hardware will attempt to create a 2M TLB entry. The hardware will find
that the "in-use" bit is set in the RMP entry (because it was a
VMCB page) and will cause an RMP violation check.

See APM2 section 15.36.12 for more information on VMRUN checks when
SEV-SNP is globally active.

A generic allocator can return a page which are 2M aligned and will not
be safe to be used when SEV-SNP is globally enabled. Add a
snp_safe_alloc_page() helper that can be used for allocating the
SNP safe memory. The helper allocated 2 pages and splits them into order-1
allocation. It frees one page and keeps one of the page which is not
2M aligned.

Signed-off-by: Brijesh Singh <brijesh.singh@....com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/lapic.c            |  5 ++++-
 arch/x86/kvm/svm/sev.c          | 27 +++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c          | 16 ++++++++++++++--
 arch/x86/kvm/svm/svm.h          |  1 +
 5 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 55efbacfc244..188110ab2c02 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1383,6 +1383,7 @@ struct kvm_x86_ops {
 	int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
 
 	void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
+	void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c0ebef560bd1..d4c77f66d7d5 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2441,7 +2441,10 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 
 	vcpu->arch.apic = apic;
 
-	apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+	if (kvm_x86_ops.alloc_apic_backing_page)
+		apic->regs = kvm_x86_ops.alloc_apic_backing_page(vcpu);
+	else
+		apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
 	if (!apic->regs) {
 		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
 		       vcpu->vcpu_id);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index b8505710c36b..411ed72f63af 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2692,3 +2692,30 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
 		break;
 	}
 }
+
+struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
+{
+	unsigned long pfn;
+	struct page *p;
+
+	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
+		return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+
+	p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
+	if (!p)
+		return NULL;
+
+	/* split the page order */
+	split_page(p, 1);
+
+	/* Find a non-2M aligned page */
+	pfn = page_to_pfn(p);
+	if (IS_ALIGNED(__pfn_to_phys(pfn), PMD_SIZE)) {
+		pfn++;
+		__free_page(p);
+	} else {
+		__free_page(pfn_to_page(pfn + 1));
+	}
+
+	return pfn_to_page(pfn);
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2acf187a3100..a7adf6ca1713 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1336,7 +1336,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 	svm = to_svm(vcpu);
 
 	err = -ENOMEM;
-	vmcb01_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+	vmcb01_page = snp_safe_alloc_page(vcpu);
 	if (!vmcb01_page)
 		goto out;
 
@@ -1345,7 +1345,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
 		 * SEV-ES guests require a separate VMSA page used to contain
 		 * the encrypted register state of the guest.
 		 */
-		vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+		vmsa_page = snp_safe_alloc_page(vcpu);
 		if (!vmsa_page)
 			goto error_free_vmcb_page;
 
@@ -4439,6 +4439,16 @@ static int svm_vm_init(struct kvm *kvm)
 	return 0;
 }
 
+static void *svm_alloc_apic_backing_page(struct kvm_vcpu *vcpu)
+{
+	struct page *page = snp_safe_alloc_page(vcpu);
+
+	if (!page)
+		return NULL;
+
+	return page_address(page);
+}
+
 static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.hardware_unsetup = svm_hardware_teardown,
 	.hardware_enable = svm_hardware_enable,
@@ -4564,6 +4574,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.complete_emulated_msr = svm_complete_emulated_msr,
 
 	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
+
+	.alloc_apic_backing_page = svm_alloc_apic_backing_page,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 5f874168551b..1175edb02d33 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -554,6 +554,7 @@ void sev_es_create_vcpu(struct vcpu_svm *svm);
 void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu);
 void sev_es_unmap_ghcb(struct vcpu_svm *svm);
+struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
 
 /* vmenter.S */
 
-- 
2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ