lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 13 Dec 2023 19:45:39 +0100
From:   Paolo Bonzini <pbonzini@...hat.com>
To:     Michael Roth <michael.roth@....com>, kvm@...r.kernel.org
Cc:     linux-coco@...ts.linux.dev, linux-mm@...ck.org,
        linux-crypto@...r.kernel.org, x86@...nel.org,
        linux-kernel@...r.kernel.org, tglx@...utronix.de, mingo@...hat.com,
        jroedel@...e.de, thomas.lendacky@....com, hpa@...or.com,
        ardb@...nel.org, seanjc@...gle.com, vkuznets@...hat.com,
        jmattson@...gle.com, luto@...nel.org, dave.hansen@...ux.intel.com,
        slp@...hat.com, pgonda@...gle.com, peterz@...radead.org,
        srinivas.pandruvada@...ux.intel.com, rientjes@...gle.com,
        dovmurik@...ux.ibm.com, tobin@....com, bp@...en8.de,
        vbabka@...e.cz, kirill@...temov.name, ak@...ux.intel.com,
        tony.luck@...el.com, marcorr@...gle.com,
        sathyanarayanan.kuppuswamy@...ux.intel.com, alpergun@...gle.com,
        jarkko@...nel.org, ashish.kalra@....com, nikunj.dadhania@....com,
        pankaj.gupta@....com, liam.merwick@...cle.com,
        zhi.a.wang@...el.com, Brijesh Singh <brijesh.singh@....com>
Subject: Re: [PATCH v10 23/50] KVM: SEV: Make AVIC backing, VMSA and VMCB
 memory allocation SNP safe

On 10/16/23 15:27, Michael Roth wrote:
> From: Brijesh Singh <brijesh.singh@....com>
> 
> Implement a workaround for an SNP erratum where the CPU will incorrectly
> signal an RMP violation #PF if a hugepage (2mb or 1gb) collides with the
> RMP entry of a VMCB, VMSA or AVIC backing page.
> 
> When SEV-SNP is globally enabled, the CPU marks the VMCB, VMSA, and AVIC
> backing pages as "in-use" via a reserved bit in the corresponding RMP
> entry after a successful VMRUN. This is done for _all_ VMs, not just
> SNP-Active VMs.
> 
> If the hypervisor accesses an in-use page through a writable
> translation, the CPU will throw an RMP violation #PF. On early SNP
> hardware, if an in-use page is 2mb aligned and software accesses any
> part of the associated 2mb region with a hupage, the CPU will
> incorrectly treat the entire 2mb region as in-use and signal a spurious
> RMP violation #PF.
> 
> The recommended is to not use the hugepage for the VMCB, VMSA or
> AVIC backing page for similar reasons. Add a generic allocator that will
> ensure that the page returns is not hugepage (2mb or 1gb) and is safe to
> be used when SEV-SNP is enabled. Also implement similar handling for the
> VMCB/VMSA pages of nested guests.
> 
> Co-developed-by: Marc Orr <marcorr@...gle.com>
> Signed-off-by: Marc Orr <marcorr@...gle.com>
> Reported-by: Alper Gun <alpergun@...gle.com> # for nested VMSA case
> Co-developed-by: Ashish Kalra <ashish.kalra@....com>
> Signed-off-by: Ashish Kalra <ashish.kalra@....com>
> Signed-off-by: Brijesh Singh <brijesh.singh@....com>
> [mdr: squash in nested guest handling from Ashish]
> Signed-off-by: Michael Roth <michael.roth@....com>

Based on the discussion with Borislav, please move this earlier in the 
series, before patch 6.

Paolo

> ---
>   arch/x86/include/asm/kvm-x86-ops.h |  1 +
>   arch/x86/include/asm/kvm_host.h    |  1 +
>   arch/x86/kvm/lapic.c               |  5 ++++-
>   arch/x86/kvm/svm/nested.c          |  2 +-
>   arch/x86/kvm/svm/sev.c             | 33 ++++++++++++++++++++++++++++++
>   arch/x86/kvm/svm/svm.c             | 17 ++++++++++++---
>   arch/x86/kvm/svm/svm.h             |  1 +
>   7 files changed, 55 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> index f1505a5fa781..4ef2eca14287 100644
> --- a/arch/x86/include/asm/kvm-x86-ops.h
> +++ b/arch/x86/include/asm/kvm-x86-ops.h
> @@ -136,6 +136,7 @@ KVM_X86_OP(vcpu_deliver_sipi_vector)
>   KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
>   KVM_X86_OP_OPTIONAL_RET0(gmem_prepare)
>   KVM_X86_OP_OPTIONAL(gmem_invalidate)
> +KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
>   
>   #undef KVM_X86_OP
>   #undef KVM_X86_OP_OPTIONAL
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index fa401cb1a552..a3983271ea28 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1763,6 +1763,7 @@ struct kvm_x86_ops {
>   
>   	int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
>   	void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end);
> +	void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
>   };
>   
>   struct kvm_x86_nested_ops {
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index dcd60b39e794..631a554c0f48 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -2810,7 +2810,10 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
>   
>   	vcpu->arch.apic = apic;
>   
> -	apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
> +	if (kvm_x86_ops.alloc_apic_backing_page)
> +		apic->regs = static_call(kvm_x86_alloc_apic_backing_page)(vcpu);
> +	else
> +		apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
>   	if (!apic->regs) {
>   		printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
>   		       vcpu->vcpu_id);
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index dd496c9e5f91..1f9a3f9eb985 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -1194,7 +1194,7 @@ int svm_allocate_nested(struct vcpu_svm *svm)
>   	if (svm->nested.initialized)
>   		return 0;
>   
> -	vmcb02_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
> +	vmcb02_page = snp_safe_alloc_page(&svm->vcpu);
>   	if (!vmcb02_page)
>   		return -ENOMEM;
>   	svm->nested.vmcb02.ptr = page_address(vmcb02_page);
> diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
> index 088b32657f46..1cfb9232fc74 100644
> --- a/arch/x86/kvm/svm/sev.c
> +++ b/arch/x86/kvm/svm/sev.c
> @@ -3211,3 +3211,36 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
>   		break;
>   	}
>   }
> +
> +struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
> +{
> +	unsigned long pfn;
> +	struct page *p;
> +
> +	if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
> +		return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
> +
> +	/*
> +	 * Allocate an SNP safe page to workaround the SNP erratum where
> +	 * the CPU will incorrectly signal an RMP violation  #PF if a
> +	 * hugepage (2mb or 1gb) collides with the RMP entry of VMCB, VMSA
> +	 * or AVIC backing page. The recommeded workaround is to not use the
> +	 * hugepage.
> +	 *
> +	 * Allocate one extra page, use a page which is not 2mb aligned
> +	 * and free the other.
> +	 */
> +	p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
> +	if (!p)
> +		return NULL;
> +
> +	split_page(p, 1);
> +
> +	pfn = page_to_pfn(p);
> +	if (IS_ALIGNED(pfn, PTRS_PER_PMD))
> +		__free_page(p++);
> +	else
> +		__free_page(p + 1);
> +
> +	return p;
> +}
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 1e7fb1ea45f7..8e4ef0cd968a 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -706,7 +706,7 @@ static int svm_cpu_init(int cpu)
>   	int ret = -ENOMEM;
>   
>   	memset(sd, 0, sizeof(struct svm_cpu_data));
> -	sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +	sd->save_area = snp_safe_alloc_page(NULL);
>   	if (!sd->save_area)
>   		return ret;
>   
> @@ -1425,7 +1425,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
>   	svm = to_svm(vcpu);
>   
>   	err = -ENOMEM;
> -	vmcb01_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
> +	vmcb01_page = snp_safe_alloc_page(vcpu);
>   	if (!vmcb01_page)
>   		goto out;
>   
> @@ -1434,7 +1434,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
>   		 * SEV-ES guests require a separate VMSA page used to contain
>   		 * the encrypted register state of the guest.
>   		 */
> -		vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
> +		vmsa_page = snp_safe_alloc_page(vcpu);
>   		if (!vmsa_page)
>   			goto error_free_vmcb_page;
>   
> @@ -4876,6 +4876,16 @@ static int svm_vm_init(struct kvm *kvm)
>   	return 0;
>   }
>   
> +static void *svm_alloc_apic_backing_page(struct kvm_vcpu *vcpu)
> +{
> +	struct page *page = snp_safe_alloc_page(vcpu);
> +
> +	if (!page)
> +		return NULL;
> +
> +	return page_address(page);
> +}
> +
>   static struct kvm_x86_ops svm_x86_ops __initdata = {
>   	.name = KBUILD_MODNAME,
>   
> @@ -5007,6 +5017,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
>   
>   	.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
>   	.vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
> +	.alloc_apic_backing_page = svm_alloc_apic_backing_page,
>   };
>   
>   /*
> diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
> index c13070d00910..b7b8bf73cbb9 100644
> --- a/arch/x86/kvm/svm/svm.h
> +++ b/arch/x86/kvm/svm/svm.h
> @@ -694,6 +694,7 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm);
>   void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
>   void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
>   void sev_es_unmap_ghcb(struct vcpu_svm *svm);
> +struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
>   
>   /* vmenter.S */
>   

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ