[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <56882cd9-67f5-4396-b91b-52fb202d3386@linux.intel.com>
Date: Mon, 29 Jan 2024 12:29:18 +0800
From: Binbin Wu <binbin.wu@...ux.intel.com>
To: isaku.yamahata@...el.com
Cc: kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
isaku.yamahata@...il.com, Paolo Bonzini <pbonzini@...hat.com>,
erdemaktas@...gle.com, Sean Christopherson <seanjc@...gle.com>,
Sagi Shahar <sagis@...gle.com>, Kai Huang <kai.huang@...el.com>,
chen.bo@...el.com, hang.yuan@...el.com, tina.zhang@...el.com
Subject: Re: [PATCH v18 047/121] KVM: x86/mmu: Add a private pointer to struct
kvm_mmu_page
On 1/23/2024 7:53 AM, isaku.yamahata@...el.com wrote:
> From: Isaku Yamahata <isaku.yamahata@...el.com>
>
> For private GPA, CPU refers a private page table whose contents are
> encrypted. The dedicated APIs to operate on it (e.g. updating/reading its
> PTE entry) are used and their cost is expensive.
>
> When KVM resolves KVM page fault, it walks the page tables. To reuse the
> existing KVM MMU code and mitigate the heavy cost to directly walk private
> page table, allocate one more page to copy the dummy page table for KVM MMU
> code to directly walk. Resolve KVM page fault with the existing code, and
> do additional operations necessary for the private page table. To
> distinguish such cases, the existing KVM page table is called a shared page
> table (i.e. not associated with private page table), and the page table
> with private page table is called a private page table. The relationship
> is depicted below.
>
> Add a private pointer to struct kvm_mmu_page for private page table and
> add helper functions to allocate/initialize/free a private page table
> page.
>
> KVM page fault |
> | |
> V |
> -------------+---------- |
> | | |
> V V |
> shared GPA private GPA |
> | | |
> V V |
> shared PT root dummy PT root | private PT root
> | | | |
> V V | V
> shared PT dummy PT ----propagate----> private PT
> | | | |
> | \-----------------+------\ |
> | | | |
> V | V V
> shared guest page | private guest page
> |
> non-encrypted memory | encrypted memory
> |
> PT: page table
> - Shared PT is visible to KVM and it is used by CPU.
> - Private PT is used by CPU but it is invisible to KVM.
> - Dummy PT is visible to KVM but not used by CPU. It is used to
> propagate PT change to the actual private PT which is used by CPU.
Nit: one typo below.
Reviewed-by: Binbin Wu <binbin.wu@...ux.intel.com>
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@...el.com>
> ---
> arch/x86/include/asm/kvm_host.h | 5 ++
> arch/x86/kvm/mmu/mmu.c | 7 +++
> arch/x86/kvm/mmu/mmu_internal.h | 83 +++++++++++++++++++++++++++++++--
> arch/x86/kvm/mmu/tdp_mmu.c | 1 +
> 4 files changed, 92 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 0cdbbc21136b..1d074956ac0d 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -841,6 +841,11 @@ struct kvm_vcpu_arch {
> struct kvm_mmu_memory_cache mmu_shadow_page_cache;
> struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
> struct kvm_mmu_memory_cache mmu_page_header_cache;
> + /*
> + * This cache is to allocate private page table. E.g. Secure-EPT used
> + * by the TDX module.
> + */
> + struct kvm_mmu_memory_cache mmu_private_spt_cache;
>
> /*
> * QEMU userspace and the guest each have their own FPU state.
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 583ae9d6bf5d..32c619125be4 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -685,6 +685,12 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu, bool maybe_indirect)
> 1 + PT64_ROOT_MAX_LEVEL + PTE_PREFETCH_NUM);
> if (r)
> return r;
> + if (kvm_gfn_shared_mask(vcpu->kvm)) {
> + r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_private_spt_cache,
> + PT64_ROOT_MAX_LEVEL);
> + if (r)
> + return r;
> + }
> r = kvm_mmu_topup_memory_cache(&vcpu->arch.mmu_shadow_page_cache,
> PT64_ROOT_MAX_LEVEL);
> if (r)
> @@ -704,6 +710,7 @@ static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
> kvm_mmu_free_memory_cache(&vcpu->arch.mmu_pte_list_desc_cache);
> kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadow_page_cache);
> kvm_mmu_free_memory_cache(&vcpu->arch.mmu_shadowed_info_cache);
> + kvm_mmu_free_memory_cache(&vcpu->arch.mmu_private_spt_cache);
> kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_header_cache);
> }
>
> diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
> index 97af4e39ce6f..957654c3cde9 100644
> --- a/arch/x86/kvm/mmu/mmu_internal.h
> +++ b/arch/x86/kvm/mmu/mmu_internal.h
> @@ -101,7 +101,23 @@ struct kvm_mmu_page {
> int root_count;
> refcount_t tdp_mmu_root_count;
> };
> - unsigned int unsync_children;
> + union {
> + struct {
> + unsigned int unsync_children;
> + /*
> + * Number of writes since the last time traversal
> + * visited this page.
> + */
> + atomic_t write_flooding_count;
> + };
> +#ifdef CONFIG_KVM_MMU_PRIVATE
> + /*
> + * Associated private shadow page table, e.g. Secure-EPT page
> + * passed to the TDX module.
> + */
> + void *private_spt;
> +#endif
> + };
> union {
> struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
> tdp_ptep_t ptep;
> @@ -124,9 +140,6 @@ struct kvm_mmu_page {
> int clear_spte_count;
> #endif
>
> - /* Number of writes since the last time traversal visited this page. */
> - atomic_t write_flooding_count;
> -
> #ifdef CONFIG_X86_64
> /* Used for freeing the page asynchronously if it is a TDP MMU page. */
> struct rcu_head rcu_head;
> @@ -150,6 +163,68 @@ static inline bool is_private_sp(const struct kvm_mmu_page *sp)
> return kvm_mmu_page_role_is_private(sp->role);
> }
>
> +#ifdef CONFIG_KVM_MMU_PRIVATE
> +static inline void *kvm_mmu_private_spt(struct kvm_mmu_page *sp)
> +{
> + return sp->private_spt;
> +}
> +
> +static inline void kvm_mmu_init_private_spt(struct kvm_mmu_page *sp, void *private_spt)
> +{
> + sp->private_spt = private_spt;
> +}
> +
> +static inline void kvm_mmu_alloc_private_spt(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
> +{
> + bool is_root = vcpu->arch.root_mmu.root_role.level == sp->role.level;
> +
> + KVM_BUG_ON(!kvm_mmu_page_role_is_private(sp->role), vcpu->kvm);
> + if (is_root)
> + /*
> + * Because TDX module assigns root Secure-EPT page and set it to
> + * Secure-EPTP when TD vcpu is created, secure page table for
> + * root isn't needed.
> + */
> + sp->private_spt = NULL;
> + else {
> + /*
> + * Because the TDX module doesn't trust VMM and initializes
> + * the pages itself, KVM doesn't initialize them. Allocate
> + * pages with garbage and give them to the TDX module.
> + */
> + sp->private_spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_private_spt_cache);
> + /*
> + * Because mmu_private_spt_cache is topped up before staring kvm
s/staring/starting
> + * page fault resolving, the allocation above shouldn't fail.
> + */
> + WARN_ON_ONCE(!sp->private_spt);
> + }
> +}
> +
> +static inline void kvm_mmu_free_private_spt(struct kvm_mmu_page *sp)
> +{
> + if (sp->private_spt)
> + free_page((unsigned long)sp->private_spt);
> +}
> +#else
> +static inline void *kvm_mmu_private_spt(struct kvm_mmu_page *sp)
> +{
> + return NULL;
> +}
> +
> +static inline void kvm_mmu_init_private_spt(struct kvm_mmu_page *sp, void *private_spt)
> +{
> +}
> +
> +static inline void kvm_mmu_alloc_private_spt(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
> +{
> +}
> +
> +static inline void kvm_mmu_free_private_spt(struct kvm_mmu_page *sp)
> +{
> +}
> +#endif
> +
> static inline bool kvm_mmu_page_ad_need_write_protect(struct kvm_mmu_page *sp)
> {
> /*
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index 87233b3ceaef..d47f0daf1b03 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -53,6 +53,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
>
> static void tdp_mmu_free_sp(struct kvm_mmu_page *sp)
> {
> + kvm_mmu_free_private_spt(sp);
> free_page((unsigned long)sp->spt);
> kmem_cache_free(mmu_page_header_cache, sp);
> }
Powered by blists - more mailing lists