[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3f93de19-0685-3045-22db-7e05492bb5a4@redhat.com>
Date: Tue, 5 Apr 2022 16:22:29 +0200
From: Paolo Bonzini <pbonzini@...hat.com>
To: isaku.yamahata@...el.com, kvm@...r.kernel.org,
linux-kernel@...r.kernel.org
Cc: isaku.yamahata@...il.com, Jim Mattson <jmattson@...gle.com>,
erdemaktas@...gle.com, Connor Kuehl <ckuehl@...hat.com>,
Sean Christopherson <seanjc@...gle.com>
Subject: Re: [RFC PATCH v5 045/104] KVM: x86/tdp_mmu: make REMOVED_SPTE
include shadow_initial value
On 3/4/22 20:49, isaku.yamahata@...el.com wrote:
> From: Isaku Yamahata <isaku.yamahata@...el.com>
>
> TDP MMU uses REMOVED_SPTE = 0x5a0ULL as special constant to indicate the
> intermediate value to indicate one thread is operating on it and the value
> should be semi-arbitrary value. For TDX (more correctly to use #VE), the
> value should include suppress #VE value which is shadow_init_value.
>
> Define SHADOW_REMOVED_SPTE as shadow_init_value | REMOVED_SPTE, and replace
> REMOVED_SPTE with SHADOW_REMOVED_SPTE to use suppress #VE bit properly for
> TDX.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@...el.com>
> ---
> arch/x86/kvm/mmu/spte.h | 14 ++++++++++++--
> arch/x86/kvm/mmu/tdp_mmu.c | 23 ++++++++++++++++-------
> 2 files changed, 28 insertions(+), 9 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> index bde843bce878..e88f796724b4 100644
> --- a/arch/x86/kvm/mmu/spte.h
> +++ b/arch/x86/kvm/mmu/spte.h
> @@ -194,7 +194,9 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
> * If a thread running without exclusive control of the MMU lock must perform a
> * multi-part operation on an SPTE, it can set the SPTE to REMOVED_SPTE as a
> * non-present intermediate value. Other threads which encounter this value
> - * should not modify the SPTE.
> + * should not modify the SPTE. When TDX is enabled, shadow_init_value, which
> + * is "suppress #VE" bit set, is also set to removed SPTE, because TDX module
> + * always enables "EPT violation #VE".
> *
> * Use a semi-arbitrary value that doesn't set RWX bits, i.e. is not-present on
> * bot AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF
> @@ -207,9 +209,17 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
> /* Removed SPTEs must not be misconstrued as shadow present PTEs. */
> static_assert(!(REMOVED_SPTE & SPTE_MMU_PRESENT_MASK));
>
> +/*
> + * See above comment around REMOVED_SPTE. SHADOW_REMOVED_SPTE is the actual
> + * intermediate value set to the removed SPET. When TDX is enabled, it sets
> + * the "suppress #VE" bit, otherwise it's REMOVED_SPTE.
> + */
> +extern u64 __read_mostly shadow_init_value;
> +#define SHADOW_REMOVED_SPTE (shadow_init_value | REMOVED_SPTE)
Please rename the existing REMOVED_SPTE to REMOVED_SPTE_MASK, and call
this simply REMOVED_SPTE. This also makes the patch smaller.
Paolo
> }
>
> /*
> diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
> index ebd0a02620e8..b6ec2f112c26 100644
> --- a/arch/x86/kvm/mmu/tdp_mmu.c
> +++ b/arch/x86/kvm/mmu/tdp_mmu.c
> @@ -338,7 +338,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
> * value to the removed SPTE value.
> */
> for (;;) {
> - old_child_spte = xchg(sptep, REMOVED_SPTE);
> + old_child_spte = xchg(sptep, SHADOW_REMOVED_SPTE);
> if (!is_removed_spte(old_child_spte))
> break;
> cpu_relax();
> @@ -365,10 +365,10 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
> * the two branches consistent and simplifies
> * the function.
> */
> - WRITE_ONCE(*sptep, REMOVED_SPTE);
> + WRITE_ONCE(*sptep, SHADOW_REMOVED_SPTE);
> }
> handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn,
> - old_child_spte, REMOVED_SPTE, level,
> + old_child_spte, SHADOW_REMOVED_SPTE, level,
> shared);
> }
>
> @@ -537,7 +537,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
> * immediately installing a present entry in its place
> * before the TLBs are flushed.
> */
> - if (!tdp_mmu_set_spte_atomic(kvm, iter, REMOVED_SPTE))
> + if (!tdp_mmu_set_spte_atomic(kvm, iter, SHADOW_REMOVED_SPTE))
> return false;
>
> kvm_flush_remote_tlbs_with_address(kvm, iter->gfn,
> @@ -550,8 +550,16 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
> * special removed SPTE value. No bookkeeping is needed
> * here since the SPTE is going from non-present
> * to non-present.
> + *
> + * Set non-present value to shadow_init_value, rather than 0.
> + * It is because when TDX is enabled, TDX module always
> + * enables "EPT-violation #VE", so KVM needs to set
> + * "suppress #VE" bit in EPT table entries, in order to get
> + * real EPT violation, rather than TDVMCALL. KVM sets
> + * shadow_init_value (which sets "suppress #VE" bit) so it
> + * can be set when EPT table entries are zapped.
> */
> - WRITE_ONCE(*rcu_dereference(iter->sptep), 0);
> + WRITE_ONCE(*rcu_dereference(iter->sptep), shadow_init_value);
>
> return true;
> }
> @@ -748,7 +756,8 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
> continue;
>
> if (!shared) {
> - tdp_mmu_set_spte(kvm, &iter, 0);
> + /* see comments in tdp_mmu_zap_spte_atomic() */
> + tdp_mmu_set_spte(kvm, &iter, shadow_init_value);
> flush = true;
> } else if (!tdp_mmu_zap_spte_atomic(kvm, &iter)) {
> /*
> @@ -1135,7 +1144,7 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
> * invariant that the PFN of a present * leaf SPTE can never change.
> * See __handle_changed_spte().
> */
> - tdp_mmu_set_spte(kvm, iter, 0);
> + tdp_mmu_set_spte(kvm, iter, shadow_init_value);
>
> if (!pte_write(range->pte)) {
> new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
Powered by blists - more mailing lists