Message-ID: <564d7c99-0490-897e-6660-54203c0aa31e@redhat.com>
Date: Tue, 5 Apr 2022 17:15:48 +0200
From: Paolo Bonzini <pbonzini@...hat.com>
To: isaku.yamahata@...el.com, kvm@...r.kernel.org,
linux-kernel@...r.kernel.org
Cc: isaku.yamahata@...il.com, Jim Mattson <jmattson@...gle.com>,
erdemaktas@...gle.com, Connor Kuehl <ckuehl@...hat.com>,
Sean Christopherson <seanjc@...gle.com>
Subject: Re: [RFC PATCH v5 049/104] KVM: x86/tdp_mmu: Ignore unsupported mmu
operation on private GFNs
On 3/4/22 20:49, isaku.yamahata@...el.com wrote:
> static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
> +					  bool private_spte,
> 					  u64 old_spte, u64 new_spte, int level)
> {
> 	bool pfn_changed;
> 	struct kvm_memory_slot *slot;
>
> +	/*
> +	 * TDX doesn't support live migration. Never mark private page as
> +	 * dirty in log-dirty bitmap, since it's not possible for userspace
> +	 * hypervisor to live migrate private page anyway.
> +	 */
> +	if (private_spte)
> +		return;
This should not be needed; patch 35 will block it anyway, because
kvm_slot_dirty_track_enabled() will return false.
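(For background, and from memory so please double-check:
mark_page_dirty_in_slot() already does something along the lines of

	if (memslot && kvm_slot_dirty_track_enabled(memslot))
		set_bit_le(gfn - memslot->base_gfn, memslot->dirty_bitmap);

so once kvm_slot_dirty_track_enabled() returns false for private memory,
the dirty bitmap is never touched for these GFNs anyway.)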
> @@ -1215,7 +1227,23 @@ static __always_inline bool kvm_tdp_mmu_handle_gfn(struct kvm *kvm,
> 	 * into this helper allow blocking; it'd be dead, wasteful code.
> 	 */
> 	for_each_tdp_mmu_root(kvm, root, range->slot->as_id) {
> -		tdp_root_for_each_leaf_pte(iter, root, range->start, range->end)
> +		/*
> +		 * For TDX shared mapping, set GFN shared bit to the range,
> +		 * so the handler() doesn't need to set it, to avoid duplicated
> +		 * code in multiple handler()s.
> +		 */
> +		gfn_t start;
> +		gfn_t end;
> +
> +		if (is_private_sp(root)) {
> +			start = kvm_gfn_private(kvm, range->start);
> +			end = kvm_gfn_private(kvm, range->end);
> +		} else {
> +			start = kvm_gfn_shared(kvm, range->start);
> +			end = kvm_gfn_shared(kvm, range->end);
> +		}
I think this could be a separate function kvm_gfn_for_root(kvm, root, ...).
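Something like this, so that callers don't need the if/else (untested
sketch, using the helpers introduced earlier in the series):

static gfn_t kvm_gfn_for_root(struct kvm *kvm, struct kvm_mmu_page *root,
			      gfn_t gfn)
{
	return is_private_sp(root) ? kvm_gfn_private(kvm, gfn) :
				     kvm_gfn_shared(kvm, gfn);
}

and then simply

	gfn_t start = kvm_gfn_for_root(kvm, root, range->start);
	gfn_t end = kvm_gfn_for_root(kvm, root, range->end);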
> @@ -1237,6 +1265,15 @@ static bool age_gfn_range(struct kvm *kvm, struct tdp_iter *iter,
> 	if (!is_accessed_spte(iter->old_spte))
> 		return false;
>
> +	/*
> +	 * First TDX generation doesn't support clearing A bit for private
> +	 * mapping, since there's no secure EPT API to support it. However
> +	 * it's a legitimate request for TDX guest, so just return w/o a
> +	 * WARN().
> +	 */
> +	if (is_private_spte(iter->sptep))
> +		return false;
Please add instead a "bool only_shared" argument to
kvm_tdp_mmu_handle_gfn, since you can check "only_shared &&
is_private_sp(root)" just once (instead of checking it once per PTE).
> 	new_spte = iter->old_spte;
>
> 	if (spte_ad_enabled(new_spte)) {
> @@ -1281,6 +1318,13 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
> 	/* Huge pages aren't expected to be modified without first being zapped. */
> 	WARN_ON(pte_huge(range->pte) || range->start + 1 != range->end);
>
> +	/*
> +	 * .change_pte() callback should not happen for private page, because
> +	 * for now TDX private pages are pinned during VM's life time.
> +	 */
> +	if (WARN_ON(is_private_spte(iter->sptep)))
> +		return false;
> +
> 	if (iter->level != PG_LEVEL_4K ||
> 	    !is_shadow_present_pte(iter->old_spte))
> 		return false;
> @@ -1332,6 +1376,16 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
> 	u64 new_spte;
> 	bool spte_set = false;
>
> +	/*
> +	 * First TDX generation doesn't support write protecting private
> +	 * mappings, since there's no secure EPT API to support it. It
> +	 * is a bug to reach here for TDX guest.
> +	 */
> +	if (WARN_ON(is_private_sp(root)))
> +		return spte_set;
Isn't this function unreachable even for the shared address range? If
so, this WARN should be in kvm_tdp_mmu_wrprot_slot, and also it should
check if !kvm_arch_dirty_log_supported(kvm).
> +	start = kvm_gfn_shared(kvm, start);
> +	end = kvm_gfn_shared(kvm, end);
... and then these two lines are unnecessary.
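i.e. something like this at the top of kvm_tdp_mmu_wrprot_slot()
(untested):

	if (WARN_ON(!kvm_arch_dirty_log_supported(kvm)))
		return false;

so that wrprot_gfn_range() itself doesn't need to know about private
roots at all.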
> 	rcu_read_lock();
>
> 	BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
> @@ -1398,6 +1452,16 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
> 	u64 new_spte;
> 	bool spte_set = false;
>
> +	/*
> +	 * First TDX generation doesn't support clearing dirty bit,
> +	 * since there's no secure EPT API to support it. It is a
> +	 * bug to reach here for TDX guest.
> +	 */
> +	if (WARN_ON(is_private_sp(root)))
> +		return spte_set;
Same here, can you check it in kvm_tdp_mmu_clear_dirty_slot?
> +	start = kvm_gfn_shared(kvm, start);
> +	end = kvm_gfn_shared(kvm, end);
Same here.
> 	rcu_read_lock();
>
> 	tdp_root_for_each_leaf_pte(iter, root, start, end) {
> @@ -1467,6 +1531,15 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
> 	struct tdp_iter iter;
> 	u64 new_spte;
>
> +	/*
> +	 * First TDX generation doesn't support clearing dirty bit,
> +	 * since there's no secure EPT API to support it. It is a
> +	 * bug to reach here for TDX guest.
> +	 */
> +	if (WARN_ON(is_private_sp(root)))
> +		return;
IIRC this is reachable from userspace, so WARN_ON is not possible. But,
again, please check

	if (!kvm_arch_dirty_log_supported(kvm))
		return;

in kvm_tdp_mmu_clear_dirty_pt_masked.
> +	gfn = kvm_gfn_shared(kvm, gfn);
Also unnecessary, I think.
> 	rcu_read_lock();
>
> 	tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
> @@ -1530,6 +1603,16 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
> 	struct tdp_iter iter;
> 	kvm_pfn_t pfn;
>
> +	/*
> +	 * This should only be reachable in case of log-dirty, which TD
> +	 * private mapping doesn't support so far. Give a WARN() if it
> +	 * hits private mapping.
> +	 */
> +	if (WARN_ON(is_private_sp(root)))
> +		return;
> +	start = kvm_gfn_shared(kvm, start);
> +	end = kvm_gfn_shared(kvm, end);
I think this is not a big deal and you can leave it as is.
Alternatively, please move the WARN to
kvm_tdp_mmu_zap_collapsible_sptes(). It is also only reachable if you
can set KVM_MEM_LOG_DIRTY_PAGES in the first place.
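e.g. something like (untested)

	if (WARN_ON_ONCE(!kvm_arch_dirty_log_supported(kvm)))
		return;

at the beginning of kvm_tdp_mmu_zap_collapsible_sptes().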
Paolo
> 	rcu_read_lock();
>
> 	tdp_root_for_each_pte(iter, root, start, end) {
> @@ -1543,8 +1626,9 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
>
> 		pfn = spte_to_pfn(iter.old_spte);
> 		if (kvm_is_reserved_pfn(pfn) ||
> -		    iter.level >= kvm_mmu_max_mapping_level(kvm, slot, iter.gfn,
> -							     pfn, PG_LEVEL_NUM))
> +		    iter.level >= kvm_mmu_max_mapping_level(kvm, slot,
> +				    tdp_iter_gfn_unalias(kvm, &iter), pfn,
> +				    PG_LEVEL_NUM))
> 			continue;
>
> 		/* Note, a successful atomic zap also does a remote TLB flush. */
> @@ -1590,6 +1674,14 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
>
> 	BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
>
> +	/*
> +	 * First TDX generation doesn't support write protecting private
> +	 * mappings, since there's no secure EPT API to support it. It
> +	 * is a bug to reach here for TDX guest.
> +	 */
> +	if (WARN_ON(is_private_sp(root)))
> +		return spte_set;
> +
> 	rcu_read_lock();
>
> 	for_each_tdp_pte_min_level(iter, root->spt, root->role.level,