[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220429210025.3293691-3-seanjc@google.com>
Date: Fri, 29 Apr 2022 21:00:19 +0000
From: Sean Christopherson <seanjc@...gle.com>
To: Paolo Bonzini <pbonzini@...hat.com>
Cc: kvm@...r.kernel.org, linux-kernel@...r.kernel.org,
Sean Christopherson <seanjc@...gle.com>,
Lai Jiangshan <jiangshanlai@...il.com>,
David Woodhouse <dwmw@...zon.co.uk>,
Mingwei Zhang <mizhang@...gle.com>
Subject: [PATCH v3 2/8] Revert "KVM: Fix race between mmu_notifier
invalidation and pfncache refresh"
This reverts commit c496097d2c0bdc229f82d72b4b1e55d64974c316.
Signed-off-by: Sean Christopherson <seanjc@...gle.com>
---
virt/kvm/kvm_main.c | 9 ------
virt/kvm/pfncache.c | 70 ++++++++++++++-------------------------------
2 files changed, 21 insertions(+), 58 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0848430f36c6..dfb7dabdbc63 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -705,15 +705,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
kvm->mn_active_invalidate_count++;
spin_unlock(&kvm->mn_invalidate_lock);
- /*
- * Invalidate pfn caches _before_ invalidating the secondary MMUs, i.e.
- * before acquiring mmu_lock, to avoid holding mmu_lock while acquiring
- * each cache's lock. There are relatively few caches in existence at
- * any given time, and the caches themselves can check for hva overlap,
- * i.e. don't need to rely on memslot overlap checks for performance.
- * Because this runs without holding mmu_lock, the pfn caches must use
- * mn_active_invalidate_count (see above) instead of mmu_notifier_count.
- */
gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
hva_range.may_block);
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 71c84a43024c..dd84676615f1 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -112,63 +112,29 @@ static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva, gpa_t gpa)
}
}
-static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
+static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva)
{
- bool first_attempt = true;
unsigned long mmu_seq;
kvm_pfn_t new_pfn;
+ int retry;
- lockdep_assert_held_write(&gpc->lock);
-
- for (;;) {
+ do {
mmu_seq = kvm->mmu_notifier_seq;
smp_rmb();
- write_unlock_irq(&gpc->lock);
-
- /* Opportunistically check for resched while the lock isn't held. */
- if (!first_attempt)
- cond_resched();
-
/* We always request a writeable mapping */
- new_pfn = hva_to_pfn(gpc->uhva, false, NULL, true, NULL);
-
- write_lock_irq(&gpc->lock);
-
+ new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL);
if (is_error_noslot_pfn(new_pfn))
break;
- first_attempt = false;
-
- /*
- * Wait for mn_active_invalidate_count, not mmu_notifier_count,
- * to go away, as the invalidation in the mmu_notifier event
- * occurs _before_ mmu_notifier_count is elevated.
- *
- * Note, mn_active_invalidate_count can change at any time as
- * it's not protected by gpc->lock. But, it is guaranteed to
- * be elevated before the mmu_notifier acquires gpc->lock, and
- * isn't dropped until after mmu_notifier_seq is updated. So,
- * this task may get a false positive of sorts, i.e. see an
- * elevated count and wait even though it's technically safe to
- * proceed (becase the mmu_notifier will invalidate the cache
- * _after_ it's refreshed here), but the cache will never be
- * refreshed with stale data, i.e. won't get false negatives.
- */
- if (kvm->mn_active_invalidate_count)
- continue;
-
- /*
- * Ensure mn_active_invalidate_count is read before
- * mmu_notifier_seq. This pairs with the smp_wmb() in
- * mmu_notifier_invalidate_range_end() to guarantee either the
- * old (non-zero) value of mn_active_invalidate_count or the
- * new (incremented) value of mmu_notifier_seq is observed.
- */
- smp_rmb();
- if (kvm->mmu_notifier_seq == mmu_seq)
+ KVM_MMU_READ_LOCK(kvm);
+ retry = mmu_notifier_retry_hva(kvm, mmu_seq, uhva);
+ KVM_MMU_READ_UNLOCK(kvm);
+ if (!retry)
break;
- }
+
+ cond_resched();
+ } while (1);
return new_pfn;
}
@@ -224,6 +190,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
* drop the lock and do the HVA to PFN lookup again.
*/
if (!old_valid || old_uhva != gpc->uhva) {
+ unsigned long uhva = gpc->uhva;
void *new_khva = NULL;
/* Placeholders for "hva is valid but not yet mapped" */
@@ -231,10 +198,15 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
gpc->khva = NULL;
gpc->valid = true;
- new_pfn = hva_to_pfn_retry(kvm, gpc);
+ write_unlock_irq(&gpc->lock);
+
+ new_pfn = hva_to_pfn_retry(kvm, uhva);
if (is_error_noslot_pfn(new_pfn)) {
ret = -EFAULT;
- } else if (gpc->usage & KVM_HOST_USES_PFN) {
+ goto map_done;
+ }
+
+ if (gpc->usage & KVM_HOST_USES_PFN) {
if (new_pfn == old_pfn) {
new_khva = old_khva;
old_pfn = KVM_PFN_ERR_FAULT;
@@ -250,10 +222,10 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
new_khva += page_offset;
else
ret = -EFAULT;
- } else {
- /* Nothing more to do, the pfn is consumed only by the guest. */
}
+ map_done:
+ write_lock_irq(&gpc->lock);
if (ret) {
gpc->valid = false;
gpc->pfn = KVM_PFN_ERR_FAULT;
--
2.36.0.464.gb9c8b46e94-goog
Powered by blists - more mailing lists