Message-ID: <20240821202814.711673-4-dwmw2@infradead.org>
Date: Wed, 21 Aug 2024 21:28:12 +0100
From: David Woodhouse <dwmw2@...radead.org>
To: Sean Christopherson <seanjc@...gle.com>,
Paolo Bonzini <pbonzini@...hat.com>,
"Hussain, Mushahid" <hmushi@...zon.co.uk>
Cc: Vitaly Kuznetsov <vkuznets@...hat.com>,
Wanpeng Li <wanpengli@...cent.com>,
Jim Mattson <jmattson@...gle.com>,
Joerg Roedel <joro@...tes.org>,
kvm@...r.kernel.org,
linux-kernel@...r.kernel.org,
Mingwei Zhang <mizhang@...gle.com>,
Maxim Levitsky <mlevitsk@...hat.com>
Subject: [PATCH v3 4/5] KVM: pfncache: Move invalidation to invalidate_range_end hook

From: David Woodhouse <dwmw@...zon.co.uk>

By performing the invalidation from the 'end' hook, the process is a lot
cleaner and simpler because it is guaranteed that ->needs_invalidation
will be cleared after hva_to_pfn() has been called, so the only thing
that hva_to_pfn_retry() needs to do is *wait* for there to be no pending
invalidations.

Another false positive on the range-based checks is thus removed as well.
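
Conceptually, the refresh loop in hva_to_pfn_retry() now reduces to the
sketch below (a simplified view pieced together from the hunks in this
patch; the unmap/error handling and the point where ->needs_invalidation
is set are elided, and the hva_to_pfn() argument values are illustrative
only):

    do {
        write_unlock_irq(&gpc->lock);

        /* Look up the new pfn with the lock dropped; may sleep. */
        new_pfn = hva_to_pfn(gpc->uhva, false, false, NULL, true, NULL);

        /*
         * Wait out any invalidation of this uHVA that was already in
         * flight when the lookup ran.  Its invalidate_range_end() hook
         * clears ->needs_invalidation, which the loop condition catches.
         */
        gpc_wait_for_invalidations(gpc);

        write_lock_irq(&gpc->lock);
    } while (!gpc->needs_invalidation);

    gpc->valid = true;
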
Signed-off-by: David Woodhouse <dwmw@...zon.co.uk>
---
virt/kvm/kvm_main.c | 20 +++++++----------
virt/kvm/kvm_mm.h | 12 +++++-----
virt/kvm/pfncache.c | 55 ++++++++++++++++++++-------------------------
3 files changed, 38 insertions(+), 49 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e04eb700448b..cca870f1a78c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -795,18 +795,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
}
spin_unlock(&kvm->mn_invalidate_lock);
- /*
- * Invalidate pfn caches _before_ invalidating the secondary MMUs, i.e.
- * before acquiring mmu_lock, to avoid holding mmu_lock while acquiring
- * each cache's lock. There are relatively few caches in existence at
- * any given time, and the caches themselves can check for hva overlap,
- * i.e. don't need to rely on memslot overlap checks for performance.
- * Because this runs without holding mmu_lock, the pfn caches must use
- * mn_active_invalidate_count (see above) instead of
- * mmu_invalidate_in_progress.
- */
- gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end);
-
/*
* If one or more memslots were found and thus zapped, notify arch code
* that guest memory has been reclaimed. This needs to be done *after*
@@ -860,6 +848,14 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
__kvm_handle_hva_range(kvm, &hva_range);
+ /*
+ * It's safe to invalidate the gfn_to_pfn_caches from this 'end'
+ * hook, because hva_to_pfn_retry() will wait until no active
+ * invalidations could be affecting the corresponding uHVA
+ * before allowing a newly mapped GPC to become valid.
+ */
+ gfn_to_pfn_cache_invalidate(kvm, range->start, range->end);
+
/* Pairs with the increment in range_start(). */
spin_lock(&kvm->mn_invalidate_lock);
if (!WARN_ON_ONCE(!kvm->mn_active_invalidate_count))
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index 715f19669d01..34e4e67f09f8 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -24,13 +24,13 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
bool *async, bool write_fault, bool *writable);
#ifdef CONFIG_HAVE_KVM_PFNCACHE
-void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
- unsigned long start,
- unsigned long end);
+void gfn_to_pfn_cache_invalidate(struct kvm *kvm,
+ unsigned long start,
+ unsigned long end);
#else
-static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
- unsigned long start,
- unsigned long end)
+static inline void gfn_to_pfn_cache_invalidate(struct kvm *kvm,
+ unsigned long start,
+ unsigned long end)
{
}
#endif /* HAVE_KVM_PFNCACHE */
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index fa494eb3d924..3f48df8cd6e5 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -20,10 +20,15 @@
#include "kvm_mm.h"
/*
- * MMU notifier 'invalidate_range_start' hook.
+ * MMU notifier 'invalidate_range_end' hook. The hva_to_pfn_retry() function
+ * below may look up a PFN just before it is zapped, and may be mapping it
+ * concurrently with the actual invalidation (with the GPC lock dropped). By
+ * using a separate 'needs_invalidation' flag, the concurrent invalidation
+ * can handle this case, causing hva_to_pfn_retry() to drop its result and
+ * retry correctly.
*/
-void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
- unsigned long end)
+void gfn_to_pfn_cache_invalidate(struct kvm *kvm, unsigned long start,
+ unsigned long end)
{
struct gfn_to_pfn_cache *gpc;
@@ -140,15 +145,12 @@ static bool gpc_invalidations_pending(struct gfn_to_pfn_cache *gpc)
(gpc->kvm->mmu_gpc_invalidate_range_end > gpc->uhva);
}
-static bool gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
+static void gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
{
- bool waited = false;
-
spin_lock(&gpc->kvm->mn_invalidate_lock);
if (gpc_invalidations_pending(gpc)) {
DEFINE_WAIT(wait);
- waited = true;
for (;;) {
prepare_to_wait(&gpc->kvm->gpc_invalidate_wq, &wait,
TASK_UNINTERRUPTIBLE);
@@ -163,10 +165,8 @@ static bool gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
finish_wait(&gpc->kvm->gpc_invalidate_wq, &wait);
}
spin_unlock(&gpc->kvm->mn_invalidate_lock);
- return waited;
}
-
static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
{
/* Note, the new page offset may be different than the old! */
@@ -199,28 +199,6 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
write_unlock_irq(&gpc->lock);
- /*
- * Invalidation occurs from the invalidate_range_start() hook,
- * which could already have happened before __kvm_gpc_refresh()
- * (or the previous turn around this loop) took gpc->lock().
- * If so, and if the corresponding invalidate_range_end() hook
- * hasn't happened yet, hva_to_pfn() could return a mapping
- * which is about to be stale and which should not be used. So
- * check if there are any currently-running invalidations which
- * affect the uHVA of this GPC, and retry if there are. Any
- * invalidation which starts after gpc->needs_invalidation is
- * set is fine, because it will clear that flag and trigger a
- * retry. And any invalidation which *completes* by having its
- * invalidate_range_end() hook called immediately prior to this
- * check is also fine, because the page tables are guaranteed
- * to have been changted already, so hva_to_pfn() won't return
- * a stale mapping in that case anyway.
- */
- if (gpc_wait_for_invalidations(gpc)) {
- write_lock_irq(&gpc->lock);
- continue;
- }
-
/*
* If the previous iteration "failed" due to an mmu_notifier
* event, release the pfn and unmap the kernel virtual address
@@ -261,6 +239,14 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
goto out_error;
}
+ /*
+ * Avoid populating a GPC with a user address which is already
+ * being invalidated, if the invalidate_range_start() notifier
+ * has already been called. The actual invalidation happens in
+ * the invalidate_range_end() callback, so wait until there are
+ * no active invalidations (for the relevant HVA).
+ */
+ gpc_wait_for_invalidations(gpc);
write_lock_irq(&gpc->lock);
@@ -269,6 +255,13 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
* attempting to refresh.
*/
WARN_ON_ONCE(gpc->valid);
+
+ /*
+ * Since gfn_to_pfn_cache_invalidate() is called from the
+ * kvm_mmu_notifier_invalidate_range_end() callback, it can
+ * invalidate the GPC the moment after hva_to_pfn() returned
+ * a valid PFN. If that happens, retry.
+ */
} while (!gpc->needs_invalidation);
gpc->valid = true;
--
2.44.0
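
For anyone who wants to poke at the synchronization pattern in isolation,
below is a small, self-contained userspace model of it. Everything in it
is invented for illustration: pthread primitives stand in for gpc->lock,
mn_invalidate_lock and gpc_invalidate_wq, and the struct, helper names and
address values are made up. It is not KVM code, just the shape of the
"look up, then wait for pending invalidations, then re-check the flag"
handshake this patch relies on.

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t inv_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ mn_invalidate_lock */
    static pthread_cond_t  inv_wq   = PTHREAD_COND_INITIALIZER;  /* ~ gpc_invalidate_wq  */

    /* Range of a currently-running invalidation; start == end means "none". */
    static unsigned long inv_start, inv_end;

    struct cache {                  /* ~ struct gfn_to_pfn_cache, heavily cut down */
        pthread_mutex_t lock;       /* ~ gpc->lock (an rwlock in the real code)    */
        unsigned long uhva;
        bool needs_invalidation;
        bool valid;
    };

    /* ~ gfn_to_pfn_cache_invalidate(), called from the "end" notifier hook. */
    static void invalidate(struct cache *c, unsigned long start, unsigned long end)
    {
        pthread_mutex_lock(&c->lock);
        if (c->uhva >= start && c->uhva < end) {
            c->needs_invalidation = false;  /* forces refresh() to retry */
            c->valid = false;
        }
        pthread_mutex_unlock(&c->lock);

        pthread_mutex_lock(&inv_lock);
        inv_start = inv_end = 0;            /* the invalidation has completed */
        pthread_cond_broadcast(&inv_wq);
        pthread_mutex_unlock(&inv_lock);
    }

    /* ~ gpc_invalidations_pending() + gpc_wait_for_invalidations() */
    static void wait_for_invalidations(struct cache *c)
    {
        pthread_mutex_lock(&inv_lock);
        while (inv_start < inv_end && c->uhva >= inv_start && c->uhva < inv_end)
            pthread_cond_wait(&inv_wq, &inv_lock);
        pthread_mutex_unlock(&inv_lock);
    }

    /* ~ hva_to_pfn_retry(): retry until no invalidation raced with the lookup. */
    static void refresh(struct cache *c)
    {
        pthread_mutex_lock(&c->lock);
        do {
            c->needs_invalidation = true;

            pthread_mutex_unlock(&c->lock); /* lock dropped for the lookup... */
            /* ...the hva_to_pfn() equivalent would run here... */

            /* Wait until no invalidation covering c->uhva is in flight. */
            wait_for_invalidations(c);

            pthread_mutex_lock(&c->lock);
            /*
             * If an invalidation of c->uhva completed while the lock was
             * dropped, it cleared ->needs_invalidation: go round again.
             */
        } while (!c->needs_invalidation);
        c->valid = true;
        pthread_mutex_unlock(&c->lock);
    }

    static struct cache gpc = { .lock = PTHREAD_MUTEX_INITIALIZER, .uhva = 0x1000 };

    static void *invalidator(void *arg)
    {
        (void)arg;
        usleep(10000);                  /* let refresh() reach its wait */
        invalidate(&gpc, 0x0, 0x2000);  /* overlaps gpc.uhva: one retry */
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        /* Pretend an invalidate_range_start() is already in flight. */
        inv_start = 0x0;
        inv_end   = 0x2000;

        pthread_create(&t, NULL, invalidator, NULL);
        refresh(&gpc);                  /* waits, retries once, then succeeds */
        pthread_join(t, NULL);

        printf("valid=%d\n", gpc.valid);
        return 0;
    }

Built with "cc -pthread" (any file name will do), the refresh() thread
blocks until the simulated invalidation completes, retries once because
->needs_invalidation was cleared, and only then marks the cache valid,
printing "valid=1".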