Message-ID: <20240821202814.711673-4-dwmw2@infradead.org>
Date: Wed, 21 Aug 2024 21:28:12 +0100
From: David Woodhouse <dwmw2@...radead.org>
To: Sean Christopherson <seanjc@...gle.com>,
	Paolo Bonzini <pbonzini@...hat.com>,
	"Hussain, Mushahid" <hmushi@...zon.co.uk>
Cc: Vitaly Kuznetsov <vkuznets@...hat.com>,
	Wanpeng Li <wanpengli@...cent.com>,
	Jim Mattson <jmattson@...gle.com>,
	Joerg Roedel <joro@...tes.org>,
	kvm@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	Mingwei Zhang <mizhang@...gle.com>,
	Maxim Levitsky <mlevitsk@...hat.com>
Subject: [PATCH v3 4/5] KVM: pfncache: Move invalidation to invalidate_range_end hook

From: David Woodhouse <dwmw@...zon.co.uk>

By performing the invalidation from the 'end' hook, the process becomes a lot
cleaner and simpler, because it is then guaranteed that ->needs_invalidation
will be cleared after hva_to_pfn() has been called, so the only thing that
hva_to_pfn_retry() needs to do is *wait* until there are no pending
invalidations.

Another false positive from the range-based checks is thus removed as well.

Signed-off-by: David Woodhouse <dwmw@...zon.co.uk>
---
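As a review aid (not part of the commit message): below is a minimal userspace
model of the wait/retry ordering described above. It is a sketch, not KVM
code; 'struct cache', refresh(), invalidate() and lookup() are illustrative
stand-ins for the gfn_to_pfn_cache, hva_to_pfn_retry(), the two MMU notifier
hooks and hva_to_pfn(), and a single mutex stands in for both gpc->lock and
mn_invalidate_lock.

/*
 * Minimal userspace model of the refresh vs. invalidate ordering.
 * Illustrative only; none of these names are KVM symbols.
 * Build with: cc -pthread model.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct cache {
	pthread_mutex_t lock;            /* stands in for gpc->lock + mn_invalidate_lock */
	pthread_cond_t  no_invalidation; /* stands in for gpc_invalidate_wq */
	int  active_invalidations;       /* stands in for mn_active_invalidate_count */
	bool needs_invalidation;         /* cleared by an overlapping invalidation */
	long pfn;                        /* the cached translation */
};

/* Stand-in for hva_to_pfn(): any lookup whose result may go stale. */
static long lookup(void)
{
	return 42;
}

/* Invalidator: open the window (range_start), change the page tables,
 * then zap overlapping caches and close the window (range_end). */
static void *invalidate(void *arg)
{
	struct cache *c = arg;

	pthread_mutex_lock(&c->lock);
	c->active_invalidations++;          /* invalidate_range_start() */
	pthread_mutex_unlock(&c->lock);

	/* ... the page tables are changed here ... */

	pthread_mutex_lock(&c->lock);
	c->needs_invalidation = false;      /* invalidate_range_end(): zap the cache */
	if (--c->active_invalidations == 0)
		pthread_cond_broadcast(&c->no_invalidation);
	pthread_mutex_unlock(&c->lock);
	return NULL;
}

/* Refresher: look up a translation, wait out any open invalidation
 * window, and retry if the cache was zapped in the meantime. */
static long refresh(struct cache *c)
{
	long pfn;

	pthread_mutex_lock(&c->lock);
	do {
		c->needs_invalidation = true;
		pthread_mutex_unlock(&c->lock);

		pfn = lookup();             /* may race with invalidate() */

		pthread_mutex_lock(&c->lock);
		while (c->active_invalidations)       /* gpc_wait_for_invalidations() */
			pthread_cond_wait(&c->no_invalidation, &c->lock);

		/* A window that closed after lookup() cleared the flag: retry. */
	} while (!c->needs_invalidation);

	c->pfn = pfn;                       /* the cache becomes valid */
	pthread_mutex_unlock(&c->lock);
	return pfn;
}

int main(void)
{
	struct cache c = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.no_invalidation = PTHREAD_COND_INITIALIZER,
	};
	pthread_t inv;

	pthread_create(&inv, NULL, invalidate, &c);
	printf("cached pfn %ld\n", refresh(&c));
	pthread_join(inv, NULL);
	return 0;
}

Either the racing window closes before the final flag check, which forces a
retry, or it closes afterwards and simply invalidates the cache as usual.
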
 virt/kvm/kvm_main.c | 20 +++++++----------
 virt/kvm/kvm_mm.h   | 12 +++++-----
 virt/kvm/pfncache.c | 55 ++++++++++++++++++++-------------------------
 3 files changed, 38 insertions(+), 49 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e04eb700448b..cca870f1a78c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -795,18 +795,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	}
 	spin_unlock(&kvm->mn_invalidate_lock);
 
-	/*
-	 * Invalidate pfn caches _before_ invalidating the secondary MMUs, i.e.
-	 * before acquiring mmu_lock, to avoid holding mmu_lock while acquiring
-	 * each cache's lock.  There are relatively few caches in existence at
-	 * any given time, and the caches themselves can check for hva overlap,
-	 * i.e. don't need to rely on memslot overlap checks for performance.
-	 * Because this runs without holding mmu_lock, the pfn caches must use
-	 * mn_active_invalidate_count (see above) instead of
-	 * mmu_invalidate_in_progress.
-	 */
-	gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end);
-
 	/*
 	 * If one or more memslots were found and thus zapped, notify arch code
 	 * that guest memory has been reclaimed.  This needs to be done *after*
@@ -860,6 +848,14 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 
 	__kvm_handle_hva_range(kvm, &hva_range);
 
+	/*
+	 * It's safe to invalidate the gfn_to_pfn_caches from this 'end'
+	 * hook, because hva_to_pfn_retry() will wait until no active
+	 * invalidations could be affecting the corresponding uHVA
+	 * before allowing a newly mapped GPC to become valid.
+	 */
+	gfn_to_pfn_cache_invalidate(kvm, range->start, range->end);
+
 	/* Pairs with the increment in range_start(). */
 	spin_lock(&kvm->mn_invalidate_lock);
 	if (!WARN_ON_ONCE(!kvm->mn_active_invalidate_count))
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index 715f19669d01..34e4e67f09f8 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -24,13 +24,13 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
 		     bool *async, bool write_fault, bool *writable);
 
 #ifdef CONFIG_HAVE_KVM_PFNCACHE
-void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
-				       unsigned long start,
-				       unsigned long end);
+void gfn_to_pfn_cache_invalidate(struct kvm *kvm,
+				 unsigned long start,
+				 unsigned long end);
 #else
-static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
-						     unsigned long start,
-						     unsigned long end)
+static inline void gfn_to_pfn_cache_invalidate(struct kvm *kvm,
+					       unsigned long start,
+					       unsigned long end)
 {
 }
 #endif /* HAVE_KVM_PFNCACHE */
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index fa494eb3d924..3f48df8cd6e5 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -20,10 +20,15 @@
 #include "kvm_mm.h"
 
 /*
- * MMU notifier 'invalidate_range_start' hook.
+ * MMU notifier 'invalidate_range_end' hook. The hva_to_pfn_retry() function
+ * below may look up a PFN just before it is zapped, and may be mapping it
+ * concurrently with the actual invalidation (with the GPC lock dropped). By
+ * using a separate 'needs_invalidation' flag, the concurrent invalidation
+ * can handle this case, causing hva_to_pfn_retry() to drop its result and
+ * retry correctly.
  */
-void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
-				       unsigned long end)
+void gfn_to_pfn_cache_invalidate(struct kvm *kvm, unsigned long start,
+				 unsigned long end)
 {
 	struct gfn_to_pfn_cache *gpc;
 
@@ -140,15 +145,12 @@ static bool gpc_invalidations_pending(struct gfn_to_pfn_cache *gpc)
 		(gpc->kvm->mmu_gpc_invalidate_range_end > gpc->uhva);
 }
 
-static bool gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
+static void gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
 {
-	bool waited = false;
-
 	spin_lock(&gpc->kvm->mn_invalidate_lock);
 	if (gpc_invalidations_pending(gpc)) {
 		DEFINE_WAIT(wait);
 
-		waited = true;
 		for (;;) {
 			prepare_to_wait(&gpc->kvm->gpc_invalidate_wq, &wait,
 					TASK_UNINTERRUPTIBLE);
@@ -163,10 +165,8 @@ static bool gpc_wait_for_invalidations(struct gfn_to_pfn_cache *gpc)
 		finish_wait(&gpc->kvm->gpc_invalidate_wq, &wait);
 	}
 	spin_unlock(&gpc->kvm->mn_invalidate_lock);
-	return waited;
 }
 
-
 static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 {
 	/* Note, the new page offset may be different than the old! */
@@ -199,28 +199,6 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 
 		write_unlock_irq(&gpc->lock);
 
-		/*
-		 * Invalidation occurs from the invalidate_range_start() hook,
-		 * which could already have happened before __kvm_gpc_refresh()
-		 * (or the previous turn around this loop) took gpc->lock().
-		 * If so, and if the corresponding invalidate_range_end() hook
-		 * hasn't happened yet, hva_to_pfn() could return a mapping
-		 * which is about to be stale and which should not be used. So
-		 * check if there are any currently-running invalidations which
-		 * affect the uHVA of this GPC, and retry if there are. Any
-		 * invalidation which starts after gpc->needs_invalidation is
-		 * set is fine, because it will clear that flag and trigger a
-		 * retry. And any invalidation which *completes* by having its
-		 * invalidate_range_end() hook called immediately prior to this
-		 * check is also fine, because the page tables are guaranteed
-		 * to have been changted already, so hva_to_pfn() won't return
-		 * a stale mapping in that case anyway.
-		 */
-		if (gpc_wait_for_invalidations(gpc)) {
-			write_lock_irq(&gpc->lock);
-			continue;
-		}
-
 		/*
 		 * If the previous iteration "failed" due to an mmu_notifier
 		 * event, release the pfn and unmap the kernel virtual address
@@ -261,6 +239,14 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 			goto out_error;
 		}
 
+		/*
+		 * Avoid populating a GPC with a user address which is already
+		 * being invalidated, if the invalidate_range_start() notifier
+		 * has already been called. The actual invalidation happens in
+		 * the invalidate_range_end() callback, so wait until there are
+		 * no active invalidations (for the relevant HVA).
+		 */
+		gpc_wait_for_invalidations(gpc);
 
 		write_lock_irq(&gpc->lock);
 
@@ -269,6 +255,13 @@ static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 		 * attempting to refresh.
 		 */
 		WARN_ON_ONCE(gpc->valid);
+
+		/*
+		 * Since gfn_to_pfn_cache_invalidate() is called from the
+		 * kvm_mmu_notifier_invalidate_range_end() callback, it can
+		 * invalidate the GPC the moment after hva_to_pfn() returned
+		 * a valid PFN. If that happens, retry.
+		 */
 	} while (!gpc->needs_invalidation);
 
 	gpc->valid = true;
-- 
2.44.0

