Message-ID: <20240926013506.860253-11-jthoughton@google.com>
Date: Thu, 26 Sep 2024 01:34:58 +0000
From: James Houghton <jthoughton@...gle.com>
To: Sean Christopherson <seanjc@...gle.com>, Paolo Bonzini <pbonzini@...hat.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>, David Matlack <dmatlack@...gle.com>, 
	David Rientjes <rientjes@...gle.com>, James Houghton <jthoughton@...gle.com>, 
	Jason Gunthorpe <jgg@...pe.ca>, Jonathan Corbet <corbet@....net>, Marc Zyngier <maz@...nel.org>, 
	Oliver Upton <oliver.upton@...ux.dev>, Wei Xu <weixugc@...gle.com>, Yu Zhao <yuzhao@...gle.com>, 
	Axel Rasmussen <axelrasmussen@...gle.com>, kvm@...r.kernel.org, linux-doc@...r.kernel.org, 
	linux-kernel@...r.kernel.org, linux-mm@...ck.org
Subject: [PATCH v7 10/18] KVM: x86/mmu: Support rmap walks without holding
 mmu_lock when aging gfns

From: Sean Christopherson <seanjc@...gle.com>

Because an L1 KVM can disable A/D bits for its L2, even when
kvm_ad_enabled() returns true in L0, we cannot always age locklessly:
aging requires marking non-A/D SPTEs for access tracking, which is not
yet supported outside of mmu_lock.

We can, however, always gather age information locklessly.
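
For reviewers less familiar with the idiom: the reason the A/D-enabled
case is safe without mmu_lock is that clearing the Accessed bit is a
single atomic bit operation on the live SPTE, done below with the
clear_bit(ffs(shadow_accessed_mask) - 1, ...) pattern in
kvm_rmap_age_gfn_range_lockless(). Here is a minimal userspace sketch
of that pattern (illustrative only: the mask value is an assumption,
and a plain read-modify-write stands in for the kernel's atomic
clear_bit()):

  #include <stdint.h>
  #include <stdio.h>
  #include <strings.h>  /* ffs() */

  /* Assumed bit position for illustration: the Accessed bit is bit 8
   * in EPT SPTEs and bit 5 in legacy paging; the kernel derives the
   * real value from shadow_accessed_mask. */
  #define ACCESSED_MASK (1ULL << 8)

  int main(void)
  {
          uint64_t spte = ACCESSED_MASK | 0x1000;  /* SPTE with A bit set */

          /* ffs() returns a 1-based bit position, hence the "- 1". */
          int bit = ffs((int)ACCESSED_MASK) - 1;

          /* Stand-in for the kernel's atomic clear_bit() on the SPTE. */
          spte &= ~(1ULL << bit);

          printf("cleared accessed bit %d, spte is now %#llx\n",
                 bit, (unsigned long long)spte);
          return 0;
  }

This is also why the lockless path below only clears the bit when
spte_ad_enabled() is true, and merely reports non-A/D SPTEs as young.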

Signed-off-by: Sean Christopherson <seanjc@...gle.com>
[jthoughton: Added changelog, adjusted conditional]
Signed-off-by: James Houghton <jthoughton@...gle.com>
---
 arch/x86/kvm/mmu/mmu.c | 66 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 62 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 72c682fa207a..a63497bbcc61 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1014,13 +1014,11 @@ static unsigned long kvm_rmap_get(struct kvm_rmap_head *rmap_head)
  * locking is the same, but the caller is disallowed from modifying the rmap,
  * and so the unlock flow is a nop if the rmap is/was empty.
  */
-__maybe_unused
 static unsigned long kvm_rmap_lock_readonly(struct kvm_rmap_head *rmap_head)
 {
 	return __kvm_rmap_lock(rmap_head);
 }
 
-__maybe_unused
 static void kvm_rmap_unlock_readonly(struct kvm_rmap_head *rmap_head,
 				     unsigned long old_val)
 {
@@ -1736,8 +1734,53 @@ static void rmap_add(struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot,
 	__rmap_add(vcpu->kvm, cache, slot, spte, gfn, access);
 }
 
-static bool kvm_rmap_age_gfn_range(struct kvm *kvm,
-				   struct kvm_gfn_range *range, bool test_only)
+static bool kvm_rmap_age_gfn_range_lockless(struct kvm *kvm,
+					    struct kvm_gfn_range *range,
+					    bool test_only)
+{
+	struct kvm_rmap_head *rmap_head;
+	struct rmap_iterator iter;
+	unsigned long rmap_val;
+	bool young = false;
+	u64 *sptep;
+	gfn_t gfn;
+	int level;
+	u64 spte;
+
+	for (level = PG_LEVEL_4K; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) {
+		for (gfn = range->start; gfn < range->end;
+		     gfn += KVM_PAGES_PER_HPAGE(level)) {
+			rmap_head = gfn_to_rmap(gfn, level, range->slot);
+			rmap_val = kvm_rmap_lock_readonly(rmap_head);
+
+			for_each_rmap_spte_lockless(rmap_head, &iter, sptep, spte) {
+				if (!is_accessed_spte(spte))
+					continue;
+
+				if (test_only) {
+					kvm_rmap_unlock_readonly(rmap_head, rmap_val);
+					return true;
+				}
+
+				/*
+				 * Marking SPTEs for access tracking outside of
+				 * mmu_lock is unsupported.  Report the page as
+				 * young, but otherwise leave it as-is.
+				 */
+				if (spte_ad_enabled(spte))
+					clear_bit((ffs(shadow_accessed_mask) - 1),
+						  (unsigned long *)sptep);
+				young = true;
+			}
+
+			kvm_rmap_unlock_readonly(rmap_head, rmap_val);
+		}
+	}
+	return young;
+}
+
+static bool __kvm_rmap_age_gfn_range(struct kvm *kvm,
+				     struct kvm_gfn_range *range, bool test_only)
 {
 	struct slot_rmap_walk_iterator iterator;
 	struct rmap_iterator iter;
@@ -1776,6 +1819,21 @@ static bool kvm_rmap_age_gfn_range(struct kvm *kvm,
 	return young;
 }
 
+static bool kvm_rmap_age_gfn_range(struct kvm *kvm,
+				   struct kvm_gfn_range *range, bool test_only)
+{
+	/*
+	 * We can always locklessly test if an spte is young. Because marking
+	 * non-A/D sptes for access tracking without holding the mmu_lock is
+	 * not currently supported, we cannot always locklessly clear.
+	 */
+	if (test_only)
+		return kvm_rmap_age_gfn_range_lockless(kvm, range, test_only);
+
+	lockdep_assert_held_write(&kvm->mmu_lock);
+	return __kvm_rmap_age_gfn_range(kvm, range, test_only);
+}
+
 static bool kvm_has_shadow_mmu_sptes(struct kvm *kvm)
 {
 	return !tdp_mmu_enabled || READ_ONCE(kvm->arch.indirect_shadow_pages);
-- 
2.46.0.792.g87dc391469-goog

