[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241105184333.2305744-11-jthoughton@google.com>
Date: Tue, 5 Nov 2024 18:43:32 +0000
From: James Houghton <jthoughton@...gle.com>
To: Sean Christopherson <seanjc@...gle.com>, Paolo Bonzini <pbonzini@...hat.com>
Cc: David Matlack <dmatlack@...gle.com>, David Rientjes <rientjes@...gle.com>,
James Houghton <jthoughton@...gle.com>, Marc Zyngier <maz@...nel.org>,
Oliver Upton <oliver.upton@...ux.dev>, Wei Xu <weixugc@...gle.com>, Yu Zhao <yuzhao@...gle.com>,
Axel Rasmussen <axelrasmussen@...gle.com>, kvm@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH v8 10/11] KVM: x86/mmu: Support rmap walks without holding
mmu_lock when aging gfns
From: Sean Christopherson <seanjc@...gle.com>
When A/D bits are supported on sptes, it is safe to simply clear the
Accessed bits.
The less obvious case is marking sptes for access tracking in the
non-A/D case (for EPT only). In this case, we have to be sure that it is
okay for TLB entries to exist for non-present sptes. For example, when
doing dirty tracking, if we come across a non-present SPTE, we need to
know that we need to do a TLB invalidation.
This case is already supported today (as we already support *not* doing
TLBIs for clear_young(); there is a separate notifier for clearing *and*
flushing, clear_flush_young()). This works today because GET_DIRTY_LOG
flushes the TLB before returning to userspace.
Signed-off-by: Sean Christopherson <seanjc@...gle.com>
Co-developed-by: James Houghton <jthoughton@...gle.com>
Signed-off-by: James Houghton <jthoughton@...gle.com>
---
arch/x86/kvm/mmu/mmu.c | 72 +++++++++++++++++++++++-------------------
1 file changed, 39 insertions(+), 33 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 71019762a28a..bdd6abf9b44e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -952,13 +952,11 @@ static unsigned long kvm_rmap_get(struct kvm_rmap_head *rmap_head)
* locking is the same, but the caller is disallowed from modifying the rmap,
* and so the unlock flow is a nop if the rmap is/was empty.
*/
-__maybe_unused
static unsigned long kvm_rmap_lock_readonly(struct kvm_rmap_head *rmap_head)
{
return __kvm_rmap_lock(rmap_head);
}
-__maybe_unused
static void kvm_rmap_unlock_readonly(struct kvm_rmap_head *rmap_head,
unsigned long old_val)
{
@@ -1677,37 +1675,48 @@ static void rmap_add(struct kvm_vcpu *vcpu, const struct kvm_memory_slot *slot,
}
static bool kvm_rmap_age_gfn_range(struct kvm *kvm,
- struct kvm_gfn_range *range, bool test_only)
+ struct kvm_gfn_range *range,
+ bool test_only)
{
- struct slot_rmap_walk_iterator iterator;
+ struct kvm_rmap_head *rmap_head;
struct rmap_iterator iter;
+ unsigned long rmap_val;
bool young = false;
u64 *sptep;
+ gfn_t gfn;
+ int level;
+ u64 spte;
- for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
- range->start, range->end - 1, &iterator) {
- for_each_rmap_spte(iterator.rmap, &iter, sptep) {
- u64 spte = *sptep;
+ for (level = PG_LEVEL_4K; level <= KVM_MAX_HUGEPAGE_LEVEL; level++) {
+ for (gfn = range->start; gfn < range->end;
+ gfn += KVM_PAGES_PER_HPAGE(level)) {
+ rmap_head = gfn_to_rmap(gfn, level, range->slot);
+ rmap_val = kvm_rmap_lock_readonly(rmap_head);
- if (!is_accessed_spte(spte))
- continue;
+ for_each_rmap_spte_lockless(rmap_head, &iter, sptep, spte) {
+ if (!is_accessed_spte(spte))
+ continue;
+
+ if (test_only) {
+ kvm_rmap_unlock_readonly(rmap_head, rmap_val);
+ return true;
+ }
- if (test_only)
- return true;
-
- if (spte_ad_enabled(spte)) {
- clear_bit((ffs(shadow_accessed_mask) - 1),
- (unsigned long *)sptep);
- } else {
- /*
- * WARN if mmu_spte_update() signals the need
- * for a TLB flush, as Access tracking a SPTE
- * should never trigger an _immediate_ flush.
- */
- spte = mark_spte_for_access_track(spte);
- WARN_ON_ONCE(mmu_spte_update(sptep, spte));
+ if (spte_ad_enabled(spte))
+ clear_bit((ffs(shadow_accessed_mask) - 1),
+ (unsigned long *)sptep);
+ else
+ /*
+ * If the following cmpxchg fails, the
+ * spte is being concurrently modified
+ * and should most likely stay young.
+ */
+ cmpxchg64(sptep, spte,
+ mark_spte_for_access_track(spte));
+ young = true;
}
- young = true;
+
+ kvm_rmap_unlock_readonly(rmap_head, rmap_val);
}
}
return young;
@@ -1725,11 +1734,8 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
if (tdp_mmu_enabled)
young = kvm_tdp_mmu_age_gfn_range(kvm, range);
- if (kvm_has_shadow_mmu_sptes(kvm)) {
- write_lock(&kvm->mmu_lock);
+ if (kvm_has_shadow_mmu_sptes(kvm))
young |= kvm_rmap_age_gfn_range(kvm, range, false);
- write_unlock(&kvm->mmu_lock);
- }
return young;
}
@@ -1741,11 +1747,11 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
if (tdp_mmu_enabled)
young = kvm_tdp_mmu_test_age_gfn(kvm, range);
- if (!young && kvm_has_shadow_mmu_sptes(kvm)) {
- write_lock(&kvm->mmu_lock);
+ if (young)
+ return young;
+
+ if (kvm_has_shadow_mmu_sptes(kvm))
young |= kvm_rmap_age_gfn_range(kvm, range, true);
- write_unlock(&kvm->mmu_lock);
- }
return young;
}
--
2.47.0.199.ga7371fff76-goog
Powered by blists - more mailing lists