Message-ID: <20240829191135.2041489-4-vipinsh@google.com>
Date: Thu, 29 Aug 2024 12:11:34 -0700
From: Vipin Sharma <vipinsh@...gle.com>
To: seanjc@...gle.com, pbonzini@...hat.com, dmatlack@...gle.com
Cc: kvm@...r.kernel.org, linux-kernel@...r.kernel.org, 
	Vipin Sharma <vipinsh@...gle.com>
Subject: [PATCH v2 3/4] KVM: x86/mmu: Rearrange locks and to_zap count for NX
 huge page recovery

Extract the lock acquisition out of the TDP and legacy MMU NX huge page
recovery flows and take the locks at a common place in the recovery
worker. Also, move the to_zap calculation into the respective recovery
functions.

Hoisting the locks out will allow the TDP flow to acquire and use locks
the same way as other TDP APIs, i.e. take the read lock and then call
the TDP APIs. This will be utilized when TDP MMU NX huge page recovery
switches to using the read lock.
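
As an illustration only (this is a sketch of the intended future
change, not something done in this patch), the eventual TDP recovery
call site is expected to look roughly like:

	/* Future change, not in this patch: recover under the read lock. */
	read_lock(&kvm->mmu_lock);
	kvm_tdp_mmu_recover_nx_huge_pages(kvm);
	read_unlock(&kvm->mmu_lock);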

Calculating to_zap outside the recovery code was needed because the
same code was used for both the TDP and legacy MMU. Now that the two
flows have separate code, there is no need to calculate it at a common
place. Let the respective functions handle it.
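
With this patch, the recovery worker holds the locks around both
recovery paths; condensed from the diff below:

	rcu_idx = srcu_read_lock(&kvm->srcu);
	write_lock(&kvm->mmu_lock);

	kvm_mmu_recover_nx_huge_pages(kvm);
	if (tdp_mmu_enabled)
		kvm_tdp_mmu_recover_nx_huge_pages(kvm);

	write_unlock(&kvm->mmu_lock);
	srcu_read_unlock(&kvm->srcu, rcu_idx);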

Signed-off-by: Vipin Sharma <vipinsh@...gle.com>
---
 arch/x86/kvm/mmu/mmu.c     | 45 +++++++++++++-------------------------
 arch/x86/kvm/mmu/tdp_mmu.c | 23 +++++--------------
 arch/x86/kvm/mmu/tdp_mmu.h |  5 +----
 3 files changed, 21 insertions(+), 52 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c8c64df979e3..d636850c6929 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7354,19 +7354,18 @@ bool kvm_mmu_sp_dirty_logging_enabled(struct kvm *kvm, struct kvm_mmu_page *sp)
 	return slot && kvm_slot_dirty_track_enabled(slot);
 }
 
-static void kvm_mmu_recover_nx_huge_pages(struct kvm *kvm,
-					  struct list_head *nx_huge_pages,
-					  unsigned long to_zap)
+static void kvm_mmu_recover_nx_huge_pages(struct kvm *kvm)
 {
-	int rcu_idx;
+	unsigned long pages = READ_ONCE(kvm->arch.possible_nx_huge_pages_count);
+	unsigned int ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
+	unsigned long to_zap = ratio ? DIV_ROUND_UP(pages, ratio) : 0;
 	struct kvm_mmu_page *sp;
 	LIST_HEAD(invalid_list);
 
-	rcu_idx = srcu_read_lock(&kvm->srcu);
-	write_lock(&kvm->mmu_lock);
+	lockdep_assert_held_write(&kvm->mmu_lock);
 
 	for ( ; to_zap; --to_zap) {
-		if (list_empty(nx_huge_pages))
+		if (list_empty(&kvm->arch.possible_nx_huge_pages))
 			break;
 
 		/*
@@ -7376,7 +7375,7 @@ static void kvm_mmu_recover_nx_huge_pages(struct kvm *kvm,
 		 * the total number of shadow pages.  And because the TDP MMU
 		 * doesn't use active_mmu_pages.
 		 */
-		sp = list_first_entry(nx_huge_pages,
+		sp = list_first_entry(&kvm->arch.possible_nx_huge_pages,
 				      struct kvm_mmu_page,
 				      possible_nx_huge_page_link);
 		WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
@@ -7401,9 +7400,6 @@ static void kvm_mmu_recover_nx_huge_pages(struct kvm *kvm,
 		}
 	}
 	kvm_mmu_commit_zap_page(kvm, &invalid_list);
-
-	write_unlock(&kvm->mmu_lock);
-	srcu_read_unlock(&kvm->srcu, rcu_idx);
 }
 
 static long get_nx_huge_page_recovery_timeout(u64 start_time)
@@ -7417,19 +7413,11 @@ static long get_nx_huge_page_recovery_timeout(u64 start_time)
 		       : MAX_SCHEDULE_TIMEOUT;
 }
 
-static unsigned long nx_huge_pages_to_zap(struct kvm *kvm)
-{
-	unsigned long pages = READ_ONCE(kvm->arch.possible_nx_huge_pages_count);
-	unsigned int ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
-
-	return ratio ? DIV_ROUND_UP(pages, ratio) : 0;
-}
-
 static int kvm_nx_huge_page_recovery_worker(struct kvm *kvm, uintptr_t data)
 {
-	unsigned long to_zap;
 	long remaining_time;
 	u64 start_time;
+	int rcu_idx;
 
 	while (true) {
 		start_time = get_jiffies_64();
@@ -7447,19 +7435,16 @@ static int kvm_nx_huge_page_recovery_worker(struct kvm *kvm, uintptr_t data)
 		if (kthread_should_stop())
 			return 0;
 
-		to_zap = nx_huge_pages_to_zap(kvm);
-		kvm_mmu_recover_nx_huge_pages(kvm,
-					      &kvm->arch.possible_nx_huge_pages,
-					      to_zap);
+		rcu_idx = srcu_read_lock(&kvm->srcu);
+		write_lock(&kvm->mmu_lock);
 
+		kvm_mmu_recover_nx_huge_pages(kvm);
 		if (tdp_mmu_enabled) {
-#ifdef CONFIG_X86_64
-			to_zap = kvm_tdp_mmu_nx_huge_pages_to_zap(kvm);
-			kvm_tdp_mmu_recover_nx_huge_pages(kvm,
-						      &kvm->arch.tdp_mmu_possible_nx_huge_pages,
-						      to_zap);
-#endif
+			kvm_tdp_mmu_recover_nx_huge_pages(kvm);
 		}
+
+		write_unlock(&kvm->mmu_lock);
+		srcu_read_unlock(&kvm->srcu, rcu_idx);
 	}
 }
 
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index f0b4341264fd..179cfd67609a 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1798,25 +1798,15 @@ u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gfn_t gfn,
 	return rcu_dereference(sptep);
 }
 
-unsigned long kvm_tdp_mmu_nx_huge_pages_to_zap(struct kvm *kvm)
+void kvm_tdp_mmu_recover_nx_huge_pages(struct kvm *kvm)
 {
 	unsigned long pages = READ_ONCE(kvm->arch.tdp_mmu_possible_nx_huge_pages_count);
 	unsigned int ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
-
-	return ratio ? DIV_ROUND_UP(pages, ratio) : 0;
-}
-
-void kvm_tdp_mmu_recover_nx_huge_pages(struct kvm *kvm,
-				   struct list_head *nx_huge_pages,
-				   unsigned long to_zap)
-{
-	int rcu_idx;
+	unsigned long to_zap = ratio ? DIV_ROUND_UP(pages, ratio) : 0;
 	struct kvm_mmu_page *sp;
 	bool flush = false;
 
-	rcu_idx = srcu_read_lock(&kvm->srcu);
-	write_lock(&kvm->mmu_lock);
-
+	lockdep_assert_held_write(&kvm->mmu_lock);
 	/*
 	 * Zapping TDP MMU shadow pages, including the remote TLB flush, must
 	 * be done under RCU protection, because the pages are freed via RCU
@@ -1825,7 +1815,7 @@ void kvm_tdp_mmu_recover_nx_huge_pages(struct kvm *kvm,
 	rcu_read_lock();
 
 	for ( ; to_zap; --to_zap) {
-		if (list_empty(nx_huge_pages))
+		if (list_empty(&kvm->arch.tdp_mmu_possible_nx_huge_pages))
 			break;
 
 		/*
@@ -1835,7 +1825,7 @@ void kvm_tdp_mmu_recover_nx_huge_pages(struct kvm *kvm,
 		 * the total number of shadow pages.  And because the TDP MMU
 		 * doesn't use active_mmu_pages.
 		 */
-		sp = list_first_entry(nx_huge_pages,
+		sp = list_first_entry(&kvm->arch.tdp_mmu_possible_nx_huge_pages,
 				      struct kvm_mmu_page,
 				      possible_nx_huge_page_link);
 		WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
@@ -1869,7 +1859,4 @@ void kvm_tdp_mmu_recover_nx_huge_pages(struct kvm *kvm,
 	if (flush)
 		kvm_flush_remote_tlbs(kvm);
 	rcu_read_unlock();
-
-	write_unlock(&kvm->mmu_lock);
-	srcu_read_unlock(&kvm->srcu, rcu_idx);
 }
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 4036552f40cd..86c1065a672d 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -67,10 +67,7 @@ int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
 u64 *kvm_tdp_mmu_fast_pf_get_last_sptep(struct kvm_vcpu *vcpu, gfn_t gfn,
 					u64 *spte);
 
-unsigned long kvm_tdp_mmu_nx_huge_pages_to_zap(struct kvm *kvm);
-void kvm_tdp_mmu_recover_nx_huge_pages(struct kvm *kvm,
-				   struct list_head *nx_huge_pages,
-				   unsigned long to_zap);
+void kvm_tdp_mmu_recover_nx_huge_pages(struct kvm *kvm);
 
 #ifdef CONFIG_X86_64
 static inline bool is_tdp_mmu_page(struct kvm_mmu_page *sp) { return sp->tdp_mmu_page; }
-- 
2.46.0.469.g59c65b2a67-goog

