Message-Id: <20210112181041.356734-24-bgardon@google.com>
Date: Tue, 12 Jan 2021 10:10:40 -0800
From: Ben Gardon <bgardon@...gle.com>
To: linux-kernel@...r.kernel.org, kvm@...r.kernel.org
Cc: Paolo Bonzini <pbonzini@...hat.com>, Peter Xu <peterx@...hat.com>,
Sean Christopherson <seanjc@...gle.com>,
Peter Shier <pshier@...gle.com>,
Peter Feiner <pfeiner@...gle.com>,
Junaid Shahid <junaids@...gle.com>,
Jim Mattson <jmattson@...gle.com>,
Yulei Zhang <yulei.kernel@...il.com>,
Wanpeng Li <kernellwp@...il.com>,
Vitaly Kuznetsov <vkuznets@...hat.com>,
Xiao Guangrong <xiaoguangrong.eric@...il.com>,
Ben Gardon <bgardon@...gle.com>
Subject: [PATCH 23/24] kvm: x86/mmu: Freeze SPTEs in disconnected pages

When clearing TDP MMU pages that have been disconnected from the paging
structure root, set the SPTEs to a special non-present value which will
not be overwritten by other threads. This is needed to prevent races in
which a thread is clearing a disconnected page table while another
thread, having already acquired a pointer to that memory, installs a
mapping in an already cleared entry. Such races can lead to memory leaks
and accounting errors.
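
For illustration only (this note and sketch are not part of the patch):
below is a minimal user-space C sketch of the freeze-and-retry pattern
the message above describes. FROZEN_SPTE's value, ENT_PER_PAGE, and
freeze_entry() are hypothetical stand-ins, not the kernel's definitions.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define ENT_PER_PAGE 512
/* Hypothetical sentinel: a non-present pattern no valid entry uses. */
#define FROZEN_SPTE ((uint64_t)0xdeadbeefULL << 12)

static _Atomic uint64_t page_table[ENT_PER_PAGE];

/*
 * Freeze one entry of a disconnected page table and return the old
 * value. If the exchange reads back FROZEN_SPTE, the freeze belongs to
 * another thread that may still overwrite the entry, so retry until
 * this thread itself makes the non-frozen -> frozen transition.
 */
static uint64_t freeze_entry(_Atomic uint64_t *sptep)
{
        uint64_t old;

        for (;;) {
                old = atomic_exchange(sptep, FROZEN_SPTE);
                if (old != FROZEN_SPTE)
                        return old;
                /* Another thread holds the entry frozen; retry. */
        }
}

int main(void)
{
        /* Pretend entry 0 held a live, present mapping. */
        atomic_store(&page_table[0], (0x1234ULL << 12) | 1);

        uint64_t old = freeze_entry(&page_table[0]);

        printf("old SPTE: %#llx, frozen now: %d\n",
               (unsigned long long)old,
               atomic_load(&page_table[0]) == FROZEN_SPTE);
        return 0;
}

The kernel's retry loop also calls cpu_relax() between attempts; that is
omitted here because standard C has no direct user-space equivalent.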
Reviewed-by: Peter Feiner <pfeiner@...gle.com>
Signed-off-by: Ben Gardon <bgardon@...gle.com>
---
arch/x86/kvm/mmu/tdp_mmu.c | 35 +++++++++++++++++++++++++++++------
1 file changed, 29 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 5c9d053000ad..45160ff84e91 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -333,13 +333,14 @@ static void handle_disconnected_tdp_mmu_page(struct kvm *kvm, u64 *pt,
 {
        struct kvm_mmu_page *sp;
        gfn_t gfn;
+       gfn_t base_gfn;
        int level;
        u64 *sptep;
        u64 old_child_spte;
        int i;

        sp = sptep_to_sp(pt);
-       gfn = sp->gfn;
+       base_gfn = sp->gfn;
        level = sp->role.level;

        trace_kvm_mmu_prepare_zap_page(sp);
@@ -348,16 +349,38 @@ static void handle_disconnected_tdp_mmu_page(struct kvm *kvm, u64 *pt,
        for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
                sptep = pt + i;
+               gfn = base_gfn + (i * KVM_PAGES_PER_HPAGE(level - 1));

                if (atomic) {
-                       old_child_spte = xchg(sptep, 0);
+                       /*
+                        * Set the SPTE to a nonpresent value that other
+                        * threads will not overwrite. If the SPTE was already
+                        * frozen then another thread handling a page fault
+                        * could overwrite it, so keep retrying until this
+                        * thread sets the SPTE from non-frozen to frozen.
+                        */
+                       for (;;) {
+                               old_child_spte = xchg(sptep, FROZEN_SPTE);
+                               if (old_child_spte != FROZEN_SPTE)
+                                       break;
+                               cpu_relax();
+                       }
                } else {
                        old_child_spte = READ_ONCE(*sptep);
+
+                       /*
+                        * Setting the SPTE to FROZEN_SPTE is not strictly
+                        * necessary here as the MMU lock should stop other
+                        * threads from concurrently modifying this SPTE.
+                        * Using FROZEN_SPTE keeps the atomic and
+                        * non-atomic cases consistent and simplifies the
+                        * function.
+                        */
+                       WRITE_ONCE(*sptep, FROZEN_SPTE);
                }
-               handle_changed_spte(kvm, kvm_mmu_page_as_id(sp),
-                                   gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)),
-                                   old_child_spte, 0, level - 1, atomic);
+               handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn,
+                                   old_child_spte, FROZEN_SPTE, level - 1,
+                                   atomic);
        }

        kvm_flush_remote_tlbs_with_address(kvm, gfn,
--
2.30.0.284.gd98b1dd5eaa7-goog