Message-ID: <20250424030858.519-1-yan.y.zhao@intel.com>
Date: Thu, 24 Apr 2025 11:08:58 +0800
From: Yan Zhao <yan.y.zhao@...el.com>
To: pbonzini@...hat.com,
	seanjc@...gle.com
Cc: linux-kernel@...r.kernel.org,
	kvm@...r.kernel.org,
	x86@...nel.org,
	rick.p.edgecombe@...el.com,
	dave.hansen@...el.com,
	kirill.shutemov@...el.com,
	tabba@...gle.com,
	ackerleytng@...gle.com,
	quic_eberman@...cinc.com,
	michael.roth@....com,
	david@...hat.com,
	vannapurve@...gle.com,
	vbabka@...e.cz,
	jroedel@...e.de,
	thomas.lendacky@....com,
	pgonda@...gle.com,
	zhiquan1.li@...el.com,
	fan.du@...el.com,
	jun.miao@...el.com,
	ira.weiny@...el.com,
	isaku.yamahata@...el.com,
	xiaoyao.li@...el.com,
	binbin.wu@...ux.intel.com,
	chao.p.peng@...el.com,
	Yan Zhao <yan.y.zhao@...el.com>
Subject: [RFC PATCH 19/21] KVM: gmem: Split huge boundary leafs for punch hole of private memory

Split huge boundary leafs in the mirror page table for kvm_gmem_punch_hole().

Enhance kvm_gmem_invalidate_begin() to invoke kvm_split_boundary_leafs() to
split boundary huge leafs before calling kvm_mmu_unmap_gfn_range() to do the
real zapping. As kvm_split_boundary_leafs() may fail when memory runs out,
propagate the error so that kvm_gmem_punch_hole() fails as well.
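
To illustrate what "boundary" means here: a huge leaf only needs splitting
when the start or end of the punched GFN range falls strictly inside the
range the leaf maps, e.g. punching a 4KB hole in the middle of a 2MB
mapping. A minimal sketch of that check (the helper name and signature are
hypothetical, for illustration only, and not part of this series):

  /*
   * Illustrative only: a leaf mapping [leaf_start, leaf_start + leaf_pages)
   * is a "boundary" leaf of the punched range [start, end) when the range
   * cuts through it instead of covering it entirely.  Only such leafs must
   * be split before the covered portion can be zapped.
   */
  static bool is_boundary_leaf(gfn_t leaf_start, unsigned long leaf_pages,
                               gfn_t start, gfn_t end)
  {
          gfn_t leaf_end = leaf_start + leaf_pages;

          return (start > leaf_start && start < leaf_end) ||
                 (end > leaf_start && end < leaf_end);
  }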

Splitting huge boundary leafs in the mirror page table is not required for
kvm_gmem_release(), since the entire page table is zapped; it is also not
required for kvm_gmem_error_folio(), since a SPTE must not map more than one
physical folio.

Note: since kvm_gmem_punch_hole() may request zapping of several GFN ranges,
if an out-of-memory error occurs while splitting one of them, earlier GFN
ranges may already have been successfully split and zapped.
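
From userspace, this new failure mode surfaces through fallocate() on the
guest_memfd fd, which routes FALLOC_FL_PUNCH_HOLE to kvm_gmem_punch_hole().
A minimal sketch of a caller (the error-reporting policy is illustrative,
not prescribed by this patch):

  #define _GNU_SOURCE
  #include <fcntl.h>
  #include <errno.h>
  #include <stdio.h>
  #include <sys/types.h>

  /*
   * Punch a hole in a guest_memfd range.  With this patch the call may
   * fail with ENOMEM if splitting a boundary huge leaf runs out of
   * memory; earlier GFN ranges may already have been split and zapped
   * by then.
   */
  static int gmem_punch_hole(int gmem_fd, off_t offset, off_t len)
  {
          if (fallocate(gmem_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                        offset, len)) {
                  if (errno == ENOMEM)
                          fprintf(stderr, "punch hole failed: out of memory\n");
                  return -errno;
          }
          return 0;
  }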

Signed-off-by: Yan Zhao <yan.y.zhao@...el.com>
---
 virt/kvm/guest_memfd.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 4bb140e7f30d..008061734ac5 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -292,13 +292,14 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, int
 	return folio;
 }
 
-static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
-				      pgoff_t end)
+static int kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
+				     pgoff_t end, bool need_split)
 {
 	bool flush = false, found_memslot = false;
 	struct kvm_memory_slot *slot;
 	struct kvm *kvm = gmem->kvm;
 	unsigned long index;
+	int ret = 0;
 
 	xa_for_each_range(&gmem->bindings, index, slot, start, end - 1) {
 		pgoff_t pgoff = slot->gmem.pgoff;
@@ -319,14 +320,23 @@ static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
 			kvm_mmu_invalidate_begin(kvm);
 		}
 
+		if (need_split) {
+			ret = kvm_split_boundary_leafs(kvm, &gfn_range);
+			if (ret < 0)
+				goto out;
+
+			flush |= ret;
+		}
 		flush |= kvm_mmu_unmap_gfn_range(kvm, &gfn_range);
 	}
 
+out:
 	if (flush)
 		kvm_flush_remote_tlbs(kvm);
 
 	if (found_memslot)
 		KVM_MMU_UNLOCK(kvm);
+	return ret < 0 ? ret : 0;
 }
 
 static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start,
@@ -347,6 +357,7 @@ static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 	loff_t size = i_size_read(inode);
 	pgoff_t start, end;
 	struct kvm_gmem *gmem;
+	int ret = 0;
 
 	if (offset > size)
 		return 0;
@@ -361,18 +372,22 @@ static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 	 */
 	filemap_invalidate_lock(inode->i_mapping);
 
-	list_for_each_entry(gmem, gmem_list, entry)
-		kvm_gmem_invalidate_begin(gmem, start, end);
+	list_for_each_entry(gmem, gmem_list, entry) {
+		ret = kvm_gmem_invalidate_begin(gmem, start, end, true);
+		if (ret < 0)
+			goto out;
+	}
 
 	truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
 	kvm_gmem_mark_range_unprepared(inode, start, end - start);
 
+out:
 	list_for_each_entry(gmem, gmem_list, entry)
 		kvm_gmem_invalidate_end(gmem, start, end);
 
 	filemap_invalidate_unlock(inode->i_mapping);
 
-	return 0;
+	return ret;
 }
 
 static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
@@ -440,7 +455,7 @@ static int kvm_gmem_release(struct inode *inode, struct file *file)
 	 * Zap all SPTEs pointed at by this file.  Do not free the backing
 	 * memory, as its lifetime is associated with the inode, not the file.
 	 */
-	kvm_gmem_invalidate_begin(gmem, 0, -1ul);
+	kvm_gmem_invalidate_begin(gmem, 0, -1ul, false);
 	kvm_gmem_invalidate_end(gmem, 0, -1ul);
 
 	list_del(&gmem->entry);
@@ -524,8 +539,9 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol
 	start = folio->index;
 	end = start + folio_nr_pages(folio);
 
+	/* The size of the SEPT will not exceed the size of the folio */
 	list_for_each_entry(gmem, gmem_list, entry)
-		kvm_gmem_invalidate_begin(gmem, start, end);
+		kvm_gmem_invalidate_begin(gmem, start, end, false);
 
 	/*
 	 * Do not truncate the range, what action is taken in response to the
-- 
2.43.2

