[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250424030858.519-1-yan.y.zhao@intel.com>
Date: Thu, 24 Apr 2025 11:08:58 +0800
From: Yan Zhao <yan.y.zhao@...el.com>
To: pbonzini@...hat.com,
seanjc@...gle.com
Cc: linux-kernel@...r.kernel.org,
kvm@...r.kernel.org,
x86@...nel.org,
rick.p.edgecombe@...el.com,
dave.hansen@...el.com,
kirill.shutemov@...el.com,
tabba@...gle.com,
ackerleytng@...gle.com,
quic_eberman@...cinc.com,
michael.roth@....com,
david@...hat.com,
vannapurve@...gle.com,
vbabka@...e.cz,
jroedel@...e.de,
thomas.lendacky@....com,
pgonda@...gle.com,
zhiquan1.li@...el.com,
fan.du@...el.com,
jun.miao@...el.com,
ira.weiny@...el.com,
isaku.yamahata@...el.com,
xiaoyao.li@...el.com,
binbin.wu@...ux.intel.com,
chao.p.peng@...el.com,
Yan Zhao <yan.y.zhao@...el.com>
Subject: [RFC PATCH 19/21] KVM: gmem: Split huge boundary leafs for punch hole of private memory
Splitting of huge leafs in the mirror page table for kvm_gmem_punch_hole().
Enhance kvm_gmem_invalidate_begin() to invoke kvm_split_boundary_leafs()
for splitting boundary huge leafs before calling kvm_unmap_gfn_range() to do
the real zapping. As kvm_split_boundary_leafs() may fail due to out of
memory, propagate the error to further fail the kvm_gmem_punch_hole().
Splitting huge boundary leafs in the mirror page table is not required for
kvm_gmem_release() as the entire page table is to be zapped; it's also not
required for kvm_gmem_error_folio() as a SPTE must not map more than one
physical folio.
Note: as kvm_gmem_punch_hole() may request to zap several GFN ranges,
if an out-of-memory error occurs during the splitting of a GFN range, some
previous GFN ranges may have been successfully split and zapped.
Signed-off-by: Yan Zhao <yan.y.zhao@...el.com>
---
virt/kvm/guest_memfd.c | 30 +++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 4bb140e7f30d..008061734ac5 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -292,13 +292,14 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, int
return folio;
}
-static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
- pgoff_t end)
+static int kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
+ pgoff_t end, bool need_split)
{
bool flush = false, found_memslot = false;
struct kvm_memory_slot *slot;
struct kvm *kvm = gmem->kvm;
unsigned long index;
+ int ret = 0;
xa_for_each_range(&gmem->bindings, index, slot, start, end - 1) {
pgoff_t pgoff = slot->gmem.pgoff;
@@ -319,14 +320,23 @@ static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start,
kvm_mmu_invalidate_begin(kvm);
}
+ if (need_split) {
+ ret = kvm_split_boundary_leafs(kvm, &gfn_range);
+ if (ret < 0)
+ goto out;
+
+ flush |= ret;
+ }
flush |= kvm_mmu_unmap_gfn_range(kvm, &gfn_range);
}
+out:
if (flush)
kvm_flush_remote_tlbs(kvm);
if (found_memslot)
KVM_MMU_UNLOCK(kvm);
+ return ret < 0 ? ret : 0;
}
static void kvm_gmem_invalidate_end(struct kvm_gmem *gmem, pgoff_t start,
@@ -347,6 +357,7 @@ static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
loff_t size = i_size_read(inode);
pgoff_t start, end;
struct kvm_gmem *gmem;
+ int ret = 0;
if (offset > size)
return 0;
@@ -361,18 +372,22 @@ static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
*/
filemap_invalidate_lock(inode->i_mapping);
- list_for_each_entry(gmem, gmem_list, entry)
- kvm_gmem_invalidate_begin(gmem, start, end);
+ list_for_each_entry(gmem, gmem_list, entry) {
+ ret = kvm_gmem_invalidate_begin(gmem, start, end, true);
+ if (ret < 0)
+ goto out;
+ }
truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
kvm_gmem_mark_range_unprepared(inode, start, end - start);
+out:
list_for_each_entry(gmem, gmem_list, entry)
kvm_gmem_invalidate_end(gmem, start, end);
filemap_invalidate_unlock(inode->i_mapping);
- return 0;
+ return ret;
}
static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
@@ -440,7 +455,7 @@ static int kvm_gmem_release(struct inode *inode, struct file *file)
* Zap all SPTEs pointed at by this file. Do not free the backing
* memory, as its lifetime is associated with the inode, not the file.
*/
- kvm_gmem_invalidate_begin(gmem, 0, -1ul);
+ kvm_gmem_invalidate_begin(gmem, 0, -1ul, false);
kvm_gmem_invalidate_end(gmem, 0, -1ul);
list_del(&gmem->entry);
@@ -524,8 +539,9 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol
start = folio->index;
end = start + folio_nr_pages(folio);
+ /* The size of the SEPT will not exceed the size of the folio */
list_for_each_entry(gmem, gmem_list, entry)
- kvm_gmem_invalidate_begin(gmem, start, end);
+ kvm_gmem_invalidate_begin(gmem, start, end, false);
/*
* Do not truncate the range, what action is taken in response to the
--
2.43.2
Powered by blists - more mailing lists