Message-ID: <20260129011517.3545883-44-seanjc@google.com>
Date: Wed, 28 Jan 2026 17:15:15 -0800
From: Sean Christopherson <seanjc@...gle.com>
To: Thomas Gleixner <tglx@...nel.org>, Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>, x86@...nel.org,
Kiryl Shutsemau <kas@...nel.org>, Sean Christopherson <seanjc@...gle.com>, Paolo Bonzini <pbonzini@...hat.com>
Cc: linux-kernel@...r.kernel.org, linux-coco@...ts.linux.dev,
kvm@...r.kernel.org, Kai Huang <kai.huang@...el.com>,
Rick Edgecombe <rick.p.edgecombe@...el.com>, Yan Zhao <yan.y.zhao@...el.com>,
Vishal Annapurve <vannapurve@...gle.com>, Ackerley Tng <ackerleytng@...gle.com>,
Sagi Shahar <sagis@...gle.com>, Binbin Wu <binbin.wu@...ux.intel.com>,
Xiaoyao Li <xiaoyao.li@...el.com>, Isaku Yamahata <isaku.yamahata@...el.com>
Subject: [RFC PATCH v5 43/45] *** DO NOT MERGE *** KVM: guest_memfd: Add
pre-zap arch hook for shared<=>private conversion

Add a gmem "pre-zap" hook to allow arch code to take action before a
shared<=>private conversion and, just as importantly, to let arch code
reject/fail a conversion, e.g. if the conversion requires new page tables
and KVM hits an OOM situation.
The new hook will be used by TDX to split hugepages as necessary to avoid
overzapping PTEs, which for all intents and purposes corrupts guest data
for TDX VMs (memory is wiped when private PTEs are removed).
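
Purely as illustration of the intended shape (this patch only adds a stub;
the real TDX implementation comes later in the series), an arch hook might
look something like the sketch below, where tdx_demote_private_spte() is a
hypothetical helper standing in for whatever S-EPT demotion primitive is
actually used:

  /* Hypothetical sketch only, not the actual TDX implementation. */
  int kvm_arch_gmem_convert(struct kvm *kvm, gfn_t start, gfn_t end,
                            bool to_private)
  {
          int r;

          /*
           * Only removing private (S-EPT) mappings wipes memory, so assume
           * there is nothing to split when converting shared=>private.
           */
          if (to_private)
                  return 0;

          /*
           * Demote any huge private mapping that straddles the start or end
           * of the range so that the zap is exact and doesn't clobber pages
           * the guest still treats as private.  Propagating failure, e.g.
           * -ENOMEM when a new page table can't be allocated, rejects the
           * conversion before any guest data is lost.
           */
          r = tdx_demote_private_spte(kvm, start);
          if (r)
                  return r;

          return tdx_demote_private_spte(kvm, end);
  }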

TODO: Wire this up to the convert path, not the PUNCH_HOLE path, once
in-place conversion is supported.

Signed-off-by: Sean Christopherson <seanjc@...gle.com>
---
arch/x86/kvm/Kconfig | 1 +
arch/x86/kvm/mmu/tdp_mmu.c | 8 ++++++
include/linux/kvm_host.h | 5 ++++
virt/kvm/Kconfig | 4 +++
virt/kvm/guest_memfd.c | 50 ++++++++++++++++++++++++++++++++++++--
5 files changed, 66 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index d916bd766c94..5f8d8daf4289 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -138,6 +138,7 @@ config KVM_INTEL_TDX
depends on INTEL_TDX_HOST
select KVM_GENERIC_MEMORY_ATTRIBUTES
select HAVE_KVM_ARCH_GMEM_POPULATE
+ select HAVE_KVM_ARCH_GMEM_CONVERT
help
Provides support for launching Intel Trust Domain Extensions (TDX)
confidential VMs on Intel processors.
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 0cdc6782e508..c46ebdacdb50 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1630,6 +1630,14 @@ int kvm_tdp_mmu_split_huge_pages(struct kvm_vcpu *vcpu, gfn_t start, gfn_t end,
}
EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_tdp_mmu_split_huge_pages);
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_CONVERT
+int kvm_arch_gmem_convert(struct kvm *kvm, gfn_t start, gfn_t end,
+ bool to_private)
+{
+ return 0;
+}
+#endif /* CONFIG_HAVE_KVM_ARCH_GMEM_CONVERT */
+
static bool tdp_mmu_need_write_protect(struct kvm *kvm, struct kvm_mmu_page *sp)
{
/*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 782f4d670793..c0bafff274b6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2588,6 +2588,11 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages
kvm_gmem_populate_cb post_populate, void *opaque);
#endif
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_CONVERT
+int kvm_arch_gmem_convert(struct kvm *kvm, gfn_t start, gfn_t end,
+ bool to_private);
+#endif
+
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE
void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
#endif
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 267c7369c765..05d69eaa50ae 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -125,3 +125,7 @@ config HAVE_KVM_ARCH_GMEM_INVALIDATE
config HAVE_KVM_ARCH_GMEM_POPULATE
bool
depends on KVM_GUEST_MEMFD
+
+config HAVE_KVM_ARCH_GMEM_CONVERT
+ bool
+ depends on KVM_GUEST_MEMFD
\ No newline at end of file
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 51dbb309188f..b01f333a5e95 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -164,6 +164,46 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
return folio;
}
+#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_CONVERT
+static int __kvm_gmem_convert(struct gmem_file *f, pgoff_t start, pgoff_t end,
+ bool to_private)
+{
+ struct kvm_memory_slot *slot;
+ unsigned long index;
+ int r;
+
+ xa_for_each_range(&f->bindings, index, slot, start, end - 1) {
+ r = kvm_arch_gmem_convert(f->kvm,
+ kvm_gmem_get_start_gfn(slot, start),
+ kvm_gmem_get_end_gfn(slot, end),
+ to_private);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+static int kvm_gmem_convert(struct inode *inode, pgoff_t start, pgoff_t end,
+ bool to_private)
+{
+ struct gmem_file *f;
+ int r;
+
+ kvm_gmem_for_each_file(f, inode->i_mapping) {
+ r = __kvm_gmem_convert(f, start, end, to_private);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+#else
+static int kvm_gmem_convert(struct inode *inode, pgoff_t start, pgoff_t end,
+ bool to_private)
+{
+ return 0;
+}
+#endif
+
static enum kvm_gfn_range_filter kvm_gmem_get_invalidate_filter(struct inode *inode)
{
if (GMEM_I(inode)->flags & GUEST_MEMFD_FLAG_INIT_SHARED)
@@ -244,6 +284,7 @@ static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
pgoff_t start = offset >> PAGE_SHIFT;
pgoff_t end = (offset + len) >> PAGE_SHIFT;
+ int r;
/*
* Bindings must be stable across invalidation to ensure the start+end
@@ -253,13 +294,18 @@ static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
kvm_gmem_invalidate_begin(inode, start, end);
- truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
+ /*
+ * For demonstration purposes, pretend this is a private=>shared conversion.
+ */
+ r = kvm_gmem_convert(inode, start, end, false);
+ if (!r)
+ truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
kvm_gmem_invalidate_end(inode, start, end);
filemap_invalidate_unlock(inode->i_mapping);
- return 0;
+ return r;
}
static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len)
--
2.53.0.rc1.217.geba53bf80e-goog