Message-ID: <20260129011517.3545883-26-seanjc@google.com>
Date: Wed, 28 Jan 2026 17:14:57 -0800
From: Sean Christopherson <seanjc@...gle.com>
To: Thomas Gleixner <tglx@...nel.org>, Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>, x86@...nel.org,
Kiryl Shutsemau <kas@...nel.org>, Sean Christopherson <seanjc@...gle.com>, Paolo Bonzini <pbonzini@...hat.com>
Cc: linux-kernel@...r.kernel.org, linux-coco@...ts.linux.dev,
kvm@...r.kernel.org, Kai Huang <kai.huang@...el.com>,
Rick Edgecombe <rick.p.edgecombe@...el.com>, Yan Zhao <yan.y.zhao@...el.com>,
Vishal Annapurve <vannapurve@...gle.com>, Ackerley Tng <ackerleytng@...gle.com>,
Sagi Shahar <sagis@...gle.com>, Binbin Wu <binbin.wu@...ux.intel.com>,
Xiaoyao Li <xiaoyao.li@...el.com>, Isaku Yamahata <isaku.yamahata@...el.com>
Subject: [RFC PATCH v5 25/45] *** DO NOT MERGE *** x86/virt/tdx: Don't assume
guest memory is backed by struct page
Remove the completely unnecessary assumptions that memory mapped into a
TDX guest is backed by refcounted struct page memory. TDH_MEM_PAGE_ADD
and TDH_MEM_PAGE_AUG are glorified writes to PTEs; they have no business
placing requirements on how KVM and guest_memfd manage memory.
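E.g. all the SEAMCALL wrappers really need is a physical address (and, for
the cache flush, a kernel virtual address for that physical page), which a
raw pfn provides directly.  Illustrative snippet, mirroring the helper added
in the diff below:
	static void tdx_clflush_pfn(u64 pfn)
	{
		clflush_cache_range(__va(PFN_PHYS(pfn)), PAGE_SIZE);
	}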
Rip out the misguided struct page assumptions/constraints before hugepage
support is added for S-EPT, e.g. so the kernel doesn't pick up even worse
assumptions like "a hugepage must be contained in a single folio".
TODO (before merge): Replace "u64 pfn" with something type-safe.
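One possible shape for the type-safe replacement, purely as a sketch (the
names below are hypothetical and not part of this series): wrap the pfn in a
single-member struct so it can't be silently conflated with a gpa or a raw
physical address.
	/* Hypothetical type-safe pfn wrapper, illustrative only. */
	typedef struct {
		u64 val;
	} tdx_pfn_t;
	static inline u64 tdx_pfn_to_phys(tdx_pfn_t pfn)
	{
		return PFN_PHYS(pfn.val);
	}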
Signed-off-by: Sean Christopherson <seanjc@...gle.com>
---
arch/x86/include/asm/tdx.h | 25 ++++++---------
arch/x86/kvm/vmx/tdx.c | 33 ++++++++++---------
arch/x86/virt/vmx/tdx/tdx.c | 63 +++++++++++++++++++------------------
3 files changed, 59 insertions(+), 62 deletions(-)
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 56bdfbce4289..1f57f7721286 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -154,10 +154,10 @@ static inline void tdx_init_pamt_cache(struct tdx_pamt_cache *cache)
void tdx_free_pamt_cache(struct tdx_pamt_cache *cache);
int tdx_topup_pamt_cache(struct tdx_pamt_cache *cache, unsigned long npages);
-int tdx_pamt_get(struct page *page, struct tdx_pamt_cache *cache);
-void tdx_pamt_put(struct page *page);
+int tdx_pamt_get(u64 pfn, struct tdx_pamt_cache *cache);
+void tdx_pamt_put(u64 pfn);
-void tdx_quirk_reset_page(struct page *page);
+void tdx_quirk_reset_page(u64 pfn);
int tdx_guest_keyid_alloc(void);
u32 tdx_get_nr_guest_keyids(void);
@@ -206,23 +206,18 @@ struct tdx_vp {
struct page **tdcx_pages;
};
-static inline u64 mk_keyed_paddr(u16 hkid, struct page *page)
+static inline u64 mk_keyed_paddr(u16 hkid, u64 pfn)
{
- u64 ret;
-
- ret = page_to_phys(page);
- /* KeyID bits are just above the physical address bits: */
- ret |= (u64)hkid << boot_cpu_data.x86_phys_bits;
-
- return ret;
+ /* KeyID bits are just above the physical address bits. */
+ return PFN_PHYS(pfn) | ((u64)hkid << boot_cpu_data.x86_phys_bits);
}
u64 tdh_vp_enter(struct tdx_vp *vp, struct tdx_module_args *args);
u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page);
-u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2);
+u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, u64 pfn, struct page *source, u64 *ext_err1, u64 *ext_err2);
u64 tdh_mem_sept_add(struct tdx_td *td, u64 gpa, enum pg_level level, struct page *page, u64 *ext_err1, u64 *ext_err2);
u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page);
-u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, enum pg_level level, struct page *page, u64 *ext_err1, u64 *ext_err2);
+u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, enum pg_level level, u64 pfn, u64 *ext_err1, u64 *ext_err2);
u64 tdh_mem_range_block(struct tdx_td *td, u64 gpa, enum pg_level level, u64 *ext_err1, u64 *ext_err2);
u64 tdh_mng_key_config(struct tdx_td *td);
u64 tdh_mng_create(struct tdx_td *td, u16 hkid);
@@ -237,12 +232,12 @@ u64 tdh_mng_init(struct tdx_td *td, u64 td_params, u64 *extended_err);
u64 tdh_vp_init(struct tdx_vp *vp, u64 initial_rcx, u32 x2apicid);
u64 tdh_vp_rd(struct tdx_vp *vp, u64 field, u64 *data);
u64 tdh_vp_wr(struct tdx_vp *vp, u64 field, u64 data, u64 mask);
-u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size);
+u64 tdh_phymem_page_reclaim(u64 pfn, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size);
u64 tdh_mem_track(struct tdx_td *tdr);
u64 tdh_mem_page_remove(struct tdx_td *td, u64 gpa, enum pg_level level, u64 *ext_err1, u64 *ext_err2);
u64 tdh_phymem_cache_wb(bool resume);
u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td);
-u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page);
+u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, u64 pfn);
#else
static inline void tdx_init(void) { }
static inline int tdx_cpu_enable(void) { return -ENODEV; }
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index d74a2547e512..4ac312376ac9 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -318,11 +318,11 @@ static inline void tdx_disassociate_vp(struct kvm_vcpu *vcpu)
})
/* TDH.PHYMEM.PAGE.RECLAIM is allowed only when destroying the TD. */
-static int __tdx_reclaim_page(struct page *page)
+static int __tdx_reclaim_page(kvm_pfn_t pfn)
{
u64 err, rcx, rdx, r8;
- err = tdh_phymem_page_reclaim(page, &rcx, &rdx, &r8);
+ err = tdh_phymem_page_reclaim(pfn, &rcx, &rdx, &r8);
/*
* No need to check for TDX_OPERAND_BUSY; all TD pages are freed
@@ -337,11 +337,12 @@ static int __tdx_reclaim_page(struct page *page)
static int tdx_reclaim_page(struct page *page)
{
+ kvm_pfn_t pfn = page_to_pfn(page);
int r;
- r = __tdx_reclaim_page(page);
+ r = __tdx_reclaim_page(pfn);
if (!r)
- tdx_quirk_reset_page(page);
+ tdx_quirk_reset_page(pfn);
return r;
}
@@ -583,7 +584,7 @@ static void tdx_reclaim_td_control_pages(struct kvm *kvm)
if (!kvm_tdx->td.tdr_page)
return;
- if (__tdx_reclaim_page(kvm_tdx->td.tdr_page))
+ if (__tdx_reclaim_page(page_to_pfn(kvm_tdx->td.tdr_page)))
return;
/*
@@ -595,7 +596,7 @@ static void tdx_reclaim_td_control_pages(struct kvm *kvm)
if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm))
return;
- tdx_quirk_reset_page(kvm_tdx->td.tdr_page);
+ tdx_quirk_reset_page(page_to_pfn(kvm_tdx->td.tdr_page));
__tdx_free_control_page(kvm_tdx->td.tdr_page);
kvm_tdx->td.tdr_page = NULL;
@@ -1640,8 +1641,8 @@ static int tdx_mem_page_add(struct kvm *kvm, gfn_t gfn, enum pg_level level,
KVM_BUG_ON(!kvm_tdx->page_add_src, kvm))
return -EIO;
- err = tdh_mem_page_add(&kvm_tdx->td, gpa, pfn_to_page(pfn),
- kvm_tdx->page_add_src, &entry, &level_state);
+ err = tdh_mem_page_add(&kvm_tdx->td, gpa, pfn, kvm_tdx->page_add_src,
+ &entry, &level_state);
if (unlikely(IS_TDX_OPERAND_BUSY(err)))
return -EBUSY;
@@ -1655,12 +1656,11 @@ static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
enum pg_level level, kvm_pfn_t pfn)
{
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
- struct page *page = pfn_to_page(pfn);
gpa_t gpa = gfn_to_gpa(gfn);
u64 entry, level_state;
u64 err;
- err = tdh_mem_page_aug(&kvm_tdx->td, gpa, level, page, &entry, &level_state);
+ err = tdh_mem_page_aug(&kvm_tdx->td, gpa, level, pfn, &entry, &level_state);
if (unlikely(IS_TDX_OPERAND_BUSY(err)))
return -EBUSY;
@@ -1712,7 +1712,6 @@ static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
kvm_pfn_t pfn = spte_to_pfn(mirror_spte);
struct vcpu_tdx *tdx = to_tdx(vcpu);
- struct page *page = pfn_to_page(pfn);
int ret;
if (KVM_BUG_ON(!vcpu, kvm))
@@ -1730,7 +1729,7 @@ static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
WARN_ON_ONCE((mirror_spte & VMX_EPT_RWX_MASK) != VMX_EPT_RWX_MASK);
- ret = tdx_pamt_get(page, &tdx->pamt_cache);
+ ret = tdx_pamt_get(pfn, &tdx->pamt_cache);
if (ret)
return ret;
@@ -1752,7 +1751,7 @@ static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
ret = tdx_mem_page_add(kvm, gfn, level, pfn);
if (ret)
- tdx_pamt_put(page);
+ tdx_pamt_put(pfn);
return ret;
}
@@ -1828,8 +1827,8 @@ static void tdx_sept_reclaim_private_sp(struct kvm *kvm, gfn_t gfn,
static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
enum pg_level level, u64 mirror_spte)
{
- struct page *page = pfn_to_page(spte_to_pfn(mirror_spte));
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+ kvm_pfn_t pfn = spte_to_pfn(mirror_spte);
gpa_t gpa = gfn_to_gpa(gfn);
u64 err, entry, level_state;
@@ -1868,12 +1867,12 @@ static void tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
if (TDX_BUG_ON_2(err, TDH_MEM_PAGE_REMOVE, entry, level_state, kvm))
return;
- err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, page);
+ err = tdh_phymem_page_wbinvd_hkid((u16)kvm_tdx->hkid, pfn);
if (TDX_BUG_ON(err, TDH_PHYMEM_PAGE_WBINVD, kvm))
return;
- tdx_quirk_reset_page(page);
- tdx_pamt_put(page);
+ tdx_quirk_reset_page(pfn);
+ tdx_pamt_put(pfn);
}
void tdx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode,
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 90407493bb45..85c31ed9b9d1 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -799,9 +799,9 @@ static void tdx_quirk_reset_paddr(unsigned long base, unsigned long size)
mb();
}
-void tdx_quirk_reset_page(struct page *page)
+void tdx_quirk_reset_page(u64 pfn)
{
- tdx_quirk_reset_paddr(page_to_phys(page), PAGE_SIZE);
+ tdx_quirk_reset_paddr(PFN_PHYS(pfn), PAGE_SIZE);
}
EXPORT_SYMBOL_FOR_KVM(tdx_quirk_reset_page);
@@ -1665,6 +1665,11 @@ static void tdx_clflush_page(struct page *page)
clflush_cache_range(page_to_virt(page), PAGE_SIZE);
}
+static void tdx_clflush_pfn(u64 pfn)
+{
+ clflush_cache_range(__va(PFN_PHYS(pfn)), PAGE_SIZE);
+}
+
static int pg_level_to_tdx_sept_level(enum pg_level level)
{
WARN_ON_ONCE(level == PG_LEVEL_NONE);
@@ -1691,17 +1696,17 @@ u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page)
}
EXPORT_SYMBOL_FOR_KVM(tdh_mng_addcx);
-u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2)
+u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, u64 pfn, struct page *source, u64 *ext_err1, u64 *ext_err2)
{
struct tdx_module_args args = {
.rcx = gpa,
.rdx = tdx_tdr_pa(td),
- .r8 = page_to_phys(page),
+ .r8 = PFN_PHYS(pfn),
.r9 = page_to_phys(source),
};
u64 ret;
- tdx_clflush_page(page);
+ tdx_clflush_pfn(pfn);
ret = seamcall_ret(TDH_MEM_PAGE_ADD, &args);
*ext_err1 = args.rcx;
@@ -1743,17 +1748,17 @@ u64 tdh_vp_addcx(struct tdx_vp *vp, struct page *tdcx_page)
}
EXPORT_SYMBOL_FOR_KVM(tdh_vp_addcx);
-u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, enum pg_level level,
- struct page *page, u64 *ext_err1, u64 *ext_err2)
+u64 tdh_mem_page_aug(struct tdx_td *td, u64 gpa, enum pg_level level, u64 pfn,
+ u64 *ext_err1, u64 *ext_err2)
{
struct tdx_module_args args = {
.rcx = gpa | pg_level_to_tdx_sept_level(level),
.rdx = tdx_tdr_pa(td),
- .r8 = page_to_phys(page),
+ .r8 = PFN_PHYS(pfn),
};
u64 ret;
- tdx_clflush_page(page);
+ tdx_clflush_pfn(pfn);
ret = seamcall_ret(TDH_MEM_PAGE_AUG, &args);
*ext_err1 = args.rcx;
@@ -1997,10 +2002,10 @@ EXPORT_SYMBOL_FOR_KVM(tdh_vp_init);
* So despite the names, they must be interpreted specially as described by the spec. Return
* them only for error reporting purposes.
*/
-u64 tdh_phymem_page_reclaim(struct page *page, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size)
+u64 tdh_phymem_page_reclaim(u64 pfn, u64 *tdx_pt, u64 *tdx_owner, u64 *tdx_size)
{
struct tdx_module_args args = {
- .rcx = page_to_phys(page),
+ .rcx = PFN_PHYS(pfn),
};
u64 ret;
@@ -2056,17 +2061,17 @@ u64 tdh_phymem_page_wbinvd_tdr(struct tdx_td *td)
{
struct tdx_module_args args = {};
- args.rcx = mk_keyed_paddr(tdx_global_keyid, td->tdr_page);
+ args.rcx = mk_keyed_paddr(tdx_global_keyid, page_to_pfn(td->tdr_page));
return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
}
EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_tdr);
-u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
+u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, u64 pfn)
{
struct tdx_module_args args = {};
- args.rcx = mk_keyed_paddr(hkid, page);
+ args.rcx = mk_keyed_paddr(hkid, pfn);
return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
}
@@ -2136,11 +2141,9 @@ static void free_pamt_array(u64 *pa_array)
* Calculate the arg needed for operating on the DPAMT backing for
* a given 4KB page.
*/
-static u64 pamt_2mb_arg(struct page *page)
+static u64 pamt_2mb_arg(u64 pfn)
{
- unsigned long hpa_2mb = ALIGN_DOWN(page_to_phys(page), PMD_SIZE);
-
- return hpa_2mb | TDX_PS_2M;
+ return ALIGN_DOWN(PFN_PHYS(pfn), PMD_SIZE) | TDX_PS_2M;
}
/*
@@ -2149,10 +2152,10 @@ static u64 pamt_2mb_arg(struct page *page)
* error. In the case of TDX module error, the return code is stored
* in tdx_err.
*/
-static u64 tdh_phymem_pamt_add(struct page *page, u64 *pamt_pa_array)
+static u64 tdh_phymem_pamt_add(u64 pfn, u64 *pamt_pa_array)
{
struct tdx_module_args args = {
- .rcx = pamt_2mb_arg(page)
+ .rcx = pamt_2mb_arg(pfn)
};
dpamt_copy_to_regs(&args, rdx, pamt_pa_array);
@@ -2161,10 +2164,10 @@ static u64 tdh_phymem_pamt_add(struct page *page, u64 *pamt_pa_array)
}
/* Remove PAMT backing for the given page. */
-static u64 tdh_phymem_pamt_remove(struct page *page, u64 *pamt_pa_array)
+static u64 tdh_phymem_pamt_remove(u64 pfn, u64 *pamt_pa_array)
{
struct tdx_module_args args = {
- .rcx = pamt_2mb_arg(page),
+ .rcx = pamt_2mb_arg(pfn),
};
u64 ret;
@@ -2180,7 +2183,7 @@ static u64 tdh_phymem_pamt_remove(struct page *page, u64 *pamt_pa_array)
static DEFINE_SPINLOCK(pamt_lock);
/* Bump PAMT refcount for the given page and allocate PAMT memory if needed */
-int tdx_pamt_get(struct page *page, struct tdx_pamt_cache *cache)
+int tdx_pamt_get(u64 pfn, struct tdx_pamt_cache *cache)
{
u64 pamt_pa_array[MAX_NR_DPAMT_ARGS];
atomic_t *pamt_refcount;
@@ -2190,7 +2193,7 @@ int tdx_pamt_get(struct page *page, struct tdx_pamt_cache *cache)
if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
return 0;
- pamt_refcount = tdx_find_pamt_refcount(page_to_pfn(page));
+ pamt_refcount = tdx_find_pamt_refcount(pfn);
/*
* If the pamt page is already added (i.e. refcount >= 1),
@@ -2214,7 +2217,7 @@ int tdx_pamt_get(struct page *page, struct tdx_pamt_cache *cache)
}
/* Try to add the pamt page and take the refcount 0->1. */
- tdx_status = tdh_phymem_pamt_add(page, pamt_pa_array);
+ tdx_status = tdh_phymem_pamt_add(pfn, pamt_pa_array);
if (IS_TDX_SUCCESS(tdx_status)) {
/*
* The refcount is zero, and this locked path is the only way to
@@ -2257,7 +2260,7 @@ EXPORT_SYMBOL_FOR_KVM(tdx_pamt_get);
* Drop PAMT refcount for the given page and free PAMT memory if it is no
* longer needed.
*/
-void tdx_pamt_put(struct page *page)
+void tdx_pamt_put(u64 pfn)
{
u64 pamt_pa_array[MAX_NR_DPAMT_ARGS];
atomic_t *pamt_refcount;
@@ -2266,7 +2269,7 @@ void tdx_pamt_put(struct page *page)
if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
return;
- pamt_refcount = tdx_find_pamt_refcount(page_to_pfn(page));
+ pamt_refcount = tdx_find_pamt_refcount(pfn);
/*
* If there are more than 1 references on the pamt page,
@@ -2285,7 +2288,7 @@ void tdx_pamt_put(struct page *page)
return;
/* Try to remove the pamt page and take the refcount 1->0. */
- tdx_status = tdh_phymem_pamt_remove(page, pamt_pa_array);
+ tdx_status = tdh_phymem_pamt_remove(pfn, pamt_pa_array);
/*
* Don't free pamt_pa_array as it could hold garbage when
@@ -2357,7 +2360,7 @@ struct page *__tdx_alloc_control_page(gfp_t gfp)
if (!page)
return NULL;
- if (tdx_pamt_get(page, NULL)) {
+ if (tdx_pamt_get(page_to_pfn(page), NULL)) {
__free_page(page);
return NULL;
}
@@ -2375,7 +2378,7 @@ void __tdx_free_control_page(struct page *page)
if (!page)
return;
- tdx_pamt_put(page);
+ tdx_pamt_put(page_to_pfn(page));
__free_page(page);
}
EXPORT_SYMBOL_FOR_KVM(__tdx_free_control_page);
--
2.53.0.rc1.217.geba53bf80e-goog