Message-ID: <20260106102426.25311-1-yan.y.zhao@intel.com>
Date: Tue, 6 Jan 2026 18:24:26 +0800
From: Yan Zhao <yan.y.zhao@...el.com>
To: pbonzini@...hat.com,
seanjc@...gle.com
Cc: linux-kernel@...r.kernel.org,
kvm@...r.kernel.org,
x86@...nel.org,
rick.p.edgecombe@...el.com,
dave.hansen@...el.com,
kas@...nel.org,
tabba@...gle.com,
ackerleytng@...gle.com,
michael.roth@....com,
david@...nel.org,
vannapurve@...gle.com,
sagis@...gle.com,
vbabka@...e.cz,
thomas.lendacky@....com,
nik.borisov@...e.com,
pgonda@...gle.com,
fan.du@...el.com,
jun.miao@...el.com,
francescolavra.fl@...il.com,
jgross@...e.com,
ira.weiny@...el.com,
isaku.yamahata@...el.com,
xiaoyao.li@...el.com,
kai.huang@...el.com,
binbin.wu@...ux.intel.com,
chao.p.peng@...el.com,
chao.gao@...el.com,
yan.y.zhao@...el.com
Subject: [PATCH v3 23/24] x86/tdx: Pass guest memory's PFN info to demote for updating pamt_refcount
From: "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
Pass the guest memory's PFN info to tdh_mem_page_demote() by adding two
parameters, "guest_folio" and "guest_start_idx".
The guest memory's PFN info is not required by the SEAMCALL
TDH_MEM_PAGE_DEMOTE itself. Instead, it's used by the host kernel to track
the pamt_refcount for the 2MB range containing the guest private memory.
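For example, the KVM split path derives the two new arguments from the old
mirror SPTE. A simplified sketch of the call in tdx_sept_split_private_spte()
below (omitting the tdh_do_no_vcpus() wrapper and the prealloc cache
locking):

	struct page *guest_page = pfn_to_page(spte_to_pfn(old_mirror_spte));
	struct folio *guest_folio = page_folio(guest_page);

	err = tdh_mem_page_demote(&kvm_tdx->td, gpa, tdx_level, guest_folio,
				  folio_page_idx(guest_folio, guest_page),
				  new_sept_page, &kvm_tdx->prealloc_split_cache,
				  &entry, &level_state);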
After the S-EPT mapping is successfully split, set the pamt_refcount for the
2MB range containing the guest private memory to 512, after ensuring its
original value is 0. Warn loudly if setting the refcount fails, as that
indicates a kernel bug.
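A minimal sketch of that update, as done in the tdx.c hunk below.
atomic_cmpxchg_release() returns the old value, so a non-zero return means
the refcount was not 0 as expected:

	atomic_t *pamt_refcount = tdx_find_pamt_refcount(guest_base_pfn);

	/* 0 -> 512 (PTRS_PER_PMD); a non-zero old value is a kernel bug */
	WARN_ON_ONCE(atomic_cmpxchg_release(pamt_refcount, 0, PTRS_PER_PMD));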
In tdh_mem_page_demote(), check that the guest memory's base PFN is 2MB
aligned and that all the guest memory is contained in a single folio, to
guard against kernel bugs.
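Concretely, for a 2MB mapping npages is 512 (1 << (TDX_PS_2M * PTE_SHIFT)),
and the checks reduce to (as in the tdx.c hunk below, with guest_base_pfn
taken from folio_page(guest_folio, guest_start_idx)):

	if (guest_start_idx + npages > folio_nr_pages(guest_folio) ||
	    !IS_ALIGNED(guest_base_pfn, npages))
		return TDX_OPERAND_INVALID;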
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
Co-developed-by: Yan Zhao <yan.y.zhao@...el.com>
Signed-off-by: Yan Zhao <yan.y.zhao@...el.com>
---
v3:
- Split out as a new patch.
- Added parameters "guest_folio" and "guest_start_idx" to pass the guest
memory pfn info.
- Use atomic_cmpxchg_release() to set guest_pamt_refcount.
- No need to add a param "pfn_for_gfn" to kvm_x86_ops.split_external_spt(),
as the PFN info is already contained in the param "old_mirror_spte" of
kvm_x86_ops.split_external_spt().
---
arch/x86/include/asm/tdx.h | 6 +++---
arch/x86/kvm/vmx/tdx.c | 9 ++++++---
arch/x86/virt/vmx/tdx/tdx.c | 30 +++++++++++++++++++++++++-----
3 files changed, 34 insertions(+), 11 deletions(-)
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 5fc7498392fd..f536782da157 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -250,9 +250,9 @@ u64 tdh_mng_key_config(struct tdx_td *td);
u64 tdh_mng_create(struct tdx_td *td, u16 hkid);
u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp);
u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data);
-u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, int level, struct page *new_sept_page,
- struct tdx_prealloc *prealloc,
- u64 *ext_err1, u64 *ext_err2);
+u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, int level, struct folio *guest_folio,
+ unsigned long guest_start_idx, struct page *new_sept_page,
+ struct tdx_prealloc *prealloc, u64 *ext_err1, u64 *ext_err2);
u64 tdh_mr_extend(struct tdx_td *td, u64 gpa, u64 *ext_err1, u64 *ext_err2);
u64 tdh_mr_finalize(struct tdx_td *td);
u64 tdh_vp_flush(struct tdx_vp *vp);
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index a11ff02a4f30..0054a9de867c 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1991,7 +1991,9 @@ static int tdx_sept_split_private_spte(struct kvm *kvm, gfn_t gfn, enum pg_level
u64 old_mirror_spte, void *new_private_spt,
bool mmu_lock_shared)
{
+ struct page *guest_page = pfn_to_page(spte_to_pfn(old_mirror_spte));
struct page *new_sept_page = virt_to_page(new_private_spt);
+ struct folio *guest_folio = page_folio(guest_page);
int tdx_level = pg_level_to_tdx_sept_level(level);
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
gpa_t gpa = gfn_to_gpa(gfn);
@@ -2022,9 +2024,10 @@ static int tdx_sept_split_private_spte(struct kvm *kvm, gfn_t gfn, enum pg_level
return -EIO;
spin_lock(&kvm_tdx->prealloc_split_cache_lock);
- err = tdh_do_no_vcpus(tdh_mem_page_demote, kvm, &kvm_tdx->td, gpa,
- tdx_level, new_sept_page,
- &kvm_tdx->prealloc_split_cache, &entry, &level_state);
+ err = tdh_do_no_vcpus(tdh_mem_page_demote, kvm, &kvm_tdx->td, gpa, tdx_level,
+ guest_folio, folio_page_idx(guest_folio, guest_page),
+ new_sept_page, &kvm_tdx->prealloc_split_cache,
+ &entry, &level_state);
spin_unlock(&kvm_tdx->prealloc_split_cache_lock);
if (TDX_BUG_ON_2(err, TDH_MEM_PAGE_DEMOTE, entry, level_state, kvm)) {
tdx_pamt_put(new_sept_page);
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 9917e4e7705f..d036d9b5c87a 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -1871,9 +1871,9 @@ static u64 *dpamt_args_array_ptr_r12(struct tdx_module_array_args *args)
return &args->args_array[TDX_ARG_INDEX(r12)];
}
-u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, int level, struct page *new_sept_page,
- struct tdx_prealloc *prealloc,
- u64 *ext_err1, u64 *ext_err2)
+u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, int level, struct folio *guest_folio,
+ unsigned long guest_start_idx, struct page *new_sept_page,
+ struct tdx_prealloc *prealloc, u64 *ext_err1, u64 *ext_err2)
{
bool dpamt = tdx_supports_dynamic_pamt(&tdx_sysinfo) && level == TDX_PS_2M;
u64 guest_memory_pamt_page[MAX_TDX_ARG_SIZE(r12)];
@@ -1882,6 +1882,8 @@ u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, int level, struct page *new_
.args.rdx = tdx_tdr_pa(td),
.args.r8 = page_to_phys(new_sept_page),
};
+ /* base pfn for guest private memory */
+ unsigned long guest_base_pfn;
u64 ret;
if (!tdx_supports_demote_nointerrupt(&tdx_sysinfo))
@@ -1889,6 +1891,15 @@ u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, int level, struct page *new_
if (dpamt) {
u64 *args_array = dpamt_args_array_ptr_r12(&args);
+ unsigned long npages = 1 << (level * PTE_SHIFT);
+ struct page *guest_page;
+
+ guest_page = folio_page(guest_folio, guest_start_idx);
+ guest_base_pfn = page_to_pfn(guest_page);
+
+ if (guest_start_idx + npages > folio_nr_pages(guest_folio) ||
+ !IS_ALIGNED(guest_base_pfn, npages))
+ return TDX_OPERAND_INVALID;
if (alloc_pamt_array(guest_memory_pamt_page, prealloc))
return TDX_SW_ERROR;
@@ -1909,9 +1920,18 @@ u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, int level, struct page *new_
*ext_err1 = args.args.rcx;
*ext_err2 = args.args.rdx;
- if (dpamt && ret)
- free_pamt_array(guest_memory_pamt_page);
+ if (dpamt) {
+ if (ret) {
+ free_pamt_array(guest_memory_pamt_page);
+ } else {
+ /* PAMT refcount for guest private memory */
+ atomic_t *pamt_refcount;
+ pamt_refcount = tdx_find_pamt_refcount(guest_base_pfn);
+ WARN_ON_ONCE(atomic_cmpxchg_release(pamt_refcount, 0,
+ PTRS_PER_PMD));
+ }
+ }
return ret;
}
EXPORT_SYMBOL_GPL(tdh_mem_page_demote);
--
2.43.2