lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260106102426.25311-1-yan.y.zhao@intel.com>
Date: Tue,  6 Jan 2026 18:24:26 +0800
From: Yan Zhao <yan.y.zhao@...el.com>
To: pbonzini@...hat.com,
	seanjc@...gle.com
Cc: linux-kernel@...r.kernel.org,
	kvm@...r.kernel.org,
	x86@...nel.org,
	rick.p.edgecombe@...el.com,
	dave.hansen@...el.com,
	kas@...nel.org,
	tabba@...gle.com,
	ackerleytng@...gle.com,
	michael.roth@....com,
	david@...nel.org,
	vannapurve@...gle.com,
	sagis@...gle.com,
	vbabka@...e.cz,
	thomas.lendacky@....com,
	nik.borisov@...e.com,
	pgonda@...gle.com,
	fan.du@...el.com,
	jun.miao@...el.com,
	francescolavra.fl@...il.com,
	jgross@...e.com,
	ira.weiny@...el.com,
	isaku.yamahata@...el.com,
	xiaoyao.li@...el.com,
	kai.huang@...el.com,
	binbin.wu@...ux.intel.com,
	chao.p.peng@...el.com,
	chao.gao@...el.com,
	yan.y.zhao@...el.com
Subject: [PATCH v3 23/24] x86/tdx: Pass guest memory's PFN info to demote for updating pamt_refcount

From: "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>

Pass guest memory's PFN info to tdh_mem_page_demote() by adding parameters
"guest_folio" and "guest_start_idx" to tdh_mem_page_demote().

The guest memory's PFN info is not directly required by the SEAMCALL
TDH_MEM_PAGE_DEMOTE itself. Instead, it's used by the host kernel to track
the pamt_refcount for the 2MB range containing the guest private memory.

After the S-EPT mapping is successfully split, set the pamt_refcount for
the 2MB range containing the guest private memory to 512, after ensuring
its original value is 0. Warn loudly if setting the refcount fails, as
that indicates a kernel bug.

Check in tdh_mem_page_demote() that the guest memory's base PFN is 2MB
aligned and that all the guest memory is contained in a single folio, to
guard against any kernel bugs.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
Co-developed-by: Yan Zhao <yan.y.zhao@...el.com>
Signed-off-by: Yan Zhao <yan.y.zhao@...el.com>
---
v3:
- Split out as a new patch.
- Added parameters "guest_folio" and "guest_start_idx" to pass the guest
  memory pfn info.
- Use atomic_cmpxchg_release() to set guest_pamt_refcount.
- No need to add param "pfn_for_gfn" to kvm_x86_ops.split_external_spte()
  as the pfn info is already contained in param "old_mirror_spte" of
  kvm_x86_ops.split_external_spte().
---
 arch/x86/include/asm/tdx.h  |  6 +++---
 arch/x86/kvm/vmx/tdx.c      |  9 ++++++---
 arch/x86/virt/vmx/tdx/tdx.c | 30 +++++++++++++++++++++++++-----
 3 files changed, 34 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 5fc7498392fd..f536782da157 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -250,9 +250,9 @@ u64 tdh_mng_key_config(struct tdx_td *td);
 u64 tdh_mng_create(struct tdx_td *td, u16 hkid);
 u64 tdh_vp_create(struct tdx_td *td, struct tdx_vp *vp);
 u64 tdh_mng_rd(struct tdx_td *td, u64 field, u64 *data);
-u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, int level, struct page *new_sept_page,
-			struct tdx_prealloc *prealloc,
-			u64 *ext_err1, u64 *ext_err2);
+u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, int level, struct folio *guest_folio,
+			unsigned long guest_start_idx, struct page *new_sept_page,
+			struct tdx_prealloc *prealloc, u64 *ext_err1, u64 *ext_err2);
 u64 tdh_mr_extend(struct tdx_td *td, u64 gpa, u64 *ext_err1, u64 *ext_err2);
 u64 tdh_mr_finalize(struct tdx_td *td);
 u64 tdh_vp_flush(struct tdx_vp *vp);
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index a11ff02a4f30..0054a9de867c 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1991,7 +1991,9 @@ static int tdx_sept_split_private_spte(struct kvm *kvm, gfn_t gfn, enum pg_level
 				       u64 old_mirror_spte, void *new_private_spt,
 				       bool mmu_lock_shared)
 {
+	struct page *guest_page = pfn_to_page(spte_to_pfn(old_mirror_spte));
 	struct page *new_sept_page = virt_to_page(new_private_spt);
+	struct folio *guest_folio = page_folio(guest_page);
 	int tdx_level = pg_level_to_tdx_sept_level(level);
 	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
 	gpa_t gpa = gfn_to_gpa(gfn);
@@ -2022,9 +2024,10 @@ static int tdx_sept_split_private_spte(struct kvm *kvm, gfn_t gfn, enum pg_level
 		return -EIO;
 
 	spin_lock(&kvm_tdx->prealloc_split_cache_lock);
-	err = tdh_do_no_vcpus(tdh_mem_page_demote, kvm, &kvm_tdx->td, gpa,
-			      tdx_level, new_sept_page,
-			      &kvm_tdx->prealloc_split_cache, &entry, &level_state);
+	err = tdh_do_no_vcpus(tdh_mem_page_demote, kvm, &kvm_tdx->td, gpa, tdx_level,
+			      guest_folio, folio_page_idx(guest_folio, guest_page),
+			      new_sept_page, &kvm_tdx->prealloc_split_cache,
+			      &entry, &level_state);
 	spin_unlock(&kvm_tdx->prealloc_split_cache_lock);
 	if (TDX_BUG_ON_2(err, TDH_MEM_PAGE_DEMOTE, entry, level_state, kvm)) {
 		tdx_pamt_put(new_sept_page);
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 9917e4e7705f..d036d9b5c87a 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -1871,9 +1871,9 @@ static u64 *dpamt_args_array_ptr_r12(struct tdx_module_array_args *args)
 	return &args->args_array[TDX_ARG_INDEX(r12)];
 }
 
-u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, int level, struct page *new_sept_page,
-			struct tdx_prealloc *prealloc,
-			u64 *ext_err1, u64 *ext_err2)
+u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, int level, struct folio *guest_folio,
+			unsigned long guest_start_idx, struct page *new_sept_page,
+			struct tdx_prealloc *prealloc, u64 *ext_err1, u64 *ext_err2)
 {
 	bool dpamt = tdx_supports_dynamic_pamt(&tdx_sysinfo) && level == TDX_PS_2M;
 	u64 guest_memory_pamt_page[MAX_TDX_ARG_SIZE(r12)];
@@ -1882,6 +1882,8 @@ u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, int level, struct page *new_
 		.args.rdx = tdx_tdr_pa(td),
 		.args.r8 = page_to_phys(new_sept_page),
 	};
+	/* base pfn for guest private memory */
+	unsigned long guest_base_pfn;
 	u64 ret;
 
 	if (!tdx_supports_demote_nointerrupt(&tdx_sysinfo))
@@ -1889,6 +1891,15 @@ u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, int level, struct page *new_
 
 	if (dpamt) {
 		u64 *args_array = dpamt_args_array_ptr_r12(&args);
+		unsigned long npages = 1 << (level * PTE_SHIFT);
+		struct page *guest_page;
+
+		guest_page = folio_page(guest_folio, guest_start_idx);
+		guest_base_pfn = page_to_pfn(guest_page);
+
+		if (guest_start_idx + npages > folio_nr_pages(guest_folio) ||
+		    !IS_ALIGNED(guest_base_pfn, npages))
+			return TDX_OPERAND_INVALID;
 
 		if (alloc_pamt_array(guest_memory_pamt_page, prealloc))
 			return TDX_SW_ERROR;
@@ -1909,9 +1920,18 @@ u64 tdh_mem_page_demote(struct tdx_td *td, u64 gpa, int level, struct page *new_
 	*ext_err1 = args.args.rcx;
 	*ext_err2 = args.args.rdx;
 
-	if (dpamt && ret)
-		free_pamt_array(guest_memory_pamt_page);
+	if (dpamt) {
+		if (ret) {
+			free_pamt_array(guest_memory_pamt_page);
+		} else {
+			/* PAMT refcount for guest private memory */
+			atomic_t *pamt_refcount;
 
+			pamt_refcount = tdx_find_pamt_refcount(guest_base_pfn);
+			WARN_ON_ONCE(atomic_cmpxchg_release(pamt_refcount, 0,
+							    PTRS_PER_PMD));
+		}
+	}
 	return ret;
 }
 EXPORT_SYMBOL_GPL(tdh_mem_page_demote);
-- 
2.43.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ