[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250502130828.4071412-3-kirill.shutemov@linux.intel.com>
Date: Fri,  2 May 2025 16:08:18 +0300
From: "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
To: pbonzini@...hat.com,
	seanjc@...gle.com
Cc: rick.p.edgecombe@...el.com,
	isaku.yamahata@...el.com,
	kai.huang@...el.com,
	yan.y.zhao@...el.com,
	tglx@...utronix.de,
	mingo@...hat.com,
	bp@...en8.de,
	dave.hansen@...ux.intel.com,
	kvm@...r.kernel.org,
	x86@...nel.org,
	linux-coco@...ts.linux.dev,
	linux-kernel@...r.kernel.org,
	"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
Subject: [RFC, PATCH 02/12] x86/virt/tdx: Allocate reference counters for PAMT memory
The PAMT memory holds metadata for TDX-protected memory. With Dynamic
PAMT, PAMT_4K is allocated on demand. The kernel supplies the TDX module
with a page pair that covers 2M of host physical memory.
The kernel must provide this page pair before using pages from the range
for TDX. If this is not done, any SEAMCALL that attempts to use the
memory will fail.
Allocate reference counters for every 2M range to track PAMT memory
usage. This is necessary to accurately determine when PAMT memory needs
to be allocated and when it can be freed.
This allocation will consume 2MiB for every 1TiB of physical memory.
Tracking PAMT memory usage on the kernel side duplicates what the TDX module
does.  It is possible to avoid this by lazily allocating PAMT memory on
SEAMCALL failure and freeing it based on hints provided by the TDX
module when the last user of PAMT memory is no longer present.
However, this approach complicates serialization.
The TDX module takes locks when dealing with PAMT: a shared lock on any
SEAMCALL that uses an explicit HPA and an exclusive lock on PAMT.ADD and
PAMT.REMOVE. Any SEAMCALL that uses an explicit HPA as an operand may fail
if it races with PAMT.ADD/REMOVE.
Since PAMT is a global resource, to prevent failure the kernel would
need global locking (per-TD is not sufficient). Or, it has to retry on
TDX_OPERAND_BUSY.
Neither option is ideal, so tracking PAMT usage on the kernel side
seems like a reasonable alternative.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
---
 arch/x86/virt/vmx/tdx/tdx.c | 113 +++++++++++++++++++++++++++++++++++-
 1 file changed, 111 insertions(+), 2 deletions(-)
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index c8bfd765e451..00e07a0c908a 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -29,6 +29,7 @@
 #include <linux/acpi.h>
 #include <linux/suspend.h>
 #include <linux/idr.h>
+#include <linux/vmalloc.h>
 #include <asm/page.h>
 #include <asm/special_insns.h>
 #include <asm/msr-index.h>
@@ -50,6 +51,8 @@ static DEFINE_PER_CPU(bool, tdx_lp_initialized);
 
 static struct tdmr_info_list tdx_tdmr_list;
 
+static atomic_t *pamt_refcounts;
+
 static enum tdx_module_status_t tdx_module_status;
 static DEFINE_MUTEX(tdx_module_lock);
 
@@ -1035,9 +1038,108 @@ static int config_global_keyid(void)
 	return ret;
 }
 
+/*
+ * Return a pointer to the PAMT reference counter that tracks the
+ * PMD_SIZE-aligned physical range containing @hpa.
+ *
+ * The counter array is indexed by range number (hpa / PMD_SIZE). No bounds
+ * check and no NULL check of pamt_refcounts is performed here.
+ */
+atomic_t *tdx_get_pamt_refcount(unsigned long hpa)
+{
+	unsigned long idx = hpa / PMD_SIZE;
+
+	return pamt_refcounts + idx;
+}
+EXPORT_SYMBOL_GPL(tdx_get_pamt_refcount);
+
+/*
+ * apply_to_page_range() callback: back the refcount-array PTE at @addr with
+ * a zeroed page, unless the PTE is already populated.
+ *
+ * @data is unused.
+ *
+ * Returns 0 on success (including when nothing had to be done) and -ENOMEM
+ * if no page could be allocated.
+ */
+static int pamt_refcount_populate(pte_t *pte, unsigned long addr, void *data)
+{
+	unsigned long page;
+	pte_t new_pte;
+
+	/* Unlocked check first: skip the allocation if a page is mapped. */
+	if (!pte_none(ptep_get(pte)))
+		return 0;
+
+	page = get_zeroed_page(GFP_KERNEL);
+	if (!page)
+		return -ENOMEM;
+
+	new_pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL);
+
+	/*
+	 * Re-check under the lock. If the PTE was populated after the
+	 * unlocked check above, keep it and give our page back.
+	 */
+	spin_lock(&init_mm.page_table_lock);
+	if (!pte_none(ptep_get(pte)))
+		free_page(page);
+	else
+		set_pte_at(&init_mm, addr, pte, new_pte);
+	spin_unlock(&init_mm.page_table_lock);
+
+	return 0;
+}
+
+/*
+ * apply_to_existing_page_range() callback: unmap the refcount-array PTE at
+ * @addr and free the page that backed it.
+ *
+ * @data is unused. Always returns 0.
+ */
+static int pamt_refcount_depopulate(pte_t *pte, unsigned long addr,
+				    void *data)
+{
+	/* Direct-map address of the backing page, read before locking. */
+	unsigned long pfn = pte_pfn(ptep_get(pte));
+	void *page = __va(PFN_PHYS(pfn));
+
+	spin_lock(&init_mm.page_table_lock);
+
+	/* Only clear and free if the PTE is still populated under the lock. */
+	if (pte_none(ptep_get(pte)))
+		goto unlock;
+
+	pte_clear(&init_mm, addr, pte);
+	free_page((unsigned long)page);
+
+unlock:
+	spin_unlock(&init_mm.page_table_lock);
+
+	return 0;
+}
+
+/*
+ * Populate the part of the PAMT reference counter array that covers @tdmr.
+ *
+ * The counters live in a reserved virtual range (pamt_refcounts) that is
+ * backed with pages on demand. This maps zeroed pages for every counter
+ * that belongs to the physical range [tdmr->base, tdmr->base + tdmr->size).
+ *
+ * Returns 0 on success, or when Dynamic PAMT is not supported and there is
+ * nothing to populate. Otherwise returns the error from
+ * apply_to_page_range().
+ */
+static int alloc_tdmr_pamt_refcount(struct tdmr_info *tdmr)
+{
+	unsigned long start, end;
+
+	/*
+	 * init_pamt_metadata() reserves the counter range only when the TDX
+	 * module supports Dynamic PAMT; otherwise pamt_refcounts stays NULL.
+	 * Walking init_mm from addresses derived from a NULL base would
+	 * populate page tables at bogus low addresses, so bail out early.
+	 */
+	if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
+		return 0;
+
+	/* Address of the first counter and of one past the last counter. */
+	start = (unsigned long)tdx_get_pamt_refcount(tdmr->base);
+	end = (unsigned long)tdx_get_pamt_refcount(tdmr->base + tdmr->size);
+
+	/* PTEs map whole pages: widen the span to page boundaries. */
+	start = round_down(start, PAGE_SIZE);
+	end = round_up(end, PAGE_SIZE);
+
+	return apply_to_page_range(&init_mm, start, end - start,
+				   pamt_refcount_populate, NULL);
+}
+
+/*
+ * Reserve virtual address space for the PAMT reference counter array.
+ *
+ * Does nothing if the TDX module does not support Dynamic PAMT. On success
+ * pamt_refcounts points at the start of the reserved range; no backing
+ * pages are allocated here.
+ *
+ * Returns 0 on success or -ENOMEM if the range could not be reserved.
+ */
+static int init_pamt_metadata(void)
+{
+	struct vm_struct *area;
+	size_t size;
+
+	if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
+		return 0;
+
+	/*
+	 * One counter per PTRS_PER_PTE pages (one PMD_SIZE range). Round the
+	 * number of ranges up so that a trailing partial range, when max_pfn
+	 * is not a multiple of PTRS_PER_PTE, gets a counter too.
+	 */
+	size = DIV_ROUND_UP(max_pfn, PTRS_PER_PTE) * sizeof(*pamt_refcounts);
+
+	/*
+	 * Reserve vmalloc range for PAMT reference counters. It covers all
+	 * physical address space up to max_pfn. It is going to be populated
+	 * from init_tdmr() only for present memory that is available for TDX
+	 * use.
+	 */
+	area = get_vm_area(size, VM_IOREMAP);
+	if (!area)
+		return -ENOMEM;
+
+	pamt_refcounts = area->addr;
+	return 0;
+}
+
+/*
+ * Undo init_pamt_metadata() and any population done on top of it: free the
+ * pages backing the PAMT reference counters, then release the reserved
+ * virtual range.
+ *
+ * Safe to call when nothing was reserved (pamt_refcounts is NULL), e.g.
+ * when the TDX module does not support Dynamic PAMT.
+ */
+static void free_pamt_metadata(void)
+{
+	size_t size;
+
+	/*
+	 * Without a reserved range there is nothing to tear down, and
+	 * walking init_mm starting at address 0 would be wrong.
+	 */
+	if (!pamt_refcounts)
+		return;
+
+	/*
+	 * One counter per PTRS_PER_PTE pages, counting a trailing partial
+	 * range as well, rounded up to whole pages since PTEs map pages.
+	 */
+	size = DIV_ROUND_UP(max_pfn, PTRS_PER_PTE) * sizeof(*pamt_refcounts);
+	size = round_up(size, PAGE_SIZE);
+
+	/* Unmap and free every page that was populated in the range. */
+	apply_to_existing_page_range(&init_mm,
+				     (unsigned long)pamt_refcounts,
+				     size, pamt_refcount_depopulate,
+				     NULL);
+
+	vfree(pamt_refcounts);
+	pamt_refcounts = NULL;
+}
+
 static int init_tdmr(struct tdmr_info *tdmr)
 {
 	u64 next;
+	int ret;
+
+	ret = alloc_tdmr_pamt_refcount(tdmr);
+	if (ret)
+		return ret;
 
 	/*
 	 * Initializing a TDMR can be time consuming.  To avoid long
@@ -1048,7 +1150,6 @@ static int init_tdmr(struct tdmr_info *tdmr)
 		struct tdx_module_args args = {
 			.rcx = tdmr->base,
 		};
-		int ret;
 
 		ret = seamcall_prerr_ret(TDH_SYS_TDMR_INIT, &args);
 		if (ret)
@@ -1134,10 +1235,15 @@ static int init_tdx_module(void)
 	if (ret)
 		goto err_reset_pamts;
 
+	/* Reserve vmalloc range for PAMT reference counters */
+	ret = init_pamt_metadata();
+	if (ret)
+		goto err_reset_pamts;
+
 	/* Initialize TDMRs to complete the TDX module initialization */
 	ret = init_tdmrs(&tdx_tdmr_list);
 	if (ret)
-		goto err_reset_pamts;
+		goto err_free_pamt_metadata;
 
 	pr_info("%lu KB allocated for PAMT\n", tdmrs_count_pamt_kb(&tdx_tdmr_list));
 
@@ -1149,6 +1255,9 @@ static int init_tdx_module(void)
 	put_online_mems();
 	return ret;
 
+err_free_pamt_metadata:
+	free_pamt_metadata();
+
 err_reset_pamts:
 	/*
 	 * Part of PAMTs may already have been initialized by the
-- 
2.47.2
Powered by blists - more mailing lists
 
