Message-ID: <20250610145116.2502206-1-kirill.shutemov@linux.intel.com>
Date: Tue, 10 Jun 2025 17:51:16 +0300
From: "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
To: chao.gao@...el.com
Cc: bp@...en8.de,
dave.hansen@...ux.intel.com,
isaku.yamahata@...el.com,
kai.huang@...el.com,
kirill.shutemov@...ux.intel.com,
kvm@...r.kernel.org,
linux-coco@...ts.linux.dev,
linux-kernel@...r.kernel.org,
mingo@...hat.com,
pbonzini@...hat.com,
rick.p.edgecombe@...el.com,
seanjc@...gle.com,
tglx@...utronix.de,
x86@...nel.org,
yan.y.zhao@...el.com
Subject: [PATCHv2.1 04/12] x86/virt/tdx: Add tdx_alloc/free_page() helpers

The new helpers allocate and free pages that can be used for TDs.
Besides page allocation and freeing, these helpers also take care of
managing PAMT memory if the kernel runs on a platform that supports
Dynamic PAMT.
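
For illustration, a hypothetical caller of the new helpers would look
roughly like this (not part of this patch):

	struct page *page = tdx_alloc_page();	/* page + PAMT backing */
	if (!page)
		return -ENOMEM;
	/* ... hand the page to the TDX module ... */
	tdx_free_page(page);			/* drops PAMT refcount, frees page */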

The tdx_pamt_get()/put() helpers take care of allocating and freeing
PAMT memory and of refcounting it.

PAMT memory is allocated when the refcount for a 2M range crosses from 0
to 1 and is freed when the refcount drops back to zero. These
transitions can happen concurrently, and the pamt_lock spinlock
serializes them.
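
Simplified, the scheme looks like this (error handling, the 2M alignment
of the HPA and the lost-race cleanup are omitted; see the actual code
below):

	/* tdx_pamt_get(): the 0 -> 1 transition installs PAMT backing */
	if (atomic_inc_not_zero(pamt_refcount))
		return 0;
	tdx_alloc_pamt_pages(&pamt_pages);
	spin_lock(&pamt_lock);
	if (atomic_read(pamt_refcount) == 0)
		tdh_phymem_pamt_add(hpa | TDX_PS_2M, &pamt_pages);
	atomic_inc(pamt_refcount);
	spin_unlock(&pamt_lock);

	/* tdx_pamt_put(): the 1 -> 0 transition removes PAMT backing */
	if (!atomic_dec_and_test(pamt_refcount))
		return;
	spin_lock(&pamt_lock);
	if (atomic_read(pamt_refcount) == 0)
		tdh_phymem_pamt_remove(hpa | TDX_PS_2M, &pamt_pages);
	spin_unlock(&pamt_lock);
	tdx_free_pamt_pages(&pamt_pages);
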
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
---
arch/x86/include/asm/tdx.h | 3 +
arch/x86/include/asm/tdx_errno.h | 6 +
arch/x86/virt/vmx/tdx/tdx.c | 224 +++++++++++++++++++++++++++++++
arch/x86/virt/vmx/tdx/tdx.h | 2 +
4 files changed, 235 insertions(+)
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 583d6fe66821..d9a77147412f 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -115,6 +115,9 @@ int tdx_guest_keyid_alloc(void);
u32 tdx_get_nr_guest_keyids(void);
void tdx_guest_keyid_free(unsigned int keyid);
+struct page *tdx_alloc_page(void);
+void tdx_free_page(struct page *page);
+
struct tdx_td {
/* TD root structure: */
struct page *tdr_page;
diff --git a/arch/x86/include/asm/tdx_errno.h b/arch/x86/include/asm/tdx_errno.h
index d418934176e2..0b3332c2d6b2 100644
--- a/arch/x86/include/asm/tdx_errno.h
+++ b/arch/x86/include/asm/tdx_errno.h
@@ -18,6 +18,7 @@
#define TDX_PREVIOUS_TLB_EPOCH_BUSY 0x8000020100000000ULL
#define TDX_RND_NO_ENTROPY 0x8000020300000000ULL
#define TDX_PAGE_METADATA_INCORRECT 0xC000030000000000ULL
+#define TDX_HPA_RANGE_NOT_FREE 0xC000030400000000ULL
#define TDX_VCPU_NOT_ASSOCIATED 0x8000070200000000ULL
#define TDX_KEY_GENERATION_FAILED 0x8000080000000000ULL
#define TDX_KEY_STATE_INCORRECT 0xC000081100000000ULL
@@ -86,5 +87,10 @@ static inline bool tdx_operand_busy(u64 err)
{
return tdx_status(err) == TDX_OPERAND_BUSY;
}
+
+static inline bool tdx_hpa_range_not_free(u64 err)
+{
+ return tdx_status(err) == TDX_HPA_RANGE_NOT_FREE;
+}
#endif /* __ASSEMBLER__ */
#endif /* _X86_TDX_ERRNO_H */
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index ad9d7a30989d..3830fbc06397 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -2000,3 +2000,227 @@ u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
}
EXPORT_SYMBOL_GPL(tdh_phymem_page_wbinvd_hkid);
+
+static int tdx_nr_pamt_pages(void)
+{
+ if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
+ return 0;
+
+ return tdx_sysinfo.tdmr.pamt_4k_entry_size * PTRS_PER_PTE / PAGE_SIZE;
+}
+
+static u64 tdh_phymem_pamt_add(unsigned long hpa,
+ struct list_head *pamt_pages)
+{
+ struct tdx_module_args args = {
+ .rcx = hpa,
+ };
+ struct page *page;
+ u64 *p;
+
+ WARN_ON_ONCE(!IS_ALIGNED(hpa & PAGE_MASK, PMD_SIZE));
+
+ p = &args.rdx;
+ list_for_each_entry(page, pamt_pages, lru) {
+ *p = page_to_phys(page);
+ p++;
+ }
+
+ return seamcall(TDH_PHYMEM_PAMT_ADD, &args);
+}
+
+static u64 tdh_phymem_pamt_remove(unsigned long hpa,
+ struct list_head *pamt_pages)
+{
+ struct tdx_module_args args = {
+ .rcx = hpa,
+ };
+ struct page *page;
+ u64 *p, ret;
+
+ WARN_ON_ONCE(!IS_ALIGNED(hpa & PAGE_MASK, PMD_SIZE));
+
+ ret = seamcall_ret(TDH_PHYMEM_PAMT_REMOVE, &args);
+ if (ret)
+ return ret;
+
+ p = &args.rdx;
+ for (int i = 0; i < tdx_nr_pamt_pages(); i++) {
+ page = phys_to_page(*p);
+ list_add(&page->lru, pamt_pages);
+ p++;
+ }
+
+ return ret;
+}
+
+static DEFINE_SPINLOCK(pamt_lock);
+
+static void tdx_free_pamt_pages(struct list_head *pamt_pages)
+{
+ struct page *page;
+
+ while ((page = list_first_entry_or_null(pamt_pages, struct page, lru))) {
+ list_del(&page->lru);
+ __free_page(page);
+ }
+}
+
+static int tdx_alloc_pamt_pages(struct list_head *pamt_pages)
+{
+ for (int i = 0; i < tdx_nr_pamt_pages(); i++) {
+ struct page *page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ tdx_free_pamt_pages(pamt_pages);
+ return -ENOMEM;
+ }
+ list_add(&page->lru, pamt_pages);
+ }
+ return 0;
+}
+
+/*
+ * Returns >=0 on success. -errno on failure.
+ *
+ * Non-zero return value means pamt_pages is unused and can be freed.
+ */
+static int tdx_pamt_add(atomic_t *pamt_refcount, unsigned long hpa,
+ struct list_head *pamt_pages)
+{
+ u64 err;
+
+ guard(spinlock)(&pamt_lock);
+
+ hpa = ALIGN_DOWN(hpa, PMD_SIZE);
+
+ /*
+ * Lost the race to another tdx_pamt_add(): the other task has already
+ * allocated PAMT memory for this HPA.
+ *
+ * Return 1 to indicate that pamt_pages is unused and can be freed.
+ */
+ if (atomic_read(pamt_refcount) != 0) {
+ atomic_inc(pamt_refcount);
+ return 1;
+ }
+
+ err = tdh_phymem_pamt_add(hpa | TDX_PS_2M, pamt_pages);
+
+ /*
+ * tdx_hpa_range_not_free() is true if the current task won the race
+ * against tdx_pamt_put().
+ */
+ if (err && !tdx_hpa_range_not_free(err)) {
+ pr_err("TDH_PHYMEM_PAMT_ADD failed: %#llx\n", err);
+ return -EIO;
+ }
+
+ atomic_set(pamt_refcount, 1);
+
+ /*
+ * The current task won the race against tdx_pamt_put() and prevented
+ * it from freeing PAMT memory.
+ *
+ * Return 1 to indicate that pamt_pages is unused and can be freed.
+ */
+ if (tdx_hpa_range_not_free(err))
+ return 1;
+
+ return 0;
+}
+
+static int tdx_pamt_get(struct page *page, enum pg_level level)
+{
+ unsigned long hpa = page_to_phys(page);
+ atomic_t *pamt_refcount;
+ LIST_HEAD(pamt_pages);
+ int ret;
+
+ if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
+ return 0;
+
+ /*
+ * Only PAMT_4K is allocated dynamically. PAMT_2M and PAMT_1G are
+ * allocated statically on TDX module initialization.
+ */
+ if (level != PG_LEVEL_4K)
+ return 0;
+
+ pamt_refcount = tdx_get_pamt_refcount(hpa);
+ WARN_ON_ONCE(atomic_read(pamt_refcount) < 0);
+
+ if (atomic_inc_not_zero(pamt_refcount))
+ return 0;
+
+ if (tdx_alloc_pamt_pages(&pamt_pages))
+ return -ENOMEM;
+
+ ret = tdx_pamt_add(pamt_refcount, hpa, &pamt_pages);
+ if (ret)
+ tdx_free_pamt_pages(&pamt_pages);
+
+ return ret >= 0 ? 0 : ret;
+}
+
+static void tdx_pamt_put(struct page *page, enum pg_level level)
+{
+ unsigned long hpa = page_to_phys(page);
+ atomic_t *pamt_refcount;
+ LIST_HEAD(pamt_pages);
+ u64 err;
+
+ if (!tdx_supports_dynamic_pamt(&tdx_sysinfo))
+ return;
+
+ if (level != PG_LEVEL_4K)
+ return;
+
+ hpa = ALIGN_DOWN(hpa, PMD_SIZE);
+
+ pamt_refcount = tdx_get_pamt_refcount(hpa);
+ if (!atomic_dec_and_test(pamt_refcount))
+ return;
+
+ scoped_guard(spinlock, &pamt_lock) {
+ /* Lost race against tdx_pamt_add()? */
+ if (atomic_read(pamt_refcount) != 0)
+ return;
+
+ err = tdh_phymem_pamt_remove(hpa | TDX_PS_2M, &pamt_pages);
+
+ if (err) {
+ atomic_inc(pamt_refcount);
+ pr_err("TDH_PHYMEM_PAMT_REMOVE failed: %#llx\n", err);
+ return;
+ }
+ }
+
+ tdx_free_pamt_pages(&pamt_pages);
+}
+
+struct page *tdx_alloc_page(void)
+{
+ struct page *page;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return NULL;
+
+ if (tdx_pamt_get(page, PG_LEVEL_4K)) {
+ __free_page(page);
+ return NULL;
+ }
+
+ return page;
+}
+EXPORT_SYMBOL_GPL(tdx_alloc_page);
+
+void tdx_free_page(struct page *page)
+{
+ if (!page)
+ return;
+
+ tdx_pamt_put(page, PG_LEVEL_4K);
+ __free_page(page);
+}
+EXPORT_SYMBOL_GPL(tdx_free_page);
diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
index 82bb82be8567..46c4214b79fb 100644
--- a/arch/x86/virt/vmx/tdx/tdx.h
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -46,6 +46,8 @@
#define TDH_PHYMEM_PAGE_WBINVD 41
#define TDH_VP_WR 43
#define TDH_SYS_CONFIG 45
+#define TDH_PHYMEM_PAMT_ADD 58
+#define TDH_PHYMEM_PAMT_REMOVE 59
/*
* SEAMCALL leaf:
--
2.47.2