Message-ID: <20250612130508.3213505-1-kirill.shutemov@linux.intel.com>
Date: Thu, 12 Jun 2025 16:05:08 +0300
From: "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
To: chao.gao@...el.com
Cc: bp@...en8.de,
dave.hansen@...ux.intel.com,
isaku.yamahata@...el.com,
kai.huang@...el.com,
kirill.shutemov@...ux.intel.com,
kvm@...r.kernel.org,
linux-coco@...ts.linux.dev,
linux-kernel@...r.kernel.org,
mingo@...hat.com,
pbonzini@...hat.com,
rick.p.edgecombe@...el.com,
seanjc@...gle.com,
tglx@...utronix.de,
x86@...nel.org,
yan.y.zhao@...el.com
Subject: [PATCHv2.1 08/12] KVM: TDX: Handle PAMT allocation in fault path

There are two distinct cases when the kernel needs to allocate PAMT
memory in the fault path: for SEPT page tables in tdx_sept_link_private_spt()
and for leaf pages in tdx_sept_set_private_spte().
Both code paths run in atomic context, where blocking allocations are not
allowed. Take the PAMT pages from a pre-allocated per-vCPU pool instead.
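The pool is the kvm_mmu_memory_cache added elsewhere in this series as
vcpu->arch.pamt_page_cache; it has to be topped up from a sleepable
context (e.g. before the fault path takes mmu_lock) so that
kvm_mmu_memory_cache_alloc() always finds a page. As an illustration
only, with a hypothetical helper name, such a top-up could look like:

  /*
   * Illustrative sketch, not part of this patch: refill the per-vCPU
   * PAMT page cache while sleeping is still allowed, so that
   * tdx_alloc_pamt_page_atomic() never sees an empty cache.
   */
  static int tdx_topup_pamt_page_cache(struct kvm_vcpu *vcpu)
  {
  	return kvm_mmu_topup_memory_cache(&vcpu->arch.pamt_page_cache,
  					  tdx_nr_pamt_pages());
  }
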
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
---
arch/x86/include/asm/tdx.h | 4 ++++
arch/x86/kvm/vmx/tdx.c | 40 ++++++++++++++++++++++++++++++++-----
arch/x86/virt/vmx/tdx/tdx.c | 23 +++++++++++++++------
virt/kvm/kvm_main.c | 1 +
4 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 47092eb13eb3..39f8dd7e0f06 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -116,6 +116,10 @@ u32 tdx_get_nr_guest_keyids(void);
void tdx_guest_keyid_free(unsigned int keyid);
int tdx_nr_pamt_pages(void);
+int tdx_pamt_get(struct page *page, enum pg_level level,
+ struct page *(alloc)(void *data), void *data);
+void tdx_pamt_put(struct page *page, enum pg_level level);
+
struct page *tdx_alloc_page(void);
void tdx_free_page(struct page *page);
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 36c3c9f8a62c..2f058e17fd73 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1537,16 +1537,31 @@ static int tdx_mem_page_record_premap_cnt(struct kvm *kvm, gfn_t gfn,
return 0;
}
+static struct page *tdx_alloc_pamt_page_atomic(void *data)
+{
+ struct kvm_vcpu *vcpu = data;
+ void *p;
+
+ p = kvm_mmu_memory_cache_alloc(&vcpu->arch.pamt_page_cache);
+ return virt_to_page(p);
+}
+
int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
enum pg_level level, kvm_pfn_t pfn)
{
+ struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
struct page *page = pfn_to_page(pfn);
+ int ret;
/* TODO: handle large pages. */
if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
return -EINVAL;
+ ret = tdx_pamt_get(page, level, tdx_alloc_pamt_page_atomic, vcpu);
+ if (ret)
+ return ret;
+
/*
* Because guest_memfd doesn't support page migration with
* a_ops->migrate_folio (yet), no callback is triggered for KVM on page
@@ -1562,10 +1577,16 @@ int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
* barrier in tdx_td_finalize().
*/
smp_rmb();
- if (likely(kvm_tdx->state == TD_STATE_RUNNABLE))
- return tdx_mem_page_aug(kvm, gfn, level, page);
- return tdx_mem_page_record_premap_cnt(kvm, gfn, level, pfn);
+ if (likely(kvm_tdx->state == TD_STATE_RUNNABLE))
+ ret = tdx_mem_page_aug(kvm, gfn, level, page);
+ else
+ ret = tdx_mem_page_record_premap_cnt(kvm, gfn, level, pfn);
+
+ if (ret)
+ tdx_pamt_put(page, level);
+
+ return ret;
}
static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
@@ -1622,17 +1643,26 @@ int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
enum pg_level level, void *private_spt)
{
int tdx_level = pg_level_to_tdx_sept_level(level);
- gpa_t gpa = gfn_to_gpa(gfn);
+ struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
struct page *page = virt_to_page(private_spt);
+ gpa_t gpa = gfn_to_gpa(gfn);
u64 err, entry, level_state;
+ int ret;
+
+ ret = tdx_pamt_get(page, PG_LEVEL_4K, tdx_alloc_pamt_page_atomic, vcpu);
+ if (ret)
+ return ret;
err = tdh_mem_sept_add(&to_kvm_tdx(kvm)->td, gpa, tdx_level, page, &entry,
&level_state);
- if (unlikely(tdx_operand_busy(err)))
+ if (unlikely(tdx_operand_busy(err))) {
+ tdx_pamt_put(page, PG_LEVEL_4K);
return -EBUSY;
+ }
if (KVM_BUG_ON(err, kvm)) {
pr_tdx_error_2(TDH_MEM_SEPT_ADD, err, entry, level_state);
+ tdx_pamt_put(page, PG_LEVEL_4K);
return -EIO;
}
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 0cbf052c64e9..4fc9f4ae8165 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -2067,14 +2067,22 @@ static void tdx_free_pamt_pages(struct list_head *pamt_pages)
}
}
-static int tdx_alloc_pamt_pages(struct list_head *pamt_pages)
+static int tdx_alloc_pamt_pages(struct list_head *pamt_pages,
+ struct page *(alloc)(void *data), void *data)
{
for (int i = 0; i < tdx_nr_pamt_pages(); i++) {
- struct page *page = alloc_page(GFP_KERNEL);
+ struct page *page;
+
+ if (alloc)
+ page = alloc(data);
+ else
+ page = alloc_page(GFP_KERNEL);
+
if (!page) {
tdx_free_pamt_pages(pamt_pages);
return -ENOMEM;
}
+
list_add(&page->lru, pamt_pages);
}
return 0;
@@ -2130,7 +2138,8 @@ static int tdx_pamt_add(atomic_t *pamt_refcount, unsigned long hpa,
return 0;
}
-static int tdx_pamt_get(struct page *page, enum pg_level level)
+int tdx_pamt_get(struct page *page, enum pg_level level,
+ struct page *(alloc)(void *data), void *data)
{
unsigned long hpa = page_to_phys(page);
atomic_t *pamt_refcount;
@@ -2153,7 +2162,7 @@ static int tdx_pamt_get(struct page *page, enum pg_level level)
if (atomic_inc_not_zero(pamt_refcount))
return 0;
- if (tdx_alloc_pamt_pages(&pamt_pages))
+ if (tdx_alloc_pamt_pages(&pamt_pages, alloc, data))
return -ENOMEM;
ret = tdx_pamt_add(pamt_refcount, hpa, &pamt_pages);
@@ -2162,8 +2171,9 @@ static int tdx_pamt_get(struct page *page, enum pg_level level)
return ret >= 0 ? 0 : ret;
}
+EXPORT_SYMBOL_GPL(tdx_pamt_get);
-static void tdx_pamt_put(struct page *page, enum pg_level level)
+void tdx_pamt_put(struct page *page, enum pg_level level)
{
unsigned long hpa = page_to_phys(page);
atomic_t *pamt_refcount;
@@ -2198,6 +2208,7 @@ static void tdx_pamt_put(struct page *page, enum pg_level level)
tdx_free_pamt_pages(&pamt_pages);
}
+EXPORT_SYMBOL_GPL(tdx_pamt_put);
struct page *tdx_alloc_page(void)
{
@@ -2207,7 +2218,7 @@ struct page *tdx_alloc_page(void)
if (!page)
return NULL;
- if (tdx_pamt_get(page, PG_LEVEL_4K)) {
+ if (tdx_pamt_get(page, PG_LEVEL_4K, NULL, NULL)) {
__free_page(page);
return NULL;
}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index eec82775c5bf..6add012532a0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -436,6 +436,7 @@ void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc)
BUG_ON(!p);
return p;
}
+EXPORT_SYMBOL_GPL(kvm_mmu_memory_cache_alloc);
#endif
static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
--
2.47.2