Message-ID: <20250807094628.4790-1-yan.y.zhao@intel.com>
Date: Thu, 7 Aug 2025 17:46:28 +0800
From: Yan Zhao <yan.y.zhao@...el.com>
To: pbonzini@...hat.com,
seanjc@...gle.com
Cc: linux-kernel@...r.kernel.org,
kvm@...r.kernel.org,
x86@...nel.org,
rick.p.edgecombe@...el.com,
dave.hansen@...el.com,
kas@...nel.org,
tabba@...gle.com,
ackerleytng@...gle.com,
quic_eberman@...cinc.com,
michael.roth@....com,
david@...hat.com,
vannapurve@...gle.com,
vbabka@...e.cz,
thomas.lendacky@....com,
pgonda@...gle.com,
zhiquan1.li@...el.com,
fan.du@...el.com,
jun.miao@...el.com,
ira.weiny@...el.com,
isaku.yamahata@...el.com,
xiaoyao.li@...el.com,
binbin.wu@...ux.intel.com,
chao.p.peng@...el.com,
yan.y.zhao@...el.com
Subject: [RFC PATCH v2 23/23] KVM: TDX: Turn on PG_LEVEL_2M after TD is RUNNABLE

Turn on PG_LEVEL_2M in tdx_gmem_private_max_mapping_level() once the TD
is RUNNABLE, so that 2MB private mappings become possible.

Update the warnings and KVM_BUG_ON() checks elsewhere to match: 2MB
mappings are permitted only after the TD is RUNNABLE, while PG_LEVEL_4K
is still enforced before that.

Opportunistically, remove the unused parameters "gfn" and "pfn" from
tdx_mem_page_record_premap_cnt().

Signed-off-by: Xiaoyao Li <xiaoyao.li@...el.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@...el.com>
Signed-off-by: Yan Zhao <yan.y.zhao@...el.com>
---
RFC v2:
- Merged RFC v1's patch 4 (forcing PG_LEVEL_4K before TD runnable) with
patch 9 (allowing PG_LEVEL_2M after TD runnable).
---
arch/x86/kvm/vmx/tdx.c | 29 +++++++++++++++--------------
1 file changed, 15 insertions(+), 14 deletions(-)
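
Note for reviewers (not for the commit message): a minimal sketch of the
resulting policy, assuming (as this patch does) that the pre-runnable
tdh_mem_page_add() path only takes 4KB pages, while tdh_mem_page_aug()
can also install 2MB mappings on a TDX module with large-page support.
The helper name and the gmem_level parameter below are hypothetical and
purely illustrative; only tdx_gmem_private_max_mapping_level() is from
this patch:

	/*
	 * Hypothetical caller-side clamp: the private fault path must
	 * not map above what the vendor hook permits, so 2MB mappings
	 * only become reachable once the hook stops returning
	 * PG_LEVEL_4K, i.e. once the TD is RUNNABLE.
	 */
	static u8 example_clamp_private_level(struct kvm *kvm, kvm_pfn_t pfn,
					      u8 gmem_level)
	{
		return min_t(u8, gmem_level,
			     tdx_gmem_private_max_mapping_level(kvm, pfn));
	}
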
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 6e061d659639..a3e1ac044ee9 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -1633,12 +1633,11 @@ static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
* The counter has to be zero on KVM_TDX_FINALIZE_VM, to ensure that there
* are no half-initialized shared EPT pages.
*/
-static int tdx_mem_page_record_premap_cnt(struct kvm *kvm, gfn_t gfn,
- enum pg_level level, kvm_pfn_t pfn)
+static int tdx_mem_page_record_premap_cnt(struct kvm *kvm, enum pg_level level)
{
struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
- if (KVM_BUG_ON(kvm->arch.pre_fault_allowed, kvm))
+ if (KVM_BUG_ON(kvm->arch.pre_fault_allowed || level != PG_LEVEL_4K, kvm))
return -EINVAL;
/* nr_premapped will be decreased when tdh_mem_page_add() is called. */
@@ -1667,10 +1666,6 @@ static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
if (ret)
return ret;
- /* TODO: handle large pages. */
- if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
- return -EINVAL;
-
/*
* Read 'pre_fault_allowed' before 'kvm_tdx->state'; see matching
* barrier in tdx_td_finalize().
@@ -1680,7 +1675,7 @@ static int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
if (likely(kvm_tdx->state == TD_STATE_RUNNABLE))
ret = tdx_mem_page_aug(kvm, gfn, level, page);
else
- ret = tdx_mem_page_record_premap_cnt(kvm, gfn, level, pfn);
+ ret = tdx_mem_page_record_premap_cnt(kvm, level);
if (ret)
tdx_pamt_put(page, level);
@@ -1697,8 +1692,8 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
gpa_t gpa = gfn_to_gpa(gfn);
u64 err, entry, level_state;
- /* TODO: handle large pages. */
- if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm))
+ /* Large pages are not supported before the TD is runnable. */
+ if (KVM_BUG_ON(kvm_tdx->state != TD_STATE_RUNNABLE && level != PG_LEVEL_4K, kvm))
return -EINVAL;
if (KVM_BUG_ON(!is_hkid_assigned(kvm_tdx), kvm))
@@ -1791,7 +1786,7 @@ static int tdx_sept_link_private_spt(struct kvm *kvm, gfn_t gfn,
static int tdx_is_sept_zap_err_due_to_premap(struct kvm_tdx *kvm_tdx, u64 err,
u64 entry, int level)
{
- if (!err || kvm_tdx->state == TD_STATE_RUNNABLE)
+ if (!err || kvm_tdx->state == TD_STATE_RUNNABLE || level > PG_LEVEL_4K)
return false;
if (err != (TDX_EPT_ENTRY_STATE_INCORRECT | TDX_OPERAND_ID_RCX))
@@ -1811,8 +1806,8 @@ static int tdx_sept_zap_private_spte(struct kvm *kvm, gfn_t gfn,
gpa_t gpa = gfn_to_gpa(gfn) & KVM_HPAGE_MASK(level);
u64 err, entry, level_state;
- /* For now large page isn't supported yet. */
- WARN_ON_ONCE(level != PG_LEVEL_4K);
+ /* Large pages are not supported before the TD is runnable. */
+ WARN_ON_ONCE(kvm_tdx->state != TD_STATE_RUNNABLE && level != PG_LEVEL_4K);
err = tdh_mem_range_block(&kvm_tdx->td, gpa, tdx_level, &entry, &level_state);
@@ -1993,6 +1988,9 @@ static int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
struct folio *folio = page_folio(page);
int ret;
+ WARN_ON_ONCE(folio_page_idx(folio, page) + KVM_PAGES_PER_HPAGE(level) >
+ folio_nr_pages(folio));
+
if (!is_hkid_assigned(to_kvm_tdx(kvm))) {
KVM_BUG_ON(!kvm->vm_dead, kvm);
@@ -3470,7 +3468,10 @@ int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp)
int tdx_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn)
{
- return PG_LEVEL_4K;
+ if (unlikely(to_kvm_tdx(kvm)->state != TD_STATE_RUNNABLE))
+ return PG_LEVEL_4K;
+
+ return PG_LEVEL_2M;
}
static int tdx_online_cpu(unsigned int cpu)
--
2.43.2