[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241112073848.22298-1-yan.y.zhao@intel.com>
Date: Tue, 12 Nov 2024 15:38:48 +0800
From: Yan Zhao <yan.y.zhao@...el.com>
To: pbonzini@...hat.com,
seanjc@...gle.com,
kvm@...r.kernel.org,
dave.hansen@...ux.intel.com
Cc: rick.p.edgecombe@...el.com,
kai.huang@...el.com,
adrian.hunter@...el.com,
reinette.chatre@...el.com,
xiaoyao.li@...el.com,
tony.lindgren@...el.com,
binbin.wu@...ux.intel.com,
dmatlack@...gle.com,
isaku.yamahata@...el.com,
isaku.yamahata@...il.com,
nik.borisov@...e.com,
linux-kernel@...r.kernel.org,
x86@...nel.org
Subject: [PATCH v2 22/24] KVM: TDX: Finalize VM initialization
From: Isaku Yamahata <isaku.yamahata@...el.com>
Introduce a new VM-scoped KVM_MEMORY_ENCRYPT_OP IOCTL subcommand,
KVM_TDX_FINALIZE_VM, to perform TD Measurement Finalization.
The API documentation is provided in a separate patch:
“Documentation/virt/kvm: Document on Trust Domain Extensions (TDX)”.
Enhance TDX’s set_external_spte() hook to record the pre-mapping count
instead of returning without action when the TD is not finalized.
Decrement the pre-mapping count when a page is added or when its mapping
is dropped.
Set pre_fault_allowed to true after the finalization is complete.
Note: TD Measurement Finalization is the process by which the initial state
of the TDX VM is measured for attestation purposes. It uses the SEAMCALL
TDH.MR.FINALIZE, after which:
1. The VMM can no longer add TD private pages with arbitrary content.
2. The TDX VM becomes runnable.
Signed-off-by: Isaku Yamahata <isaku.yamahata@...el.com>
Co-developed-by: Adrian Hunter <adrian.hunter@...el.com>
Signed-off-by: Adrian Hunter <adrian.hunter@...el.com>
Co-developed-by: Rick Edgecombe <rick.p.edgecombe@...el.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@...el.com>
Signed-off-by: Yan Zhao <yan.y.zhao@...el.com>
---
TDX MMU part 2 v2
- Merge changes from patch "KVM: TDX: Premap initial guest memory" into
this patch (Paolo)
- Consolidate nr_premapped counting into this patch (Paolo)
- Page level check should be (and is) in tdx_sept_set_private_spte() in
patch "KVM: TDX: Implement hooks to propagate changes of TDP MMU mirror
page table" not in tdx_mem_page_record_premap_cnt() (Paolo)
- Protect finalization using kvm->slots_lock (Paolo)
- Set kvm->arch.pre_fault_allowed to true after finalization is done
(Paolo)
- Add a memory barrier to ensure correct ordering of the updates to
kvm_tdx->finalized and kvm->arch.pre_fault_allowed (Adrian)
- pre_fault_allowed must not be true before finalization is done.
Highlight that fact by checking it in tdx_mem_page_record_premap_cnt()
(Adrian)
- No need for is_td_finalized() (Rick)
- Fixup SEAMCALL call sites due to function parameter changes to SEAMCALL
wrappers (Kai)
- Add nr_premapped where it's first used (Tao)
TDX MMU part 2 v1:
- Added premapped check.
- Update for the wrapper functions for SEAMCALLs. (Sean)
- Add check if nr_premapped is zero. If not, return error.
- Use KVM_BUG_ON() in tdx_td_finalizer() for consistency.
- Change tdx_td_finalizemr() to take struct kvm_tdx_cmd *cmd and return error
(Adrian)
- Handle TDX_OPERAND_BUSY case (Adrian)
- Updates from seamcall overhaul (Kai)
- Rename error->hw_error
v18:
- Remove the change of tools/arch/x86/include/uapi/asm/kvm.h.
v15:
- removed unconditional tdx_track() by tdx_flush_tlb_current() that
does tdx_track().
---
arch/x86/include/uapi/asm/kvm.h | 1 +
arch/x86/kvm/vmx/tdx.c | 78 ++++++++++++++++++++++++++++++---
arch/x86/kvm/vmx/tdx.h | 3 ++
3 files changed, 75 insertions(+), 7 deletions(-)
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index a19cd84cec76..eee6de05f261 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -932,6 +932,7 @@ enum kvm_tdx_cmd_id {
KVM_TDX_INIT_VM,
KVM_TDX_INIT_VCPU,
KVM_TDX_INIT_MEM_REGION,
+ KVM_TDX_FINALIZE_VM,
KVM_TDX_GET_CPUID,
KVM_TDX_CMD_NR_MAX,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 15cedacd717a..acaa11be1031 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -563,6 +563,31 @@ static int tdx_mem_page_aug(struct kvm *kvm, gfn_t gfn,
return 0;
}
+/*
+ * KVM_TDX_INIT_MEM_REGION calls kvm_gmem_populate() to get guest pages and
+ * tdx_gmem_post_populate() to premap page table pages into private EPT.
+ * Before the TD is finalized, guest pages must be mapped into private EPT
+ * with the SEAMCALL TDH.MEM.PAGE.ADD(), which copies the content of a
+ * user-provided source page into the guest page being added. That source
+ * page is not available via the common interface kvm_tdp_map_page(), so for
+ * now kvm_tdp_map_page() only premaps guest pages into the KVM mirror root.
+ * nr_premapped is incremented here to record each such premapped page, and
+ * is decremented once tdx_gmem_post_populate() has called TDH.MEM.PAGE.ADD()
+ * following kvm_tdp_map_page().
+ */
+static int tdx_mem_page_record_premap_cnt(struct kvm *kvm, gfn_t gfn,
+ enum pg_level level, kvm_pfn_t pfn)
+{
+ struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+
+ if (KVM_BUG_ON(kvm->arch.pre_fault_allowed, kvm))
+ return -EINVAL;
+
+ /* nr_premapped is decremented once tdh_mem_page_add() has been called. */
+ atomic64_inc(&kvm_tdx->nr_premapped);
+ return 0;
+}
+
int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
enum pg_level level, kvm_pfn_t pfn)
{
@@ -582,14 +607,15 @@ int tdx_sept_set_private_spte(struct kvm *kvm, gfn_t gfn,
*/
get_page(pfn_to_page(pfn));
+ /*
+ * Pairs with the smp_wmb() in tdx_td_finalizemr(), which orders the
+ * update of 'state' before that of 'pre_fault_allowed'.
+ */
+ smp_rmb();
if (likely(kvm_tdx->state == TD_STATE_RUNNABLE))
return tdx_mem_page_aug(kvm, gfn, level, pfn);
- /*
- * TODO: KVM_TDX_INIT_MEM_REGION support to populate before finalize
- * comes here for the initial memory.
- */
- return -EOPNOTSUPP;
+ return tdx_mem_page_record_premap_cnt(kvm, gfn, level, pfn);
}
static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
@@ -621,10 +647,12 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
if (unlikely(kvm_tdx->state != TD_STATE_RUNNABLE &&
err == (TDX_EPT_WALK_FAILED | TDX_OPERAND_ID_RCX))) {
/*
- * This page was mapped with KVM_MAP_MEMORY, but
- * KVM_TDX_INIT_MEM_REGION is not issued yet.
+ * Page was premapped by KVM_TDX_INIT_MEM_REGION, but
+ * tdh_mem_page_add() has not been called for it yet.
*/
if (!is_last_spte(entry, level) || !(entry & VMX_EPT_RWX_MASK)) {
+ WARN_ON_ONCE(!atomic64_read(&kvm_tdx->nr_premapped));
+ atomic64_dec(&kvm_tdx->nr_premapped);
tdx_unpin(kvm, pfn);
return 0;
}
@@ -1368,6 +1396,36 @@ void tdx_flush_tlb_all(struct kvm_vcpu *vcpu)
ept_sync_global();
}
+static int tdx_td_finalizemr(struct kvm *kvm, struct kvm_tdx_cmd *cmd)
+{
+ struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
+
+ guard(mutex)(&kvm->slots_lock);
+
+ if (!is_hkid_assigned(kvm_tdx) || kvm_tdx->state == TD_STATE_RUNNABLE)
+ return -EINVAL;
+ /*
+ * Refuse to finalize while premapped pages are still waiting for
+ * KVM_TDX_INIT_MEM_REGION to issue TDH.MEM.PAGE.ADD().
+ */
+ if (atomic64_read(&kvm_tdx->nr_premapped))
+ return -EINVAL;
+
+ cmd->hw_error = tdh_mr_finalize(kvm_tdx->tdr_pa);
+ if ((cmd->hw_error & TDX_SEAMCALL_STATUS_MASK) == TDX_OPERAND_BUSY)
+ return -EAGAIN;
+ if (KVM_BUG_ON(cmd->hw_error, kvm)) {
+ pr_tdx_error(TDH_MR_FINALIZE, cmd->hw_error);
+ return -EIO;
+ }
+
+ kvm_tdx->state = TD_STATE_RUNNABLE;
+ /* TD_STATE_RUNNABLE must be set before 'pre_fault_allowed' */
+ smp_wmb();
+ kvm->arch.pre_fault_allowed = true;
+ return 0;
+}
+
int tdx_vm_ioctl(struct kvm *kvm, void __user *argp)
{
struct kvm_tdx_cmd tdx_cmd;
@@ -1392,6 +1450,9 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp)
case KVM_TDX_INIT_VM:
r = tdx_td_init(kvm, &tdx_cmd);
break;
+ case KVM_TDX_FINALIZE_VM:
+ r = tdx_td_finalizemr(kvm, &tdx_cmd);
+ break;
default:
r = -EINVAL;
goto out;
@@ -1656,6 +1717,9 @@ static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
goto out;
}
+ WARN_ON_ONCE(!atomic64_read(&kvm_tdx->nr_premapped));
+ atomic64_dec(&kvm_tdx->nr_premapped);
+
if (arg->flags & KVM_TDX_MEASURE_MEMORY_REGION) {
for (i = 0; i < PAGE_SIZE; i += TDX_EXTENDMR_CHUNKSIZE) {
err = tdh_mr_extend(kvm_tdx->tdr_pa, gpa + i, &entry,
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index 727bcf25d731..aeddf2bb0a94 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -32,6 +32,9 @@ struct kvm_tdx {
u64 tsc_offset;
enum kvm_tdx_state state;
+
+ /* For KVM_TDX_INIT_MEM_REGION. */
+ atomic64_t nr_premapped;
};
/* TDX module vCPU states */
--
2.43.2
Powered by blists - more mailing lists