Message-ID: <b3b61de3224f17792bb26e0e9bcf267cf4ebbcc7.1749672978.git.afranji@google.com>
Date: Wed, 11 Jun 2025 21:16:33 +0000
From: Ryan Afranji <afranji@...gle.com>
To: kvm@...r.kernel.org, linux-kernel@...r.kernel.org, x86@...nel.org
Cc: sagis@...gle.com, bp@...en8.de, chao.p.peng@...ux.intel.com,
dave.hansen@...ux.intel.com, dmatlack@...gle.com, erdemaktas@...gle.com,
isaku.yamahata@...el.com, kai.huang@...el.com, mingo@...hat.com,
pbonzini@...hat.com, seanjc@...gle.com, tglx@...utronix.de,
zhi.wang.linux@...il.com, ackerleytng@...gle.com, andrew.jones@...ux.dev,
david@...hat.com, hpa@...or.com, kirill.shutemov@...ux.intel.com,
linux-kselftest@...r.kernel.org, tabba@...gle.com, vannapurve@...gle.com,
yan.y.zhao@...el.com, rick.p.edgecombe@...el.com,
Ryan Afranji <afranji@...gle.com>
Subject: [RFC PATCH v2 06/10] KVM: TDX: Add core logic for TDX intra-host migration
From: Sagi Shahar <sagis@...gle.com>
Add the core logic for transferring state between the source and
destination TDs during intra-host migration: hand the TD control
pages and VM-wide state over to the destination, copy per-vCPU
register and APIC state (including pending posted interrupts), and
move the mirror EPT tables. The source VM's pointers to the migrated
structures are cleared so they are not freed when the source VM is
shut down.
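For reference, a minimal VMM-side sketch of how a migration would be
driven, assuming the same KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM flow that
SEV intra-host migration already uses (illustrative only, not part of
this patch): the destination VM enables the capability on its VM fd,
passing the source VM fd.

  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int move_enc_context_from(int dst_vm_fd, int src_vm_fd)
  {
          struct kvm_enable_cap cap;

          memset(&cap, 0, sizeof(cap));
          cap.cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM;
          cap.args[0] = src_vm_fd;

          /*
           * On success, the source VM's encryption context (here, its
           * TD state) now belongs to the destination VM.
           */
          return ioctl(dst_vm_fd, KVM_ENABLE_CAP, &cap);
  }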
Signed-off-by: Sagi Shahar <sagis@...gle.com>
Co-developed-by: Ryan Afranji <afranji@...gle.com>
Signed-off-by: Ryan Afranji <afranji@...gle.com>
---
arch/x86/kvm/vmx/tdx.c | 196 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 195 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 4582f94175b7..268aca28d878 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -3534,9 +3534,203 @@ static __always_inline bool tdx_finalized(struct kvm *kvm)
return tdx_kvm->state == TD_STATE_RUNNABLE;
}
+#define MAX_APIC_VECTOR 256
+
+static int tdx_migrate_vcpus(struct kvm *dst, struct kvm *src)
+{
+ struct kvm_vcpu *src_vcpu;
+ struct kvm_tdx *dst_tdx;
+ unsigned long i;
+
+ dst_tdx = to_kvm_tdx(dst);
+
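+ /* Flush each source vCPU's TD state off the pCPU it was last loaded on. */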
+ kvm_for_each_vcpu(i, src_vcpu, src)
+ tdx_flush_vp_on_cpu(src_vcpu);
+
+ /* Copy per-vCPU state. */
+ kvm_for_each_vcpu(i, src_vcpu, src) {
+ struct vcpu_tdx *dst_tdx_vcpu, *src_tdx_vcpu;
+ struct kvm_lapic_state src_lapic_state;
+ struct kvm_vcpu *dst_vcpu;
+ u64 apic_base;
+ u32 vector;
+ int ret;
+
+ src_tdx_vcpu = to_tdx(src_vcpu);
+ dst_vcpu = kvm_get_vcpu(dst, i);
+ dst_tdx_vcpu = to_tdx(dst_vcpu);
+
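+ /* The destination vCPU is not loaded on any pCPU yet. */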
+ dst_vcpu->cpu = -1;
+
+ /* Destination vCPU initialization was skipped, so do it here. */
+ apic_base = APIC_DEFAULT_PHYS_BASE | LAPIC_MODE_X2APIC |
+ (kvm_vcpu_is_reset_bsp(dst_vcpu) ?
+ MSR_IA32_APICBASE_BSP : 0);
+ if (kvm_apic_set_base(dst_vcpu, apic_base, true))
+ return -EINVAL;
+
+ /* Copy lapic state. */
+ ret = kvm_apic_get_state(src_vcpu, &src_lapic_state);
+ if (ret)
+ return -EINVAL;
+
+ ret = kvm_apic_set_state(dst_vcpu, &src_lapic_state);
+ if (ret)
+ return -EINVAL;
+
+ /*
+ * pi_desc holds posted-interrupt state that the pCPU processes
+ * during VM entry/runtime. For non-confidential VMs, pending
+ * posted interrupts are folded into the vAPIC state when it is
+ * read out (kvm_apic_get_state() -> sync_pir_to_irr()).
+ *
+ * For TDX VMs, KVM has no access to the virtual APIC page, so
+ * preserve the interrupt state across intra-host migration by
+ * copying the pi_desc contents and re-posting pending vectors.
+ */
+ dst_tdx_vcpu->vt = src_tdx_vcpu->vt;
+ for (vector = 0; vector < MAX_APIC_VECTOR; vector++) {
+ if (pi_test_pir(vector, &src_tdx_vcpu->vt.pi_desc)) {
+ __vmx_deliver_posted_interrupt(
+ dst_vcpu,
+ &dst_tdx_vcpu->vt.pi_desc,
+ vector);
+ }
+ }
+
+ /* Copy non-TDX vCPU state. */
+ memcpy(dst_vcpu->arch.regs, src_vcpu->arch.regs,
+ NR_VCPU_REGS * sizeof(src_vcpu->arch.regs[0]));
+
+ dst_vcpu->arch.regs_avail = src_vcpu->arch.regs_avail;
+ dst_vcpu->arch.regs_dirty = src_vcpu->arch.regs_dirty;
+ dst_vcpu->arch.tsc_offset = dst_tdx->tsc_offset;
+ dst_vcpu->arch.guest_state_protected =
+ src_vcpu->arch.guest_state_protected;
+ dst_vcpu->arch.xfd_no_write_intercept =
+ src_vcpu->arch.xfd_no_write_intercept;
+
+ /* Copy TD structures. */
+ dst_tdx_vcpu->vp.tdvpr_page = src_tdx_vcpu->vp.tdvpr_page;
+ dst_tdx_vcpu->vp.tdcx_pages = src_tdx_vcpu->vp.tdcx_pages;
+
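+ /* Point the migrated TD VMCS at the destination vCPU's pi_desc. */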
+ td_vmcs_write64(dst_tdx_vcpu, POSTED_INTR_DESC_ADDR,
+ __pa(&dst_tdx_vcpu->vt.pi_desc));
+
+ /* Copy current vCPU status. */
+ dst_tdx_vcpu->ext_exit_qualification =
+ src_tdx_vcpu->ext_exit_qualification;
+ dst_tdx_vcpu->exit_gpa = src_tdx_vcpu->exit_gpa;
+ dst_tdx_vcpu->vp_enter_args = src_tdx_vcpu->vp_enter_args;
+ dst_tdx_vcpu->vp_enter_ret = src_tdx_vcpu->vp_enter_ret;
+ dst_tdx_vcpu->guest_entered = src_tdx_vcpu->guest_entered;
+ dst_tdx_vcpu->map_gpa_next = src_tdx_vcpu->map_gpa_next;
+ dst_tdx_vcpu->map_gpa_end = src_tdx_vcpu->map_gpa_end;
+
+ /* Copy mirror EPT tables. */
+ vcpu_load(dst_vcpu);
+ if (kvm_mmu_move_mirror_pages_from(dst_vcpu, src_vcpu)) {
+ vcpu_put(dst_vcpu);
+ return -EINVAL;
+ }
+ vcpu_put(dst_vcpu);
+
+ dst_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+ dst_tdx_vcpu->state = VCPU_TD_STATE_INITIALIZED;
+
+ /*
+ * NULL out the source vCPU's pointers to the migrated pages so
+ * they are not freed during source VM shutdown.
+ */
+ src_tdx_vcpu->vp.tdvpr_page = NULL;
+ src_tdx_vcpu->vp.tdcx_pages = NULL;
+ }
+
+ return 0;
+}
+
static int tdx_migrate_from(struct kvm *dst, struct kvm *src)
{
- return -EINVAL;
+ struct kvm_tdx *src_tdx, *dst_tdx;
+ bool charged = false;
+ int ret;
+
+ src_tdx = to_kvm_tdx(src);
+ dst_tdx = to_kvm_tdx(dst);
+
+ ret = -EINVAL;
+
+ if (src_tdx->state != TD_STATE_RUNNABLE) {
+ pr_warn("Cannot migrate from a non-finalized VM\n");
+ goto abort;
+ }
+
+ /* Transfer miscellaneous cgroup. */
+ dst_tdx->misc_cg = get_current_misc_cg();
+ if (dst_tdx->misc_cg != src_tdx->misc_cg) {
+ ret = misc_cg_try_charge(MISC_CG_RES_TDX, dst_tdx->misc_cg, 1);
+ if (ret)
+ goto abort_dst_cgroup;
+ charged = true;
+ }
+
+ dst_tdx->hkid = src_tdx->hkid;
+
+ /* Copy VM data. */
+ dst_tdx->attributes = src_tdx->attributes;
+ dst_tdx->xfam = src_tdx->xfam;
+ dst_tdx->tsc_offset = src_tdx->tsc_offset;
+ dst_tdx->tsc_multiplier = src_tdx->tsc_multiplier;
+ dst_tdx->nr_premapped = src_tdx->nr_premapped;
+ dst_tdx->wait_for_sept_zap = src_tdx->wait_for_sept_zap;
+ dst_tdx->kvm.arch.gfn_direct_bits = src_tdx->kvm.arch.gfn_direct_bits;
+
+ /* Copy TD structures. */
+ dst_tdx->td.tdcs_nr_pages = src_tdx->td.tdcs_nr_pages;
+ dst_tdx->td.tdcx_nr_pages = src_tdx->td.tdcx_nr_pages;
+ dst_tdx->td.tdr_page = src_tdx->td.tdr_page;
+ dst_tdx->td.tdcs_pages = src_tdx->td.tdcs_pages;
+
+ /* Copy per-vCPU state. */
+ ret = tdx_migrate_vcpus(dst, src);
+ if (ret)
+ goto late_abort;
+
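+ /* Hand the memory attributes xarray over to the destination VM. */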
+ dst->mem_attr_array.xa_head = src->mem_attr_array.xa_head;
+ src->mem_attr_array.xa_head = NULL;
+
+ dst_tdx->state = TD_STATE_RUNNABLE;
+
+ /*
+ * Invalidate the source's HKID and NULL out its migrated TD
+ * pages so they are not released during source VM shutdown.
+ */
+ src_tdx->hkid = -1;
+ src_tdx->td.tdr_page = NULL;
+ src_tdx->td.tdcs_pages = NULL;
+
+ return 0;
+
+late_abort:
+ /*
+ * If we aborted after the state transfer already started, the src VM
+ * is no longer valid.
+ */
+ kvm_vm_dead(src);
+
+abort_dst_cgroup:
+ if (charged)
+ misc_cg_uncharge(MISC_CG_RES_TDX, dst_tdx->misc_cg, 1);
+ put_misc_cg(dst_tdx->misc_cg);
+ dst_tdx->misc_cg = NULL;
+abort:
+ dst_tdx->hkid = -1;
+ dst_tdx->td.tdr_page = NULL;
+ return ret;
}
int tdx_vm_move_enc_context_from(struct kvm *kvm, struct kvm *src_kvm)
--
2.50.0.rc1.591.g9c95f17f64-goog