[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <Z7wuq9hQS33z7Lvl@yzhao56-desk.sh.intel.com>
Date: Mon, 24 Feb 2025 16:32:43 +0800
From: Yan Zhao <yan.y.zhao@...el.com>
To: Sean Christopherson <seanjc@...gle.com>
CC: Rick P Edgecombe <rick.p.edgecombe@...el.com>, "pbonzini@...hat.com"
<pbonzini@...hat.com>, Kai Huang <kai.huang@...el.com>, "kvm@...r.kernel.org"
<kvm@...r.kernel.org>, "sean.j.christopherson@...el.com"
<sean.j.christopherson@...el.com>, "linux-kernel@...r.kernel.org"
<linux-kernel@...r.kernel.org>, Isaku Yamahata <isaku.yamahata@...el.com>,
"tony.lindgren@...ux.intel.com" <tony.lindgren@...ux.intel.com>
Subject: Re: [PATCH 20/30] KVM: TDX: create/destroy VM structure
On Fri, Feb 21, 2025 at 05:38:36PM -0800, Sean Christopherson wrote:
> On Sat, Feb 22, 2025, Rick P Edgecombe wrote:
> > On Thu, 2025-02-20 at 17:08 -0800, Sean Christopherson wrote:
> > > Argh, after digging more, this isn't actually true. The separate "unload MMUs"
> > > code is leftover from when KVM rather stupidly tried to free all MMU pages when
> > > freeing a vCPU.
> > >
> > > Commit 7b53aa565084 ("KVM: Fix vcpu freeing for guest smp") "fixed" things by
> > > unloading MMUs before destroying vCPUs, but the underlying problem was trying to
> > > free _all_ MMU pages when destroying a single vCPU. That eventually got fixed
> > > for good (haven't checked when), but the separate MMU unloading never got cleaned
> > > up.
> > >
> > > So, scratch the mmu_destroy() idea. But I still want/need to move vCPU destruction
> > > before vm_destroy.
> > >
> > > Now that kvm_arch_pre_destroy_vm() is a thing, one idea would be to add
> > > kvm_x86_ops.pre_destroy_vm(), which would pair decently well with the existing
> > > call to kvm_mmu_pre_destroy_vm().
> >
> > So:
> > kvm_x86_call(vm_destroy)(kvm); -> tdx_mmu_release_hkid()
> > kvm_destroy_vcpus(kvm); -> tdx_vcpu_free() -> reclaim
> > static_call_cond(kvm_x86_vm_free)(kvm); -> reclaim
> >
> > To:
> > (pre_destroy_vm)() -> tdx_mmu_release_hkid()
> > kvm_destroy_vcpus(kvm); -> reclaim
> > kvm_x86_call(vm_destroy)(kvm); -> nothing
> > static_call_cond(kvm_x86_vm_free)(kvm); -> reclaim
>
> I was thinking this last one could go away, and TDX could reclaim at vm_destroy?
> Or is that not possible because it absolutely must come dead last?
Hmm, the change below works on my end.
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 50cf27473ffb..79406bf07a1c 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -21,7 +21,7 @@ KVM_X86_OP(has_emulated_msr)
KVM_X86_OP(vcpu_after_set_cpuid)
KVM_X86_OP(vm_init)
KVM_X86_OP_OPTIONAL(vm_destroy)
-KVM_X86_OP_OPTIONAL(vm_free)
+KVM_X86_OP_OPTIONAL(vm_pre_destroy)
KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate)
KVM_X86_OP(vcpu_create)
KVM_X86_OP(vcpu_free)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8d15e604613b..2d5b6d34d30e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1677,7 +1677,7 @@ struct kvm_x86_ops {
unsigned int vm_size;
int (*vm_init)(struct kvm *kvm);
void (*vm_destroy)(struct kvm *kvm);
- void (*vm_free)(struct kvm *kvm);
+ void (*vm_pre_destroy)(struct kvm *kvm);
/* Create, but do not attach this VCPU */
int (*vcpu_precreate)(struct kvm *kvm);
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 1fa0364faa60..a8acf98dfadd 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -76,18 +76,19 @@ static int vt_vm_init(struct kvm *kvm)
return vmx_vm_init(kvm);
}
-static void vt_vm_destroy(struct kvm *kvm)
+static void vt_vm_pre_destroy(struct kvm *kvm)
{
if (is_td(kvm))
return tdx_mmu_release_hkid(kvm);
- vmx_vm_destroy(kvm);
}
-static void vt_vm_free(struct kvm *kvm)
+static void vt_vm_destroy(struct kvm *kvm)
{
if (is_td(kvm))
- tdx_vm_free(kvm);
+ return tdx_vm_free(kvm);
+
+ vmx_vm_destroy(kvm);
}
static int vt_vcpu_precreate(struct kvm *kvm)
@@ -914,7 +915,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.vm_init = vt_vm_init,
.vm_destroy = vt_vm_destroy,
- .vm_free = vt_vm_free,
+ .vm_pre_destroy = vt_vm_pre_destroy,
.vcpu_precreate = vt_vcpu_precreate,
.vcpu_create = vt_vcpu_create,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8ae96449e6e2..cb2672a59715 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -12889,6 +12889,7 @@ EXPORT_SYMBOL_GPL(__x86_set_memory_region);
void kvm_arch_pre_destroy_vm(struct kvm *kvm)
{
kvm_mmu_pre_destroy_vm(kvm);
+ static_call_cond(kvm_x86_vm_pre_destroy)(kvm);
}
void kvm_arch_destroy_vm(struct kvm *kvm)
@@ -12908,7 +12909,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
mutex_unlock(&kvm->slots_lock);
}
kvm_unload_vcpu_mmus(kvm);
- kvm_x86_call(vm_destroy)(kvm);
kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
kvm_pic_destroy(kvm);
kvm_ioapic_destroy(kvm);
@@ -12919,7 +12919,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_page_track_cleanup(kvm);
kvm_xen_destroy_vm(kvm);
kvm_hv_destroy_vm(kvm);
- static_call_cond(kvm_x86_vm_free)(kvm);
+ kvm_x86_call(vm_destroy)(kvm);
}
static void memslot_rmap_free(struct kvm_memory_slot *slot)
Powered by blists - more mailing lists