[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <dca247173aace1269ce8512ae2d3797289bb1718.camel@intel.com>
Date: Tue, 20 May 2025 22:49:48 +0000
From: "Huang, Kai" <kai.huang@...el.com>
To: "pbonzini@...hat.com" <pbonzini@...hat.com>, "seanjc@...gle.com"
<seanjc@...gle.com>
CC: "kvm@...r.kernel.org" <kvm@...r.kernel.org>, "vipinsh@...gle.com"
<vipinsh@...gle.com>, "linux-kernel@...r.kernel.org"
<linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v3 2/3] KVM: x86: Use kvzalloc() to allocate VM struct
On Mon, 2025-05-19 at 08:39 -0700, Sean Christopherson wrote:
> On Sat, May 17, 2025, Paolo Bonzini wrote:
> > On 5/16/25 23:54, Sean Christopherson wrote:
> > > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> > > index 0ad1a6d4fb6d..d13e475c3407 100644
> > > --- a/arch/x86/kvm/svm/svm.c
> > > +++ b/arch/x86/kvm/svm/svm.c
> > > @@ -5675,6 +5675,8 @@ static int __init svm_init(void)
> > > {
> > > int r;
> > > + KVM_SANITY_CHECK_VM_STRUCT_SIZE(kvm_svm);
> > > +
> > > __unused_size_checks();
> > > if (!kvm_is_svm_supported())
> > > diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> > > index d1e02e567b57..e18dfada2e90 100644
> > > --- a/arch/x86/kvm/vmx/main.c
> > > +++ b/arch/x86/kvm/vmx/main.c
> > > @@ -64,6 +64,8 @@ static __init int vt_hardware_setup(void)
> > > vt_x86_ops.protected_apic_has_interrupt = tdx_protected_apic_has_interrupt;
> > > }
> > > + KVM_SANITY_CHECK_VM_STRUCT_SIZE(kvm_tdx);
> >
> > I would put either both or no checks in main.c.
>
> Yeah, I agree the current split is ugly. I originally had 'em both in main.c,
> but then the assert effectively becomes dependent on CONFIG_KVM_INTEL_TDX=y.
>
> Aha! If we add a proper tdx_hardware_setup(), then there's a convenient location
> for the assert, IMO it's much easier to see/document the "TDX module not loaded"
> behavior, and the TDX-specific kvm_x86_ops hooks don't need to be visible symbols.
>
> I'll slot the below in, unless you've got a better idea.
Looks good to me too. Minor things below.
[...]
> +static int tdx_sept_remove_private_spte(struct kvm *kvm, gfn_t gfn,
> + enum pg_level level, kvm_pfn_t pfn)
> {
> struct page *page = pfn_to_page(pfn);
> int ret;
> @@ -3507,10 +3507,14 @@ int __init tdx_bringup(void)
> r = __tdx_bringup();
> if (r) {
> /*
> - * Disable TDX only but don't fail to load module if
> - * the TDX module could not be loaded. No need to print
> - * message saying "module is not loaded" because it was
> - * printed when the first SEAMCALL failed.
> + * Disable TDX only but don't fail to load module if the TDX
> + * module could not be loaded. No need to print message saying
> + * "module is not loaded" because it was printed when the first
> + * SEAMCALL failed. Don't bother unwinding the S-EPT hooks or
> + * vm_size, as kvm_x86_ops have already been finalized (and are
> + * intentionally not exported). The S-EPT code is unreachable,
> + * and allocating a few more bytes per VM in a should-be-rare
> + * failure scenario is a non-issue.
> */
> if (r == -ENODEV)
> goto success_disable_tdx;
> @@ -3524,3 +3528,19 @@ int __init tdx_bringup(void)
> enable_tdx = 0;
> return 0;
> }
> +
> +
> +void __init tdx_hardware_setup(void)
> +{
> + /*
> + * Note, if the TDX module can't be loaded, KVM TDX support will be
> + * disabled but KVM will continue loading (see tdx_bringup()).
> + */
This comment seems a little bit weird to me. I think what you meant here is that
@vm_size and the S-EPT ops are not unwound when TDX cannot be brought up but
KVM is still loaded.
> + vt_x86_ops.vm_size = max_t(unsigned int, vt_x86_ops.vm_size, sizeof(struct kvm_tdx));
> +
> + vt_x86_ops.link_external_spt = tdx_sept_link_private_spt;
> + vt_x86_ops.set_external_spte = tdx_sept_set_private_spte;
> + vt_x86_ops.free_external_spt = tdx_sept_free_private_spt;
> + vt_x86_ops.remove_external_spte = tdx_sept_remove_private_spte;
> + vt_x86_ops.protected_apic_has_interrupt = tdx_protected_apic_has_interrupt;
> +}
> diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
> index 51f98443e8a2..ca39a9391db1 100644
> --- a/arch/x86/kvm/vmx/tdx.h
> +++ b/arch/x86/kvm/vmx/tdx.h
> @@ -8,6 +8,7 @@
> #ifdef CONFIG_KVM_INTEL_TDX
> #include "common.h"
>
> +void tdx_hardware_setup(void);
> int tdx_bringup(void);
> void tdx_cleanup(void);
>
There's a build error when CONFIG_KVM_INTEL_TDX is off:
vmx/main.c: In function ‘vt_hardware_setup’:
vmx/main.c:34:17: error: implicit declaration of function ‘tdx_hardware_setup’;
did you mean ‘vmx_hardware_setup’? [-Wimplicit-function-declaration]
34 | tdx_hardware_setup();
| ^~~~~~~~~~~~~~~~~~
| vmx_hardware_setup
... so you need a stub for tdx_hardware_setup() when CONFIG_KVM_INTEL_TDX
is off.
And one more thing:
With the above patch, we still have the code below in vt_init():
/*
* TDX and VMX have different vCPU structures. Calculate the
* maximum size/align so that kvm_init() can use the larger
* values to create the kmem_vcpu_cache.
*/
vcpu_size = sizeof(struct vcpu_vmx);
vcpu_align = __alignof__(struct vcpu_vmx);
if (enable_tdx) {
vcpu_size = max_t(unsigned, vcpu_size,
sizeof(struct vcpu_tdx));
vcpu_align = max_t(unsigned, vcpu_align,
__alignof__(struct vcpu_tdx));
kvm_caps.supported_vm_types |= BIT(KVM_X86_TDX_VM);
}
It's kinda ugly too IMHO.
Since we already have @vm_size in kvm_x86_ops, how about also adding vcpu_size
and vcpu_align to it? Then they can be treated in the same way as vm_size for
TDX.
They are not needed for SVM, but it doesn't hurt that much?
Compile-tested only:
From b3a4b29cbd371189860b103813354813912019fa Mon Sep 17 00:00:00 2001
From: Kai Huang <kai.huang@...el.com>
Date: Wed, 21 May 2025 10:32:28 +1200
Subject: [PATCH] vcpu size/align for TDX
Signed-off-by: Kai Huang <kai.huang@...el.com>
---
arch/x86/include/asm/kvm_host.h | 2 ++
arch/x86/kvm/svm/svm.c | 6 ++++--
arch/x86/kvm/vmx/main.c | 20 +++-----------------
arch/x86/kvm/vmx/tdx.c | 7 +++++++
4 files changed, 16 insertions(+), 19 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ed9b65785a24..7b96b6b30a5c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1684,6 +1684,8 @@ struct kvm_x86_ops {
void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
unsigned int vm_size;
+ unsigned int vcpu_size;
+ unsigned int vcpu_align;
int (*vm_init)(struct kvm *kvm);
void (*vm_destroy)(struct kvm *kvm);
void (*vm_pre_destroy)(struct kvm *kvm);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index dc8e9af49f11..6a43d6402219 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -5094,6 +5094,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vcpu_reset = svm_vcpu_reset,
.vm_size = sizeof(struct kvm_svm),
+ .vcpu_size = sizeof(struct vcpu_svm),
+ .vcpu_align = __alignof__(struct vcpu_svm),
+
.vm_init = svm_vm_init,
.vm_destroy = svm_vm_destroy,
@@ -5543,8 +5546,7 @@ static int __init svm_init(void)
* Common KVM initialization _must_ come last, after this, /dev/kvm is
* exposed to userspace!
*/
- r = kvm_init(sizeof(struct vcpu_svm), __alignof__(struct vcpu_svm),
- THIS_MODULE);
+ r = kvm_init(svm_x86_ops.vcpu_size, svm_x86_ops.vcpu_align,
THIS_MODULE);
if (r)
goto err_kvm_init;
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index c064d79b7550..86fdcaca7061 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -879,6 +879,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.has_emulated_msr = vt_has_emulated_msr,
.vm_size = sizeof(struct kvm_vmx),
+ .vcpu_size = sizeof(struct vcpu_vmx),
+ .vcpu_align = __alignof__(struct vcpu_vmx),
.vm_init = vt_vm_init,
.vm_pre_destroy = vt_vm_pre_destroy,
@@ -1035,7 +1037,6 @@ module_exit(vt_exit);
static int __init vt_init(void)
{
- unsigned vcpu_size, vcpu_align;
int r;
r = vmx_init();
@@ -1047,26 +1048,11 @@ static int __init vt_init(void)
if (r)
goto err_tdx_bringup;
- /*
- * TDX and VMX have different vCPU structures. Calculate the
- * maximum size/align so that kvm_init() can use the larger
- * values to create the kmem_vcpu_cache.
- */
- vcpu_size = sizeof(struct vcpu_vmx);
- vcpu_align = __alignof__(struct vcpu_vmx);
- if (enable_tdx) {
- vcpu_size = max_t(unsigned, vcpu_size,
- sizeof(struct vcpu_tdx));
- vcpu_align = max_t(unsigned, vcpu_align,
- __alignof__(struct vcpu_tdx));
- kvm_caps.supported_vm_types |= BIT(KVM_X86_TDX_VM);
- }
-
/*
* Common KVM initialization _must_ come last, after this, /dev/kvm is
* exposed to userspace!
*/
- r = kvm_init(vcpu_size, vcpu_align, THIS_MODULE);
+ r = kvm_init(vt_x86_ops.vcpu_size, vt_x86_ops.vcpu_align, THIS_MODULE);
if (r)
goto err_kvm_init;
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index b4985a64501c..dc44cabf89e6 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -3522,6 +3522,9 @@ int __init tdx_bringup(void)
enable_tdx = 0;
}
+ if (enable_tdx)
+ kvm_caps.supported_vm_types |= BIT(KVM_X86_TDX_VM);
+
return r;
success_disable_tdx:
@@ -3537,6 +3540,10 @@ void __init tdx_hardware_setup(void)
* disabled but KVM will continue loading (see tdx_bringup()).
*/
vt_x86_ops.vm_size = max_t(unsigned int, vt_x86_ops.vm_size,
sizeof(struct kvm_tdx));
+ vt_x86_ops.vcpu_size = max_t(unsigned int, vt_x86_ops.vcpu_size,
+ sizeof(struct vcpu_tdx));
+ vt_x86_ops.vcpu_align = max_t(unsigned int, vt_x86_ops.vcpu_align,
+ __alignof__(struct vcpu_tdx));
vt_x86_ops.link_external_spt = tdx_sept_link_private_spt;
vt_x86_ops.set_external_spte = tdx_sept_set_private_spte;
--
2.49.0
Powered by blists - more mailing lists