Message-ID: <963c40a1-5ad9-4e25-b719-9cb1d98f5122@intel.com>
Date: Mon, 30 Sep 2024 10:14:01 +0800
From: Xiaoyao Li <xiaoyao.li@...el.com>
To: Rick Edgecombe <rick.p.edgecombe@...el.com>, seanjc@...gle.com,
pbonzini@...hat.com, kvm@...r.kernel.org
Cc: kai.huang@...el.com, isaku.yamahata@...il.com,
tony.lindgren@...ux.intel.com, linux-kernel@...r.kernel.org,
Isaku Yamahata <isaku.yamahata@...el.com>
Subject: Re: [PATCH 12/25] KVM: TDX: Allow userspace to configure maximum
vCPUs for TDX guests
On 8/13/2024 6:48 AM, Rick Edgecombe wrote:
> From: Isaku Yamahata <isaku.yamahata@...el.com>
>
> TDX has its own mechanism to control the maximum number of vCPUs that
> the TDX guest can use. When creating a TDX guest, the maximum number of
> vCPUs of the guest needs to be passed to the TDX module as part of the
> measurement of the guest. Depending on the TDX module's version, it may
> also report the maximum number of vCPUs it can support for all TDX guests.
>
> Because the maximum number of vCPUs is part of the measurement, and thus
> part of attestation, it's better to allow userspace to configure it.
> E.g., users may want to precisely control the maximum number of vCPUs
> their precious VMs can use.
>
> The actual control itself must be done via the TDH.MNG.INIT SEAMCALL,
> where the maximum number of vCPUs is part of the input to the TDX
> module, but KVM needs to support a "per-VM maximum number of vCPUs" and
> reflect that in KVM_CAP_MAX_VCPUS.
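
For reference, that input is the TD_PARAMS structure consumed and
measured by TDH.MNG.INIT. A trimmed, illustrative sketch only -- the
real layout lives in tdx_arch.h and carries many more fields than
shown here:

	#include <linux/types.h>

	/* Sketch: reserved fields and the remaining measured fields
	 * are omitted. */
	struct td_params_sketch {
		__u64 attributes;
		__u64 xfam;
		__u16 max_vcpus;	/* filled from kvm->max_vcpus at TD init */
	};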
>
> Currently, KVM x86 always reports KVM_MAX_VCPUS for all VMs and doesn't
> allow enabling KVM_CAP_MAX_VCPUS to configure the maximum number of
> vCPUs on a per-VM basis.
>
> Add "per-VM maximum number of vCPUs" to KVM x86/TDX to accommodate TDX's
> needs.
>
> Specifically, use KVM's existing KVM_ENABLE_CAP IOCTL() to allow
> userspace to configure the maximum vCPUs by making KVM x86 support
> enabling the KVM_CAP_MAX_VCPUS cap on a per-VM basis.
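
Not part of this patch, but for illustration the userspace side would
look roughly like below (error handling trimmed; vm_fd is the VM file
descriptor returned by KVM_CREATE_VM):

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	static int set_max_vcpus(int vm_fd, __u64 nr_vcpus)
	{
		struct kvm_enable_cap cap;

		memset(&cap, 0, sizeof(cap));
		cap.cap = KVM_CAP_MAX_VCPUS;
		cap.args[0] = nr_vcpus;	/* must be done before creating vCPUs */

		return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
	}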
>
> For that, add a new 'kvm_x86_ops::vm_enable_cap()' callback and call
> it from kvm_vm_ioctl_enable_cap() as a placeholder to handle
> KVM_CAP_MAX_VCPUS for TDX guests (and other KVM_CAP_xx caps for TDX
> and/or other VMs if needed in the future).
>
> Implement the callback for TDX guests to check whether the maximum
> vCPUs value passed from userspace can be supported by TDX, and if it
> can, override 'struct kvm::max_vcpus'. Leave VMX guests and all AMD
> guests unsupported to avoid any side effects for those VMs.
>
> Accordingly, in the KVM_CHECK_EXTENSION IOCTL(), change to return
> 'struct kvm::max_vcpus' for a given VM when queried for
> KVM_CAP_MAX_VCPUS.
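
I.e., a VM-scoped query then reflects the per-VM value rather than the
global limit:

	/* returns kvm->max_vcpus for this VM once the cap is enabled */
	int max_vcpus = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);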

The implementation in this patch should be dropped in the next version
and replaced with the approach suggested in
https://lore.kernel.org/kvm/ZmzaqRy2zjvlsDfL@google.com/

> Signed-off-by: Isaku Yamahata <isaku.yamahata@...el.com>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@...el.com>
> ---
> uAPI breakout v1:
> - Change to use exported 'struct tdx_sysinfo' pointer.
> - Remove the code to read 'max_vcpus_per_td' since it is now done in
> TDX host code.
> - Drop max_vcpu ops to use kvm.max_vcpus
> - Remove TDX_MAX_VCPUS (Kai)
> - Use type cast (u16) instead of calling memcpy() when reading the
> 'max_vcpus_per_td' (Kai)
> - Improve change log and change patch title from "KVM: TDX: Make
> KVM_CAP_MAX_VCPUS backend specific" (Kai)
> ---
> arch/x86/include/asm/kvm-x86-ops.h | 1 +
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/vmx/main.c | 10 ++++++++++
> arch/x86/kvm/vmx/tdx.c | 29 +++++++++++++++++++++++++++++
> arch/x86/kvm/vmx/x86_ops.h | 5 +++++
> arch/x86/kvm/x86.c | 4 ++++
> 6 files changed, 50 insertions(+)
>
> diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> index 538f50eee86d..bd7434fe5d37 100644
> --- a/arch/x86/include/asm/kvm-x86-ops.h
> +++ b/arch/x86/include/asm/kvm-x86-ops.h
> @@ -19,6 +19,7 @@ KVM_X86_OP(hardware_disable)
> KVM_X86_OP(hardware_unsetup)
> KVM_X86_OP(has_emulated_msr)
> KVM_X86_OP(vcpu_after_set_cpuid)
> +KVM_X86_OP_OPTIONAL(vm_enable_cap)
> KVM_X86_OP(vm_init)
> KVM_X86_OP_OPTIONAL(vm_destroy)
> KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate)
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index c754183e0932..9d15f810f046 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1648,6 +1648,7 @@ struct kvm_x86_ops {
> void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
>
> unsigned int vm_size;
> + int (*vm_enable_cap)(struct kvm *kvm, struct kvm_enable_cap *cap);
> int (*vm_init)(struct kvm *kvm);
> void (*vm_destroy)(struct kvm *kvm);
>
> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> index 59f4d2d42620..cd53091ddaab 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -7,6 +7,7 @@
> #include "pmu.h"
> #include "posted_intr.h"
> #include "tdx.h"
> +#include "tdx_arch.h"
>
> static __init int vt_hardware_setup(void)
> {
> @@ -41,6 +42,14 @@ static __init int vt_hardware_setup(void)
> return 0;
> }
>
> +static int vt_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> +{
> + if (is_td(kvm))
> + return tdx_vm_enable_cap(kvm, cap);
> +
> + return -EINVAL;
> +}
> +
> static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
> {
> if (!is_td(kvm))
> @@ -72,6 +81,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
> .has_emulated_msr = vmx_has_emulated_msr,
>
> .vm_size = sizeof(struct kvm_vmx),
> + .vm_enable_cap = vt_vm_enable_cap,
> .vm_init = vmx_vm_init,
> .vm_destroy = vmx_vm_destroy,
>
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index f9faec217ea9..84cd9b4f90b5 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -44,6 +44,35 @@ struct kvm_tdx_caps {
>
> static struct kvm_tdx_caps *kvm_tdx_caps;
>
> +int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> +{
> + int r;
> +
> + switch (cap->cap) {
> + case KVM_CAP_MAX_VCPUS: {
> + if (cap->flags || cap->args[0] == 0)
> + return -EINVAL;
> + if (cap->args[0] > KVM_MAX_VCPUS ||
> + cap->args[0] > tdx_sysinfo->td_conf.max_vcpus_per_td)
> + return -E2BIG;
> +
> + mutex_lock(&kvm->lock);
> + if (kvm->created_vcpus)
> + r = -EBUSY;
> + else {
> + kvm->max_vcpus = cap->args[0];
> + r = 0;
> + }
> + mutex_unlock(&kvm->lock);
> + break;
> + }
> + default:
> + r = -EINVAL;
> + break;
> + }
> + return r;
> +}
> +
> static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
> {
> const struct tdx_sysinfo_td_conf *td_conf = &tdx_sysinfo->td_conf;
> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
> index c69ca640abe6..c1bdf7d8fee3 100644
> --- a/arch/x86/kvm/vmx/x86_ops.h
> +++ b/arch/x86/kvm/vmx/x86_ops.h
> @@ -119,8 +119,13 @@ void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu);
> void vmx_setup_mce(struct kvm_vcpu *vcpu);
>
> #ifdef CONFIG_INTEL_TDX_HOST
> +int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
> int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
> #else
> +static inline int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> +{
> + return -EINVAL;
> +};
> static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOPNOTSUPP; }
> #endif
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 7914ea50fd04..751b3841c48f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4754,6 +4754,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> break;
> case KVM_CAP_MAX_VCPUS:
> r = KVM_MAX_VCPUS;
> + if (kvm)
> + r = kvm->max_vcpus;
> break;
> case KVM_CAP_MAX_VCPU_ID:
> r = KVM_MAX_VCPU_IDS;
> @@ -6772,6 +6774,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
> }
> default:
> r = -EINVAL;
> + if (kvm_x86_ops.vm_enable_cap)
> + r = static_call(kvm_x86_vm_enable_cap)(kvm, cap);
> break;
> }
> return r;