linux-kernel - [PATCH 12/25] KVM: TDX: Allow userspace to configure maximum vCPUs for TDX guests

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240812224820.34826-13-rick.p.edgecombe@intel.com>
Date: Mon, 12 Aug 2024 15:48:07 -0700
From: Rick Edgecombe <rick.p.edgecombe@...el.com>
To: seanjc@...gle.com,
	pbonzini@...hat.com,
	kvm@...r.kernel.org
Cc: kai.huang@...el.com,
	isaku.yamahata@...il.com,
	tony.lindgren@...ux.intel.com,
	xiaoyao.li@...el.com,
	linux-kernel@...r.kernel.org,
	rick.p.edgecombe@...el.com,
	Isaku Yamahata <isaku.yamahata@...el.com>
Subject: [PATCH 12/25] KVM: TDX: Allow userspace to configure maximum vCPUs for TDX guests

From: Isaku Yamahata <isaku.yamahata@...el.com>

TDX has its own mechanism to control the maximum number of vCPUs that
the TDX guest can use.  When creating a TDX guest, the maximum number of
vCPUs of the guest needs to be passed to the TDX module as part of the
measurement of the guest.  Depending on TDX module's version, it may
also report the maximum vCPUs it can support for all TDX guests.

Because the maximum number of vCPUs is part of the measurement, thus
part of attestation, it's better to allow the userspace to be able to
configure it.  E.g. the users may want to precisely control the maximum
number of vCPUs their precious VMs can use.

The actual control itself must be done via the TDH.MNG.INIT SEAMCALL,
where the number of maximum cpus is part of the input to the TDX module,
but KVM needs to support the "per-VM maximum number of vCPUs" and
reflect that in the KVM_CAP_MAX_VCPUS.

Currently, the KVM x86 always reports KVM_MAX_VCPUS for all VMs but
doesn't allow to enable KVM_CAP_MAX_VCPUS to configure the number of
maximum vCPUs on VM-basis.

Add "per-VM maximum number of vCPUs" to KVM x86/TDX to accommodate TDX's
needs.

Specifically, use KVM's existing KVM_ENABLE_CAP IOCTL() to allow the
userspace to configure the maximum vCPUs by making KVM x86 support
enabling the KVM_CAP_MAX_VCPUS cap on VM-basis.

For that, add a new 'kvm_x86_ops::vm_enable_cap()' callback and call
it from kvm_vm_ioctl_enable_cap() as a placeholder to handle the
KVM_CAP_MAX_VCPUS for TDX guests (and other KVM_CAP_xx for TDX and/or
other VMs if needed in the future).

Implement the callback for TDX guest to check whether the maximum vCPUs
passed from usrspace can be supported by TDX, and if it can, override
the 'struct kvm::max_vcpus'.  Leave VMX guests and all AMD guests
unsupported to avoid any side-effect for those VMs.

Accordingly, in the KVM_CHECK_EXTENSION IOCTL(), change to return the
'struct kvm::max_vcpus' for a given VM for the KVM_CAP_MAX_VCPUS.

Signed-off-by: Isaku Yamahata <isaku.yamahata@...el.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@...el.com>
---
uAPI breakout v1:
 - Change to use exported 'struct tdx_sysinfo' pointer.
 - Remove the code to read 'max_vcpus_per_td' since it is now done in
   TDX host code.
 - Drop max_vcpu ops to use kvm.max_vcpus
 - Remove TDX_MAX_VCPUS (Kai)
 - Use type cast (u16) instead of calling memcpy() when reading the
   'max_vcpus_per_td' (Kai)
 - Improve change log and change patch title from "KVM: TDX: Make
   KVM_CAP_MAX_VCPUS backend specific" (Kai)
---
 arch/x86/include/asm/kvm-x86-ops.h |  1 +
 arch/x86/include/asm/kvm_host.h    |  1 +
 arch/x86/kvm/vmx/main.c            | 10 ++++++++++
 arch/x86/kvm/vmx/tdx.c             | 29 +++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/x86_ops.h         |  5 +++++
 arch/x86/kvm/x86.c                 |  4 ++++
 6 files changed, 50 insertions(+)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 538f50eee86d..bd7434fe5d37 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -19,6 +19,7 @@ KVM_X86_OP(hardware_disable)
 KVM_X86_OP(hardware_unsetup)
 KVM_X86_OP(has_emulated_msr)
 KVM_X86_OP(vcpu_after_set_cpuid)
+KVM_X86_OP_OPTIONAL(vm_enable_cap)
 KVM_X86_OP(vm_init)
 KVM_X86_OP_OPTIONAL(vm_destroy)
 KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c754183e0932..9d15f810f046 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1648,6 +1648,7 @@ struct kvm_x86_ops {
 	void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
 
 	unsigned int vm_size;
+	int (*vm_enable_cap)(struct kvm *kvm, struct kvm_enable_cap *cap);
 	int (*vm_init)(struct kvm *kvm);
 	void (*vm_destroy)(struct kvm *kvm);
 
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 59f4d2d42620..cd53091ddaab 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -7,6 +7,7 @@
 #include "pmu.h"
 #include "posted_intr.h"
 #include "tdx.h"
+#include "tdx_arch.h"
 
 static __init int vt_hardware_setup(void)
 {
@@ -41,6 +42,14 @@ static __init int vt_hardware_setup(void)
 	return 0;
 }
 
+static int vt_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+	if (is_td(kvm))
+		return tdx_vm_enable_cap(kvm, cap);
+
+	return -EINVAL;
+}
+
 static int vt_mem_enc_ioctl(struct kvm *kvm, void __user *argp)
 {
 	if (!is_td(kvm))
@@ -72,6 +81,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.has_emulated_msr = vmx_has_emulated_msr,
 
 	.vm_size = sizeof(struct kvm_vmx),
+	.vm_enable_cap = vt_vm_enable_cap,
 	.vm_init = vmx_vm_init,
 	.vm_destroy = vmx_vm_destroy,
 
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index f9faec217ea9..84cd9b4f90b5 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -44,6 +44,35 @@ struct kvm_tdx_caps {
 
 static struct kvm_tdx_caps *kvm_tdx_caps;
 
+int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+	int r;
+
+	switch (cap->cap) {
+	case KVM_CAP_MAX_VCPUS: {
+		if (cap->flags || cap->args[0] == 0)
+			return -EINVAL;
+		if (cap->args[0] > KVM_MAX_VCPUS ||
+		    cap->args[0] > tdx_sysinfo->td_conf.max_vcpus_per_td)
+			return -E2BIG;
+
+		mutex_lock(&kvm->lock);
+		if (kvm->created_vcpus)
+			r = -EBUSY;
+		else {
+			kvm->max_vcpus = cap->args[0];
+			r = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		break;
+	}
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
 static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
 {
 	const struct tdx_sysinfo_td_conf *td_conf = &tdx_sysinfo->td_conf;
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index c69ca640abe6..c1bdf7d8fee3 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -119,8 +119,13 @@ void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu);
 void vmx_setup_mce(struct kvm_vcpu *vcpu);
 
 #ifdef CONFIG_INTEL_TDX_HOST
+int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
 int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
 #else
+static inline int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+	return -EINVAL;
+};
 static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOPNOTSUPP; }
 #endif
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7914ea50fd04..751b3841c48f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4754,6 +4754,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		break;
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
+		if (kvm)
+			r = kvm->max_vcpus;
 		break;
 	case KVM_CAP_MAX_VCPU_ID:
 		r = KVM_MAX_VCPU_IDS;
@@ -6772,6 +6774,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 	}
 	default:
 		r = -EINVAL;
+		if (kvm_x86_ops.vm_enable_cap)
+			r = static_call(kvm_x86_vm_enable_cap)(kvm, cap);
 		break;
 	}
 	return r;
-- 
2.34.1