>From 7277396033c21569dbed0a52fa92804307db111e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 29 Aug 2025 09:19:11 -0700 Subject: [PATCH 2/2] KVM: TDX: Guard VM state transitions Signed-off-by: Sean Christopherson --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/main.c | 9 +++ arch/x86/kvm/vmx/tdx.c | 112 ++++++++++++++++++++++------- arch/x86/kvm/vmx/x86_ops.h | 1 + arch/x86/kvm/x86.c | 7 ++ 6 files changed, 105 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 18a5c3119e1a..fe2bb2e2ebc8 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -128,6 +128,7 @@ KVM_X86_OP(enable_smi_window) KVM_X86_OP_OPTIONAL(dev_get_attr) KVM_X86_OP_OPTIONAL(mem_enc_ioctl) KVM_X86_OP_OPTIONAL(vcpu_mem_enc_ioctl) +KVM_X86_OP_OPTIONAL(vcpu_mem_enc_async_ioctl) KVM_X86_OP_OPTIONAL(mem_enc_register_region) KVM_X86_OP_OPTIONAL(mem_enc_unregister_region) KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d0a8404a6b8f..ac5d3b8fa49f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1911,6 +1911,7 @@ struct kvm_x86_ops { int (*dev_get_attr)(u32 group, u64 attr, u64 *val); int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); int (*vcpu_mem_enc_ioctl)(struct kvm_vcpu *vcpu, void __user *argp); + int (*vcpu_mem_enc_async_ioctl)(struct kvm_vcpu *vcpu, void __user *argp); int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp); int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd); diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c index dbab1c15b0cd..e0e35ceec9b1 100644 --- a/arch/x86/kvm/vmx/main.c +++ b/arch/x86/kvm/vmx/main.c @@ -831,6 +831,14 @@ static int vt_vcpu_mem_enc_ioctl(struct kvm_vcpu *vcpu, void __user *argp) return tdx_vcpu_ioctl(vcpu, argp); } +static int vt_vcpu_mem_enc_async_ioctl(struct kvm_vcpu *vcpu, void __user *argp) +{ + if (!is_td_vcpu(vcpu)) + return -EINVAL; + + return tdx_vcpu_async_ioctl(vcpu, argp); +} + static int vt_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn) { if (is_td(kvm)) @@ -1004,6 +1012,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = { .mem_enc_ioctl = vt_op_tdx_only(mem_enc_ioctl), .vcpu_mem_enc_ioctl = vt_op_tdx_only(vcpu_mem_enc_ioctl), + .vcpu_mem_enc_async_ioctl = vt_op_tdx_only(vcpu_mem_enc_async_ioctl), .private_max_mapping_level = vt_op_tdx_only(gmem_private_max_mapping_level) }; diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c index d6c9defad9cd..c595d9cb6dcd 100644 --- a/arch/x86/kvm/vmx/tdx.c +++ b/arch/x86/kvm/vmx/tdx.c @@ -2624,6 +2624,44 @@ static int tdx_read_cpuid(struct kvm_vcpu *vcpu, u32 leaf, u32 sub_leaf, return -EIO; } +typedef void * tdx_vm_state_guard_t; + +static tdx_vm_state_guard_t tdx_acquire_vm_state_locks(struct kvm *kvm) +{ + int r; + + mutex_lock(&kvm->lock); + mutex_lock(&kvm->slots_lock); + + if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + goto out_err; + } + + r = kvm_lock_all_vcpus(kvm); + if (r) + goto out_err; + + return kvm; + +out_err: + mutex_unlock(&kvm->slots_lock); + mutex_unlock(&kvm->lock); + + return ERR_PTR(r); +} + +static void tdx_release_vm_state_locks(struct kvm *kvm) +{ + kvm_unlock_all_vcpus(kvm); + mutex_unlock(&kvm->slots_lock); + mutex_unlock(&kvm->lock); +} + +DEFINE_CLASS(tdx_vm_state_guard, tdx_vm_state_guard_t, + if (!IS_ERR(_T)) tdx_release_vm_state_locks(_T), + tdx_acquire_vm_state_locks(kvm), struct kvm *kvm); + static int tdx_td_init(struct kvm *kvm, struct kvm_tdx_cmd *cmd) { struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); @@ -2634,6 +2672,10 @@ static int tdx_td_init(struct kvm *kvm, struct kvm_tdx_cmd *cmd) BUILD_BUG_ON(sizeof(*init_vm) != 256 + sizeof_field(struct kvm_tdx_init_vm, cpuid)); BUILD_BUG_ON(sizeof(struct td_params) != 1024); + CLASS(tdx_vm_state_guard, guard)(kvm); + if (IS_ERR(guard)) + return PTR_ERR(guard); + if (kvm_tdx->state != TD_STATE_UNINITIALIZED) return -EINVAL; @@ -2745,7 +2787,9 @@ static int tdx_td_finalize(struct kvm *kvm, struct kvm_tdx_cmd *cmd) { struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); - guard(mutex)(&kvm->slots_lock); + CLASS(tdx_vm_state_guard, guard)(kvm); + if (IS_ERR(guard)) + return PTR_ERR(guard); if (!is_hkid_assigned(kvm_tdx) || kvm_tdx->state == TD_STATE_RUNNABLE) return -EINVAL; @@ -2763,22 +2807,25 @@ static int tdx_td_finalize(struct kvm *kvm, struct kvm_tdx_cmd *cmd) return 0; } +static int tdx_get_cmd(void __user *argp, struct kvm_tdx_cmd *cmd) +{ + if (copy_from_user(cmd, argp, sizeof(cmd))) + return -EFAULT; + + if (cmd->hw_error) + return -EINVAL; + + return 0; +} + int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { struct kvm_tdx_cmd tdx_cmd; int r; - if (copy_from_user(&tdx_cmd, argp, sizeof(struct kvm_tdx_cmd))) - return -EFAULT; - - /* - * Userspace should never set hw_error. It is used to fill - * hardware-defined error by the kernel. - */ - if (tdx_cmd.hw_error) - return -EINVAL; - - mutex_lock(&kvm->lock); + r = tdx_get_cmd(argp, &tdx_cmd); + if (r) + return r; switch (tdx_cmd.id) { case KVM_TDX_CAPABILITIES: @@ -2791,15 +2838,12 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) r = tdx_td_finalize(kvm, &tdx_cmd); break; default: - r = -EINVAL; - goto out; + return -EINVAL; } if (copy_to_user(argp, &tdx_cmd, sizeof(struct kvm_tdx_cmd))) r = -EFAULT; -out: - mutex_unlock(&kvm->lock); return r; } @@ -3079,11 +3123,13 @@ static int tdx_vcpu_init_mem_region(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *c long gmem_ret; int ret; + CLASS(tdx_vm_state_guard, guard)(kvm); + if (IS_ERR(guard)) + return PTR_ERR(guard); + if (tdx->state != VCPU_TD_STATE_INITIALIZED) return -EINVAL; - guard(mutex)(&kvm->slots_lock); - /* Once TD is finalized, the initial guest memory is fixed. */ if (kvm_tdx->state == TD_STATE_RUNNABLE) return -EINVAL; @@ -3101,6 +3147,8 @@ static int tdx_vcpu_init_mem_region(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *c !vt_is_tdx_private_gpa(kvm, region.gpa + (region.nr_pages << PAGE_SHIFT) - 1)) return -EINVAL; + vcpu_load(vcpu); + ret = 0; while (region.nr_pages) { if (signal_pending(current)) { @@ -3132,11 +3180,28 @@ static int tdx_vcpu_init_mem_region(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *c cond_resched(); } + vcpu_put(vcpu); + if (copy_to_user(u64_to_user_ptr(cmd->data), ®ion, sizeof(region))) ret = -EFAULT; return ret; } +int tdx_vcpu_async_ioctl(struct kvm_vcpu *vcpu, void __user *argp) +{ + struct kvm_tdx_cmd cmd; + int r; + + r = tdx_get_cmd(argp, &cmd); + if (r) + return r; + + if (cmd.id != KVM_TDX_INIT_MEM_REGION) + return -ENOIOCTLCMD; + + return tdx_vcpu_init_mem_region(vcpu, &cmd); +} + int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) { struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm); @@ -3146,19 +3211,14 @@ int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp) if (!is_hkid_assigned(kvm_tdx) || kvm_tdx->state == TD_STATE_RUNNABLE) return -EINVAL; - if (copy_from_user(&cmd, argp, sizeof(cmd))) - return -EFAULT; - - if (cmd.hw_error) - return -EINVAL; + ret = tdx_get_cmd(argp, &cmd); + if (ret) + return ret; switch (cmd.id) { case KVM_TDX_INIT_VCPU: ret = tdx_vcpu_init(vcpu, &cmd); break; - case KVM_TDX_INIT_MEM_REGION: - ret = tdx_vcpu_init_mem_region(vcpu, &cmd); - break; case KVM_TDX_GET_CPUID: ret = tdx_vcpu_get_cpuid(vcpu, &cmd); break; diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h index 2b3424f638db..a797101a2150 100644 --- a/arch/x86/kvm/vmx/x86_ops.h +++ b/arch/x86/kvm/vmx/x86_ops.h @@ -149,6 +149,7 @@ int tdx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); int tdx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr); int tdx_vcpu_ioctl(struct kvm_vcpu *vcpu, void __user *argp); +int tdx_vcpu_async_ioctl(struct kvm_vcpu *vcpu, void __user *argp); void tdx_flush_tlb_current(struct kvm_vcpu *vcpu); void tdx_flush_tlb_all(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 92e916eba6a9..281cd0980245 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6946,6 +6946,13 @@ static int kvm_vm_ioctl_set_clock(struct kvm *kvm, void __user *argp) long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + + if (ioctl == KVM_MEMORY_ENCRYPT_OP && + kvm_x86_ops.vcpu_mem_enc_async_ioctl) + return kvm_x86_call(vcpu_mem_enc_async_ioctl)(vcpu, argp); + return -ENOIOCTLCMD; } -- 2.51.0.318.gd7df087d1a-goog