[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <822766d1-746a-4b15-b790-d1e331e4d9aa@suse.com>
Date: Mon, 19 Aug 2024 18:35:05 +0300
From: Nikolay Borisov <nik.borisov@...e.com>
To: Rick Edgecombe <rick.p.edgecombe@...el.com>, seanjc@...gle.com,
pbonzini@...hat.com, kvm@...r.kernel.org
Cc: kai.huang@...el.com, isaku.yamahata@...il.com,
tony.lindgren@...ux.intel.com, xiaoyao.li@...el.com,
linux-kernel@...r.kernel.org, Isaku Yamahata <isaku.yamahata@...el.com>
Subject: Re: [PATCH 14/25] KVM: TDX: initialize VM with TDX specific
parameters
On 13.08.24 г. 1:48 ч., Rick Edgecombe wrote:
> From: Isaku Yamahata <isaku.yamahata@...el.com>
>
> After the crypto-protection key has been configured, TDX requires a
> VM-scope initialization as a step of creating the TDX guest. This
> "per-VM" TDX initialization does the global configurations/features that
> the TDX guest can support, such as guest's CPUIDs (emulated by the TDX
> module), the maximum number of vcpus etc.
>
> This "per-VM" TDX initialization must be done before any "vcpu-scope" TDX
> initialization. To match this better, require the KVM_TDX_INIT_VM IOCTL()
> to be done before KVM creates any vcpus.
>
> Co-developed-by: Xiaoyao Li <xiaoyao.li@...el.com>
> Signed-off-by: Xiaoyao Li <xiaoyao.li@...el.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@...el.com>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@...el.com>
> ---
> uAPI breakout v1:
> - Drop TDX_TD_XFAM_CET and use XFEATURE_MASK_CET_{USER, KERNEL}.
> - Update for the wrapper functions for SEAMCALLs. (Sean)
> - Move gfn_shared_mask settings into this patch due to MMU section move
> - Fix bisectability issues in headers (Kai)
> - Updates from seamcall overhaul (Kai)
> - Allow userspace configure xfam directly
> - Check if user sets non-configurable bits in CPUIDs
> - Rename error->hw_error
> - Move code change to tdx_module_setup() to __tdx_bringup() due to
> initializing is done in post hardware_setup() now and
> tdx_module_setup() is removed. Remove the code to use API to read
> global metadata but use exported 'struct tdx_sysinfo' pointer.
> - Replace 'tdx_info->nr_tdcs_pages' with a wrapper
> tdx_sysinfo_nr_tdcs_pages() because the 'struct tdx_sysinfo' doesn't
> have nr_tdcs_pages directly.
> - Replace tdx_info->max_vcpus_per_td with the new exported pointer in
> tdx_vm_init().
> - Decrease the reserved space for struct kvm_tdx_init_vm (Kai)
> - Use sizeof_field() for struct kvm_tdx_init_vm cpuids (Tony)
> - No need to init init_vm, it gets copied over in tdx_td_init() (Chao)
> - Use kmalloc() instead of () kzalloc for init_vm in tdx_td_init() (Chao)
> - Add more line breaks to tdx_td_init() to make code easier to read (Tony)
> - Clarify patch description (Kai)
>
> v19:
> - Check NO_RBP_MOD of feature0 and set it
> - Update the comment for PT and CET
>
> v18:
> - remove the change of tools/arch/x86/include/uapi/asm/kvm.h
> - typo in comment. sha348 => sha384
> - updated comment in setup_tdparams_xfam()
> - fix setup_tdparams_xfam() to use init_vm instead of td_params
>
> v16:
> - Removed AMX check as the KVM upstream supports AMX.
> - Added CET flag to guest supported xss
> ---
> arch/x86/include/uapi/asm/kvm.h | 24 ++++
> arch/x86/kvm/cpuid.c | 7 +
> arch/x86/kvm/cpuid.h | 2 +
> arch/x86/kvm/vmx/tdx.c | 237 ++++++++++++++++++++++++++++++--
> arch/x86/kvm/vmx/tdx.h | 4 +
> arch/x86/kvm/vmx/tdx_ops.h | 12 ++
> 6 files changed, 276 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
> index 2e3caa5a58fd..95ae2d4a4697 100644
> --- a/arch/x86/include/uapi/asm/kvm.h
> +++ b/arch/x86/include/uapi/asm/kvm.h
> @@ -929,6 +929,7 @@ struct kvm_hyperv_eventfd {
> /* Trust Domain eXtension sub-ioctl() commands. */
> enum kvm_tdx_cmd_id {
> KVM_TDX_CAPABILITIES = 0,
> + KVM_TDX_INIT_VM,
>
> KVM_TDX_CMD_NR_MAX,
> };
> @@ -970,4 +971,27 @@ struct kvm_tdx_capabilities {
> struct kvm_tdx_cpuid_config cpuid_configs[];
> };
>
> +struct kvm_tdx_init_vm {
> + __u64 attributes;
> + __u64 xfam;
> + __u64 mrconfigid[6]; /* sha384 digest */
> + __u64 mrowner[6]; /* sha384 digest */
> + __u64 mrownerconfig[6]; /* sha384 digest */
> +
> + /* The total space for TD_PARAMS before the CPUIDs is 256 bytes */
> + __u64 reserved[12];
> +
> + /*
> + * Call KVM_TDX_INIT_VM before vcpu creation, thus before
> + * KVM_SET_CPUID2.
> + * This configuration supersedes KVM_SET_CPUID2s for VCPUs because the
> + * TDX module directly virtualizes those CPUIDs without VMM. The user
> + * space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with
> + * those values. If it doesn't, KVM may have wrong idea of vCPUIDs of
> + * the guest, and KVM may wrongly emulate CPUIDs or MSRs that the TDX
> + * module doesn't virtualize.
> + */
> + struct kvm_cpuid2 cpuid;
> +};
> +
> #endif /* _ASM_X86_KVM_H */
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 2617be544480..7310d8a8a503 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -1487,6 +1487,13 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
> return r;
> }
>
> +struct kvm_cpuid_entry2 *kvm_find_cpuid_entry2(
> + struct kvm_cpuid_entry2 *entries, int nent, u32 function, u64 index)
> +{
> + return cpuid_entry2_find(entries, nent, function, index);
> +}
> +EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry2);
> +
> struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
> u32 function, u32 index)
> {
> diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
> index 41697cca354e..00570227e2ae 100644
> --- a/arch/x86/kvm/cpuid.h
> +++ b/arch/x86/kvm/cpuid.h
> @@ -13,6 +13,8 @@ void kvm_set_cpu_caps(void);
>
> void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu);
> void kvm_update_pv_runtime(struct kvm_vcpu *vcpu);
> +struct kvm_cpuid_entry2 *kvm_find_cpuid_entry2(struct kvm_cpuid_entry2 *entries,
> + int nent, u32 function, u64 index);
> struct kvm_cpuid_entry2 *kvm_find_cpuid_entry_index(struct kvm_vcpu *vcpu,
> u32 function, u32 index);
> struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index a0954c3928e2..a6c711715a4a 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -7,6 +7,7 @@
> #include "tdx.h"
> #include "tdx_ops.h"
>
> +
> #undef pr_fmt
> #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>
> @@ -356,12 +357,16 @@ static int tdx_do_tdh_mng_key_config(void *param)
> return 0;
> }
>
> -static int __tdx_td_init(struct kvm *kvm);
> -
> int tdx_vm_init(struct kvm *kvm)
> {
> kvm->arch.has_private_mem = true;
>
> + /*
> + * This function initializes only KVM software construct. It doesn't
> + * initialize TDX stuff, e.g. TDCS, TDR, TDCX, HKID etc.
> + * It is handled by KVM_TDX_INIT_VM, __tdx_td_init().
> + */
If you need to put a comment like that it means the function has the
wrong name.
> +
> /*
> * TDX has its own limit of the number of vcpus in addition to
> * KVM_MAX_VCPUS.
<snip>
> +
> +static int __tdx_td_init(struct kvm *kvm, struct td_params *td_params,
> + u64 *seamcall_err)
What criteria did you use to split __tdx_td_init from tdx_td_init? Seems
somewhar arbitrary, I think it's best if the TD VM init code is in a
single function, yet it will be rather large but the code should be
self-explanatory and fairly linear. Additionally I think some of the
code can be factored out in more specific helpers i.e the key
programming bits can be a separate helper.
> {
> struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> cpumask_var_t packages;
> @@ -427,8 +547,9 @@ static int __tdx_td_init(struct kvm *kvm)
> unsigned long tdr_pa = 0;
> unsigned long va;
> int ret, i;
> - u64 err;
> + u64 err, rcx;
>
> + *seamcall_err = 0;
> ret = tdx_guest_keyid_alloc();
> if (ret < 0)
> return ret;
> @@ -543,10 +664,23 @@ static int __tdx_td_init(struct kvm *kvm)
> }
> }
>
> - /*
> - * Note, TDH_MNG_INIT cannot be invoked here. TDH_MNG_INIT requires a dedicated
> - * ioctl() to define the configure CPUID values for the TD.
> - */
> + err = tdh_mng_init(kvm_tdx, __pa(td_params), &rcx);
> + if ((err & TDX_SEAMCALL_STATUS_MASK) == TDX_OPERAND_INVALID) {
> + /*
> + * Because a user gives operands, don't warn.
> + * Return a hint to the user because it's sometimes hard for the
> + * user to figure out which operand is invalid. SEAMCALL status
> + * code includes which operand caused invalid operand error.
> + */
> + *seamcall_err = err;
> + ret = -EINVAL;
> + goto teardown;
> + } else if (WARN_ON_ONCE(err)) {
> + pr_tdx_error_1(TDH_MNG_INIT, err, rcx);
> + ret = -EIO;
> + goto teardown;
> + }
> +
> return 0;
>
> /*
> @@ -592,6 +726,86 @@ static int __tdx_td_init(struct kvm *kvm)
> return ret;
> }
>
> +static int tdx_td_init(struct kvm *kvm, struct kvm_tdx_cmd *cmd)
> +{
> + struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> + struct kvm_tdx_init_vm *init_vm;
> + struct td_params *td_params = NULL;
> + int ret;
> +
> + BUILD_BUG_ON(sizeof(*init_vm) != 256 + sizeof_field(struct kvm_tdx_init_vm, cpuid));
> + BUILD_BUG_ON(sizeof(struct td_params) != 1024);
> +
> + if (is_hkid_assigned(kvm_tdx))
> + return -EINVAL;
> +
> + if (cmd->flags)
> + return -EINVAL;
> +
> + init_vm = kmalloc(sizeof(*init_vm) +
> + sizeof(init_vm->cpuid.entries[0]) * KVM_MAX_CPUID_ENTRIES,
> + GFP_KERNEL);
> + if (!init_vm)
> + return -ENOMEM;
> +
> + if (copy_from_user(init_vm, u64_to_user_ptr(cmd->data), sizeof(*init_vm))) {
> + ret = -EFAULT;
> + goto out;
> + }
> +
> + if (init_vm->cpuid.nent > KVM_MAX_CPUID_ENTRIES) {
> + ret = -E2BIG;
> + goto out;
> + }
> +
> + if (copy_from_user(init_vm->cpuid.entries,
> + u64_to_user_ptr(cmd->data) + sizeof(*init_vm),
> + flex_array_size(init_vm, cpuid.entries, init_vm->cpuid.nent))) {
> + ret = -EFAULT;
> + goto out;
> + }
> +
> + if (memchr_inv(init_vm->reserved, 0, sizeof(init_vm->reserved))) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + if (init_vm->cpuid.padding) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + td_params = kzalloc(sizeof(struct td_params), GFP_KERNEL);
> + if (!td_params) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + ret = setup_tdparams(kvm, td_params, init_vm);
> + if (ret)
> + goto out;
> +
> + ret = __tdx_td_init(kvm, td_params, &cmd->hw_error);
> + if (ret)
> + goto out;
> +
> + kvm_tdx->tsc_offset = td_tdcs_exec_read64(kvm_tdx, TD_TDCS_EXEC_TSC_OFFSET);
> + kvm_tdx->attributes = td_params->attributes;
> + kvm_tdx->xfam = td_params->xfam;
> +
> + if (td_params->exec_controls & TDX_EXEC_CONTROL_MAX_GPAW)
> + kvm->arch.gfn_direct_bits = gpa_to_gfn(BIT_ULL(51));
> + else
> + kvm->arch.gfn_direct_bits = gpa_to_gfn(BIT_ULL(47));
> +
> +out:
> + /* kfree() accepts NULL. */
> + kfree(init_vm);
> + kfree(td_params);
> +
> + return ret;
> +}
> +
> int tdx_vm_ioctl(struct kvm *kvm, void __user *argp)
> {
> struct kvm_tdx_cmd tdx_cmd;
> @@ -613,6 +827,9 @@ int tdx_vm_ioctl(struct kvm *kvm, void __user *argp)
> case KVM_TDX_CAPABILITIES:
> r = tdx_get_capabilities(&tdx_cmd);
> break;
> + case KVM_TDX_INIT_VM:
> + r = tdx_td_init(kvm, &tdx_cmd);
> + break;
> default:
> r = -EINVAL;
> goto out;
> diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
> index 268959d0f74f..8912cb6d5bc2 100644
> --- a/arch/x86/kvm/vmx/tdx.h
> +++ b/arch/x86/kvm/vmx/tdx.h
> @@ -16,7 +16,11 @@ struct kvm_tdx {
> unsigned long tdr_pa;
> unsigned long *tdcs_pa;
>
> + u64 attributes;
> + u64 xfam;
> int hkid;
> +
> + u64 tsc_offset;
> };
>
> struct vcpu_tdx {
> diff --git a/arch/x86/kvm/vmx/tdx_ops.h b/arch/x86/kvm/vmx/tdx_ops.h
> index 3f64c871a3f2..0363d8544f42 100644
> --- a/arch/x86/kvm/vmx/tdx_ops.h
> +++ b/arch/x86/kvm/vmx/tdx_ops.h
> @@ -399,4 +399,16 @@ static inline u64 tdh_vp_wr(struct vcpu_tdx *tdx, u64 field, u64 val, u64 mask)
> return seamcall(TDH_VP_WR, &in);
> }
>
> +static __always_inline u64 td_tdcs_exec_read64(struct kvm_tdx *kvm_tdx, u32 field)
> +{
> + u64 err, data;
> +
> + err = tdh_mng_rd(kvm_tdx, TDCS_EXEC(field), &data);
> + if (unlikely(err)) {
> + pr_err("TDH_MNG_RD[EXEC.0x%x] failed: 0x%llx\n", field, err);
> + return 0;
> + }
> + return data;
> +}
> +
> #endif /* __KVM_X86_TDX_OPS_H */
Powered by blists - more mailing lists