Message-ID: <5cbb181c-f226-4d50-8b92-04775e8b65e0@intel.com>
Date: Tue, 25 Feb 2025 14:43:14 +0800
From: Xiaoyao Li <xiaoyao.li@...el.com>
To: Adrian Hunter <adrian.hunter@...el.com>, pbonzini@...hat.com,
seanjc@...gle.com
Cc: kvm@...r.kernel.org, rick.p.edgecombe@...el.com, kai.huang@...el.com,
reinette.chatre@...el.com, tony.lindgren@...ux.intel.com,
binbin.wu@...ux.intel.com, dmatlack@...gle.com, isaku.yamahata@...el.com,
nik.borisov@...e.com, linux-kernel@...r.kernel.org, yan.y.zhao@...el.com,
chao.gao@...el.com, weijiang.yang@...el.com
Subject: Re: [PATCH V2 07/12] KVM: TDX: restore host xsave state when exit
from the guest TD
On 1/29/2025 5:58 PM, Adrian Hunter wrote:
> From: Isaku Yamahata <isaku.yamahata@...el.com>
>
> On exiting from the guest TD, xsave state is clobbered. Restore xsave
> state on TD exit.
>
> Set up guest state so that existing kvm_load_host_xsave_state() can be
> used. Do not allow VCPU entry if guest state conflicts with the TD's
> configuration.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@...el.com>
> Signed-off-by: Adrian Hunter <adrian.hunter@...el.com>
> ---
> TD vcpu enter/exit v2:
> - Drop PT and CET feature flags (Chao)
> - Use cpu_feature_enabled() instead of static_cpu_has() (Chao)
> - Restore PKRU only if the host value differs from defined
> exit value (Chao)
> - Use defined masks to separate XFAM bits into XCR0/XSS (Adrian)
> - Use existing kvm_load_host_xsave_state() in place of
> tdx_restore_host_xsave_state() by defining guest CR4, XCR0, XSS
> and PKRU (Sean)
> - Do not enter if vital guest state is invalid (Adrian)
>
> TD vcpu enter/exit v1:
> - Remove noinstr on tdx_vcpu_enter_exit() (Sean)
> - Switch to kvm_host struct for xcr0 and xss
>
> v19:
> - Add EXPORT_SYMBOL_GPL(host_xcr0)
>
> v15 -> v16:
> - Added CET flag mask
> ---
> arch/x86/kvm/vmx/tdx.c | 72 ++++++++++++++++++++++++++++++++++++++----
> 1 file changed, 66 insertions(+), 6 deletions(-)
>
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 3f3d61935a58..e4355553569a 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -83,16 +83,21 @@ static u64 tdx_get_supported_attrs(const struct tdx_sys_info_td_conf *td_conf)
> return val;
> }
>
> +/*
> + * Before returning from TDH.VP.ENTER, the TDX Module assigns:
> + * XCR0 to the TD’s user-mode feature bits of XFAM (bits 7:0, 9, 18:17)
> + * IA32_XSS to the TD's supervisor-mode feature bits of XFAM (bits 8, 16:10)
> + */
> +#define TDX_XFAM_XCR0_MASK (GENMASK(7, 0) | BIT(9) | GENMASK(18, 17))
> +#define TDX_XFAM_XSS_MASK (BIT(8) | GENMASK(16, 10))
> +#define TDX_XFAM_MASK (TDX_XFAM_XCR0_MASK | TDX_XFAM_XSS_MASK)
No need to define TDX-specific masks for XFAM. The kernel already defines
XFEATURE_MASK_*, so you can build XFEATURE_XCR0_MASK and XFEATURE_XSS_MASK
from XFEATURE_MASK_* instead (rough sketch below).
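Something along these lines, just a sketch -- names and placement are up to
you, and bits 13, 14 and 16 of the supervisor range would still need
XFEATURE_MASK_* names if they are wanted in the mask:

#define XFEATURE_XCR0_MASK	(XFEATURE_MASK_FP | XFEATURE_MASK_SSE | \
				 XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS | \
				 XFEATURE_MASK_BNDCSR | XFEATURE_MASK_OPMASK | \
				 XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM | \
				 XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)

#define XFEATURE_XSS_MASK	(XFEATURE_MASK_PT | XFEATURE_MASK_PASID | \
				 XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL | \
				 XFEATURE_MASK_LBR /* + bits 13, 14, 16 if needed */)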
> static u64 tdx_get_supported_xfam(const struct tdx_sys_info_td_conf *td_conf)
> {
> u64 val = kvm_caps.supported_xcr0 | kvm_caps.supported_xss;
>
> - /*
> - * PT and CET can be exposed to TD guest regardless of KVM's XSS, PT
> - * and, CET support.
> - */
> - val |= XFEATURE_MASK_PT | XFEATURE_MASK_CET_USER |
> - XFEATURE_MASK_CET_KERNEL;
> + /* Ensure features are in the masks */
> + val &= TDX_XFAM_MASK;
It's unnecessary. kvm_caps.supported_xcr0 | kvm_caps.supported_xss must
already be a subset of TDX_XFAM_MASK.
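If you want to keep something defensive here, a one-liner like this (just a
thought, using whatever mask name survives) would document the invariant
instead of silently masking bits:

	/* supported_xcr0/xss are expected to be XFAM-expressible */
	WARN_ON_ONCE(val & ~TDX_XFAM_MASK);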
> if ((val & td_conf->xfam_fixed1) != td_conf->xfam_fixed1)
> return 0;
> @@ -724,6 +729,19 @@ int tdx_vcpu_pre_run(struct kvm_vcpu *vcpu)
> return 1;
> }
>
> +static bool tdx_guest_state_is_invalid(struct kvm_vcpu *vcpu)
> +{
> + struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
> +
> + return vcpu->arch.xcr0 != (kvm_tdx->xfam & TDX_XFAM_XCR0_MASK) ||
> + vcpu->arch.ia32_xss != (kvm_tdx->xfam & TDX_XFAM_XSS_MASK) ||
> + vcpu->arch.pkru ||
> + (cpu_feature_enabled(X86_FEATURE_XSAVE) &&
> + !kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) ||
> + (cpu_feature_enabled(X86_FEATURE_XSAVES) &&
> + !guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES));
The guest_cpu_cap_has() check is better placed in the path where userspace
configures the vCPU model, so that KVM can return an error to userspace
earlier, before running the vCPU.
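Rough, untested sketch of what I mean (the function name and where it gets
called from are just placeholders):

static int tdx_vcpu_check_cpuid(struct kvm_vcpu *vcpu)
{
	/*
	 * If the host uses XSAVES, the TD will too, so refuse a vCPU model
	 * that hides XSAVES instead of failing the check at vcpu_run time.
	 */
	if (cpu_feature_enabled(X86_FEATURE_XSAVES) &&
	    !guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVES))
		return -EINVAL;

	return 0;
}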
> +}
> +
> static noinstr void tdx_vcpu_enter_exit(struct kvm_vcpu *vcpu)
> {
> struct vcpu_tdx *tdx = to_tdx(vcpu);
> @@ -740,6 +758,8 @@ static noinstr void tdx_vcpu_enter_exit(struct kvm_vcpu *vcpu)
>
> fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
> {
> + struct vcpu_tdx *tdx = to_tdx(vcpu);
> +
> /*
> * force_immediate_exit requires vCPU entering for events injection with
> * an immediately exit followed. But The TDX module doesn't guarantee
> @@ -750,10 +770,22 @@ fastpath_t tdx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
> */
> WARN_ON_ONCE(force_immediate_exit);
>
> + if (WARN_ON_ONCE(tdx_guest_state_is_invalid(vcpu))) {
> + /*
> + * Invalid exit_reason becomes KVM_EXIT_INTERNAL_ERROR, refer
> + * tdx_handle_exit().
> + */
> + tdx->vt.exit_reason.full = -1u;
> + tdx->vp_enter_ret = -1u;
> + return EXIT_FASTPATH_NONE;
> + }
> +
> trace_kvm_entry(vcpu, force_immediate_exit);
>
> tdx_vcpu_enter_exit(vcpu);
>
> + kvm_load_host_xsave_state(vcpu);
> +
> vcpu->arch.regs_avail &= ~TDX_REGS_UNSUPPORTED_SET;
>
> trace_kvm_exit(vcpu, KVM_ISA_VMX);
> @@ -1878,9 +1910,23 @@ static int tdx_vcpu_get_cpuid(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *cmd)
> return r;
> }
>
> +static u64 tdx_guest_cr0(struct kvm_vcpu *vcpu, u64 cr4)
> +{
> + u64 cr0 = ~CR0_RESERVED_BITS;
> +
> + if (cr4 & X86_CR4_CET)
> + cr0 |= X86_CR0_WP;
> +
> + cr0 |= X86_CR0_PE | X86_CR0_NE;
> + cr0 &= ~(X86_CR0_NW | X86_CR0_CD);
> +
> + return cr0;
> +}
> +
> static int tdx_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *cmd)
> {
> u64 apic_base;
> + struct kvm_tdx *kvm_tdx = to_kvm_tdx(vcpu->kvm);
> struct vcpu_tdx *tdx = to_tdx(vcpu);
> int ret;
>
> @@ -1903,6 +1949,20 @@ static int tdx_vcpu_init(struct kvm_vcpu *vcpu, struct kvm_tdx_cmd *cmd)
> if (ret)
> return ret;
>
> + vcpu->arch.cr4 = ~vcpu->arch.cr4_guest_rsvd_bits;
When userspace doesn't configure XFEATURE_MASK_PKRU in XFAM, it seems
kvm_load_host_xsave_state() will skip restoring the host's PKRU value.

Besides, vcpu->arch.cr4_guest_rsvd_bits depends on KVM_SET_CPUID*, so we
need to enforce the dependency that tdx_vcpu_init() is called only after
the vCPU model has been configured (see the sketch below).
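One possible way to enforce it, as a sketch; this leans on cpuid_nent as a
proxy for "KVM_SET_CPUID* has been called", there may be a better signal:

	/* e.g. early in tdx_vcpu_init(): */
	if (!vcpu->arch.cpuid_nent)
		return -EINVAL;	/* vCPU model not configured yet */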
> + vcpu->arch.cr0 = tdx_guest_cr0(vcpu, vcpu->arch.cr4);
> + /*
> + * On return from VP.ENTER, the TDX Module sets XCR0 and XSS to the
> + * maximal values supported by the guest, and zeroes PKRU, so from
> + * KVM's perspective, those are the guest's values at all times.
> + */
It's better to also call out that this is only done to make
kvm_load_host_xsave_state() work for TDX; these don't represent the real
guest values.
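Suggested wording, feel free to adjust:

	/*
	 * On return from VP.ENTER, the TDX Module sets XCR0 and XSS to the
	 * maximal values supported by the guest, and zeroes PKRU.  These do
	 * not reflect the real guest state; they are set up only so that the
	 * common kvm_load_host_xsave_state() does the right thing for TDX.
	 */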
> + vcpu->arch.ia32_xss = kvm_tdx->xfam & TDX_XFAM_XSS_MASK;
> + vcpu->arch.xcr0 = kvm_tdx->xfam & TDX_XFAM_XCR0_MASK;
> + vcpu->arch.pkru = 0;
> +
> + /* TODO: freeze vCPU model before kvm_update_cpuid_runtime() */
> + kvm_update_cpuid_runtime(vcpu);
> +
> tdx->state = VCPU_TD_STATE_INITIALIZED;
>
> return 0;