[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <bd2999b5-f2fc-4d86-a5c8-0d1af4d51bc0@intel.com>
Date: Thu, 11 Sep 2025 16:05:23 +0800
From: Xiaoyao Li <xiaoyao.li@...el.com>
To: Chao Gao <chao.gao@...el.com>, kvm@...r.kernel.org,
linux-kernel@...r.kernel.org
Cc: acme@...hat.com, bp@...en8.de, dave.hansen@...ux.intel.com,
hpa@...or.com, john.allen@....com, mingo@...nel.org, mingo@...hat.com,
minipli@...ecurity.net, mlevitsk@...hat.com, namhyung@...nel.org,
pbonzini@...hat.com, prsampat@....com, rick.p.edgecombe@...el.com,
seanjc@...gle.com, shuah@...nel.org, tglx@...utronix.de,
weijiang.yang@...el.com, x86@...nel.org, xin@...or.com
Subject: Re: [PATCH v14 11/22] KVM: VMX: Emulate read and write to CET MSRs
On 9/9/2025 5:39 PM, Chao Gao wrote:
> From: Yang Weijiang <weijiang.yang@...el.com>
>
> Add emulation interface for CET MSR access. The emulation code is split
> into common part and vendor specific part. The former does common checks
> for MSRs, e.g., accessibility, data validity etc., then passes operation
> to either XSAVE-managed MSRs via the helpers or CET VMCS fields.
I planed to continue the review after Sean posts v15 as he promised.
But I want to raise my question regarding it sooner so I just ask it on v14.
Do we expect to put the accessibility and data validity check always in
__kvm_{s,g}_msr(), when the handling cannot be put in
kvm_{g,s}et_common() only? i.e., there will be 3 case:
- All the handling in kvm_{g,s}et_common(), when the MSR emulation is
common to vmx and svm.
- generic accessibility and data validity check in __kvm_{g,s}et_msr()
and vendor specific handling in {vmx,svm}_{g,s}et_msr()
- generic accessibility and data validity check in __kvm_{g,s}et_msr() ,
vendor specific handling in {vmx,svm}_{g,s}et_msr() and other generic
handling in kvm_{g,s}et_common()
> SSP can only be read via RDSSP. Writing even requires destructive and
> potentially faulting operations such as SAVEPREVSSP/RSTORSSP or
> SETSSBSY/CLRSSBSY. Let the host use a pseudo-MSR that is just a wrapper
> for the GUEST_SSP field of the VMCS.
>
> Suggested-by: Sean Christopherson <seanjc@...gle.com>
> Signed-off-by: Yang Weijiang <weijiang.yang@...el.com>
> Tested-by: Mathias Krause <minipli@...ecurity.net>
> Tested-by: John Allen <john.allen@....com>
> Tested-by: Rick Edgecombe <rick.p.edgecombe@...el.com>
> Signed-off-by: Chao Gao <chao.gao@...el.com>
> ---
> v14:
> - Update both hardware MSR value and VMCS field when userspace writes to
> MSR_IA32_S_CET. This keeps guest FPU and VMCS always inconsistent
> regarding MSR_IA32_S_CET.
> ---
> arch/x86/kvm/vmx/vmx.c | 19 +++++++++++++
> arch/x86/kvm/x86.c | 60 ++++++++++++++++++++++++++++++++++++++++++
> arch/x86/kvm/x86.h | 23 ++++++++++++++++
> 3 files changed, 102 insertions(+)
>
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 227b45430ad8..22bd71bebfad 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2106,6 +2106,15 @@ int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> else
> msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
> break;
> + case MSR_IA32_S_CET:
> + msr_info->data = vmcs_readl(GUEST_S_CET);
> + break;
> + case MSR_KVM_INTERNAL_GUEST_SSP:
> + msr_info->data = vmcs_readl(GUEST_SSP);
> + break;
> + case MSR_IA32_INT_SSP_TAB:
> + msr_info->data = vmcs_readl(GUEST_INTR_SSP_TABLE);
> + break;
> case MSR_IA32_DEBUGCTLMSR:
> msr_info->data = vmx_guest_debugctl_read();
> break;
> @@ -2424,6 +2433,16 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> else
> vmx->pt_desc.guest.addr_a[index / 2] = data;
> break;
> + case MSR_IA32_S_CET:
> + vmcs_writel(GUEST_S_CET, data);
> + kvm_set_xstate_msr(vcpu, msr_info);
> + break;
> + case MSR_KVM_INTERNAL_GUEST_SSP:
> + vmcs_writel(GUEST_SSP, data);
> + break;
> + case MSR_IA32_INT_SSP_TAB:
> + vmcs_writel(GUEST_INTR_SSP_TABLE, data);
> + break;
> case MSR_IA32_PERF_CAPABILITIES:
> if (data & PMU_CAP_LBR_FMT) {
> if ((data & PMU_CAP_LBR_FMT) !=
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a6036eab3852..79861b7ad44d 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1886,6 +1886,44 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
>
> data = (u32)data;
> break;
> + case MSR_IA32_U_CET:
> + case MSR_IA32_S_CET:
> + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
> + !guest_cpu_cap_has(vcpu, X86_FEATURE_IBT))
> + return KVM_MSR_RET_UNSUPPORTED;
> + if (!kvm_is_valid_u_s_cet(vcpu, data))
> + return 1;
> + break;
> + case MSR_KVM_INTERNAL_GUEST_SSP:
> + if (!host_initiated)
> + return 1;
> + fallthrough;
> + /*
> + * Note that the MSR emulation here is flawed when a vCPU
> + * doesn't support the Intel 64 architecture. The expected
> + * architectural behavior in this case is that the upper 32
> + * bits do not exist and should always read '0'. However,
> + * because the actual hardware on which the virtual CPU is
> + * running does support Intel 64, XRSTORS/XSAVES in the
> + * guest could observe behavior that violates the
> + * architecture. Intercepting XRSTORS/XSAVES for this
> + * special case isn't deemed worthwhile.
> + */
> + case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB:
> + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK))
> + return KVM_MSR_RET_UNSUPPORTED;
> + /*
> + * MSR_IA32_INT_SSP_TAB is not present on processors that do
> + * not support Intel 64 architecture.
> + */
> + if (index == MSR_IA32_INT_SSP_TAB && !guest_cpu_cap_has(vcpu, X86_FEATURE_LM))
> + return KVM_MSR_RET_UNSUPPORTED;
> + if (is_noncanonical_msr_address(data, vcpu))
> + return 1;
> + /* All SSP MSRs except MSR_IA32_INT_SSP_TAB must be 4-byte aligned */
> + if (index != MSR_IA32_INT_SSP_TAB && !IS_ALIGNED(data, 4))
> + return 1;
> + break;
> }
>
> msr.data = data;
> @@ -1930,6 +1968,20 @@ static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
> !guest_cpu_cap_has(vcpu, X86_FEATURE_RDPID))
> return 1;
> break;
> + case MSR_IA32_U_CET:
> + case MSR_IA32_S_CET:
> + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
> + !guest_cpu_cap_has(vcpu, X86_FEATURE_IBT))
> + return KVM_MSR_RET_UNSUPPORTED;
> + break;
> + case MSR_KVM_INTERNAL_GUEST_SSP:
> + if (!host_initiated)
> + return 1;
> + fallthrough;
> + case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB:
> + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK))
> + return KVM_MSR_RET_UNSUPPORTED;
> + break;
> }
>
> msr.index = index;
> @@ -4220,6 +4272,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> vcpu->arch.guest_fpu.xfd_err = data;
> break;
> #endif
> + case MSR_IA32_U_CET:
> + case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> + kvm_set_xstate_msr(vcpu, msr_info);
> + break;
> default:
> if (kvm_pmu_is_valid_msr(vcpu, msr))
> return kvm_pmu_set_msr(vcpu, msr_info);
> @@ -4569,6 +4625,10 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> msr_info->data = vcpu->arch.guest_fpu.xfd_err;
> break;
> #endif
> + case MSR_IA32_U_CET:
> + case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> + kvm_get_xstate_msr(vcpu, msr_info);
> + break;
> default:
> if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
> return kvm_pmu_get_msr(vcpu, msr_info);
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index cf4f73a95825..95d2a82a4674 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -735,4 +735,27 @@ static inline void kvm_set_xstate_msr(struct kvm_vcpu *vcpu,
> kvm_fpu_put();
> }
>
> +#define CET_US_RESERVED_BITS GENMASK(9, 6)
> +#define CET_US_SHSTK_MASK_BITS GENMASK(1, 0)
> +#define CET_US_IBT_MASK_BITS (GENMASK_ULL(5, 2) | GENMASK_ULL(63, 10))
> +#define CET_US_LEGACY_BITMAP_BASE(data) ((data) >> 12)
> +
> +static inline bool kvm_is_valid_u_s_cet(struct kvm_vcpu *vcpu, u64 data)
> +{
> + if (data & CET_US_RESERVED_BITS)
> + return false;
> + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
> + (data & CET_US_SHSTK_MASK_BITS))
> + return false;
> + if (!guest_cpu_cap_has(vcpu, X86_FEATURE_IBT) &&
> + (data & CET_US_IBT_MASK_BITS))
> + return false;
> + if (!IS_ALIGNED(CET_US_LEGACY_BITMAP_BASE(data), 4))
> + return false;
> + /* IBT can be suppressed iff the TRACKER isn't WAIT_ENDBR. */
> + if ((data & CET_SUPPRESS) && (data & CET_WAIT_ENDBR))
> + return false;
> +
> + return true;
> +}
> #endif
Powered by blists - more mailing lists