[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250704085027.182163-20-chao.gao@intel.com>
Date: Fri, 4 Jul 2025 01:49:50 -0700
From: Chao Gao <chao.gao@...el.com>
To: kvm@...r.kernel.org,
linux-kernel@...r.kernel.org,
x86@...nel.org,
seanjc@...gle.com,
pbonzini@...hat.com,
dave.hansen@...el.com
Cc: rick.p.edgecombe@...el.com,
mlevitsk@...hat.com,
john.allen@....com,
weijiang.yang@...el.com,
minipli@...ecurity.net,
xin@...or.com,
Chao Gao <chao.gao@...el.com>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...hat.com>,
Borislav Petkov <bp@...en8.de>,
Dave Hansen <dave.hansen@...ux.intel.com>,
"H. Peter Anvin" <hpa@...or.com>
Subject: [PATCH v11 19/23] KVM: x86: Enable CET virtualization for VMX and advertise to userspace
From: Yang Weijiang <weijiang.yang@...el.com>
Expose CET features to guest if KVM/host can support them, clear CPUID
feature bits if KVM/host cannot support.
Set CPUID feature bits so that CET features are available in guest CPUID.
Add CR4.CET bit support in order to allow guest set CET master control
bit.
Disable KVM CET feature if unrestricted_guest is unsupported/disabled as
KVM does not support emulating CET.
The CET load-bits in VM_ENTRY/VM_EXIT control fields should be set to make
guest CET xstates isolated from host's.
On platforms with VMX_BASIC[bit56] == 0, inject #CP at VMX entry with error
code will fail, and if VMX_BASIC[bit56] == 1, #CP injection with or without
error code is allowed. Disable CET feature bits if the MSR bit is cleared
so that nested VMM can inject #CP if and only if VMX_BASIC[bit56] == 1.
Don't expose CET feature if either of {U,S}_CET xstate bits is cleared
in host XSS or if XSAVES isn't supported.
CET MSRs are reset to 0s after RESET, power-up and INIT, clear guest CET
xsave-area fields so that guest CET MSRs are reset to 0s after the events.
Meanwhile explicitly disable SHSTK and IBT for SVM because CET KVM enabling
for SVM is not ready.
Signed-off-by: Yang Weijiang <weijiang.yang@...el.com>
Signed-off-by: Chao Gao <chao.gao@...el.com>
---
v11:
1) Remove IBT CPUID reference to raw CPUID info after discussion.
2) Disable SHSTK/IBT support in SVM explicitly per Sean's comment.
3) Handle GUEST_S_CET field as a common field for SHSTK and IBT.
---
arch/x86/include/asm/kvm_host.h | 2 +-
arch/x86/include/asm/vmx.h | 1 +
arch/x86/kvm/cpuid.c | 2 ++
arch/x86/kvm/svm/svm.c | 4 ++++
arch/x86/kvm/vmx/capabilities.h | 5 +++++
arch/x86/kvm/vmx/vmx.c | 30 +++++++++++++++++++++++++++++-
arch/x86/kvm/vmx/vmx.h | 6 ++++--
arch/x86/kvm/x86.c | 22 +++++++++++++++++++---
arch/x86/kvm/x86.h | 3 +++
9 files changed, 68 insertions(+), 7 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 30d9d434c048..2aca91c7ae1b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -142,7 +142,7 @@
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
| X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
| X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
- | X86_CR4_LAM_SUP))
+ | X86_CR4_LAM_SUP | X86_CR4_CET))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index ce10a7e2d3d9..c85c50019523 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -134,6 +134,7 @@
#define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49)
#define VMX_BASIC_INOUT BIT_ULL(54)
#define VMX_BASIC_TRUE_CTLS BIT_ULL(55)
+#define VMX_BASIC_NO_HW_ERROR_CODE_CC BIT_ULL(56)
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
{
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9b45607f9b37..7007ce792706 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -944,6 +944,7 @@ void kvm_set_cpu_caps(void)
VENDOR_F(WAITPKG),
F(SGX_LC),
F(BUS_LOCK_DETECT),
+ F(SHSTK),
);
/*
@@ -970,6 +971,7 @@ void kvm_set_cpu_caps(void)
F(AMX_INT8),
F(AMX_BF16),
F(FLUSH_L1D),
+ F(IBT),
);
if (boot_cpu_has(X86_FEATURE_AMD_IBPB_RET) &&
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 803574920e41..6375695ce285 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -5223,6 +5223,10 @@ static __init void svm_set_cpu_caps(void)
kvm_caps.supported_perf_cap = 0;
kvm_caps.supported_xss = 0;
+ /* KVM doesn't yet support CET virtualization for SVM. */
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+
/* CPUID 0x80000001 and 0x8000000A (SVM features) */
if (nested) {
kvm_cpu_cap_set(X86_FEATURE_SVM);
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 0f0e1717dc80..09c130d2e595 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -77,6 +77,11 @@ static inline bool cpu_has_vmx_basic_inout(void)
return vmcs_config.basic & VMX_BASIC_INOUT;
}
+static inline bool cpu_has_vmx_basic_no_hw_errcode(void)
+{
+ return vmcs_config.basic & VMX_BASIC_NO_HW_ERROR_CODE_CC;
+}
+
static inline bool cpu_has_virtual_nmis(void)
{
return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS &&
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3d6da3836e6b..d837876e3726 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2598,6 +2598,7 @@ static int setup_vmcs_config(struct vmcs_config *vmcs_conf,
{ VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER },
{ VM_ENTRY_LOAD_BNDCFGS, VM_EXIT_CLEAR_BNDCFGS },
{ VM_ENTRY_LOAD_IA32_RTIT_CTL, VM_EXIT_CLEAR_IA32_RTIT_CTL },
+ { VM_ENTRY_LOAD_CET_STATE, VM_EXIT_LOAD_CET_STATE },
};
memset(vmcs_conf, 0, sizeof(*vmcs_conf));
@@ -4862,6 +4863,14 @@ void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); /* 22.2.1 */
+ if (kvm_cpu_cap_has(X86_FEATURE_SHSTK)) {
+ vmcs_writel(GUEST_SSP, 0);
+ vmcs_writel(GUEST_INTR_SSP_TABLE, 0);
+ }
+ if (kvm_cpu_cap_has(X86_FEATURE_IBT) ||
+ kvm_cpu_cap_has(X86_FEATURE_SHSTK))
+ vmcs_writel(GUEST_S_CET, 0);
+
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
vpid_sync_context(vmx->vpid);
@@ -6310,6 +6319,10 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
if (vmcs_read32(VM_EXIT_MSR_STORE_COUNT) > 0)
vmx_dump_msrs("guest autostore", &vmx->msr_autostore.guest);
+ if (vmentry_ctl & VM_ENTRY_LOAD_CET_STATE)
+ pr_err("S_CET = 0x%016lx, SSP = 0x%016lx, SSP TABLE = 0x%016lx\n",
+ vmcs_readl(GUEST_S_CET), vmcs_readl(GUEST_SSP),
+ vmcs_readl(GUEST_INTR_SSP_TABLE));
pr_err("*** Host State ***\n");
pr_err("RIP = 0x%016lx RSP = 0x%016lx\n",
vmcs_readl(HOST_RIP), vmcs_readl(HOST_RSP));
@@ -6340,6 +6353,10 @@ void dump_vmcs(struct kvm_vcpu *vcpu)
vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL));
if (vmcs_read32(VM_EXIT_MSR_LOAD_COUNT) > 0)
vmx_dump_msrs("host autoload", &vmx->msr_autoload.host);
+ if (vmexit_ctl & VM_EXIT_LOAD_CET_STATE)
+ pr_err("S_CET = 0x%016lx, SSP = 0x%016lx, SSP TABLE = 0x%016lx\n",
+ vmcs_readl(HOST_S_CET), vmcs_readl(HOST_SSP),
+ vmcs_readl(HOST_INTR_SSP_TABLE));
pr_err("*** Control State ***\n");
pr_err("CPUBased=0x%08x SecondaryExec=0x%08x TertiaryExec=0x%016llx\n",
@@ -7917,7 +7934,6 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_set(X86_FEATURE_UMIP);
/* CPUID 0xD.1 */
- kvm_caps.supported_xss = 0;
if (!cpu_has_vmx_xsaves())
kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
@@ -7929,6 +7945,18 @@ static __init void vmx_set_cpu_caps(void)
if (cpu_has_vmx_waitpkg())
kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
+
+ /*
+ * Disable CET if unrestricted_guest is unsupported as KVM doesn't
+ * enforce CET HW behaviors in emulator. On platforms with
+ * VMX_BASIC[bit56] == 0, inject #CP at VMX entry with error code
+ * fails, so disable CET in this case too.
+ */
+ if (!cpu_has_load_cet_ctrl() || !enable_unrestricted_guest ||
+ !cpu_has_vmx_basic_no_hw_errcode()) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ }
}
static bool vmx_is_io_intercepted(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 87174d961c85..f93062965a7a 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -482,7 +482,8 @@ static inline u8 vmx_get_rvi(void)
VM_ENTRY_LOAD_IA32_EFER | \
VM_ENTRY_LOAD_BNDCFGS | \
VM_ENTRY_PT_CONCEAL_PIP | \
- VM_ENTRY_LOAD_IA32_RTIT_CTL)
+ VM_ENTRY_LOAD_IA32_RTIT_CTL | \
+ VM_ENTRY_LOAD_CET_STATE)
#define __KVM_REQUIRED_VMX_VM_EXIT_CONTROLS \
(VM_EXIT_SAVE_DEBUG_CONTROLS | \
@@ -504,7 +505,8 @@ static inline u8 vmx_get_rvi(void)
VM_EXIT_LOAD_IA32_EFER | \
VM_EXIT_CLEAR_BNDCFGS | \
VM_EXIT_PT_CONCEAL_PIP | \
- VM_EXIT_CLEAR_IA32_RTIT_CTL)
+ VM_EXIT_CLEAR_IA32_RTIT_CTL | \
+ VM_EXIT_LOAD_CET_STATE)
#define KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL \
(PIN_BASED_EXT_INTR_MASK | \
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b17a8bf84db3..3b99124f8985 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -223,7 +223,8 @@ static struct kvm_user_return_msrs __percpu *user_return_msrs;
| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
| XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)
-#define KVM_SUPPORTED_XSS 0
+#define KVM_SUPPORTED_XSS (XFEATURE_MASK_CET_USER | \
+ XFEATURE_MASK_CET_KERNEL)
bool __read_mostly allow_smaller_maxphyaddr = 0;
EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr);
@@ -10073,6 +10074,20 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
kvm_caps.supported_xss = 0;
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
+ kvm_caps.supported_xss &= ~(XFEATURE_MASK_CET_USER |
+ XFEATURE_MASK_CET_KERNEL);
+
+ if ((kvm_caps.supported_xss & (XFEATURE_MASK_CET_USER |
+ XFEATURE_MASK_CET_KERNEL)) !=
+ (XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL)) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ kvm_caps.supported_xss &= ~(XFEATURE_MASK_CET_USER |
+ XFEATURE_MASK_CET_KERNEL);
+ }
+
if (kvm_caps.has_tsc_control) {
/*
* Make sure the user can only configure tsc_khz values that
@@ -12729,10 +12744,11 @@ static void kvm_xstate_reset(struct kvm_vcpu *vcpu, bool init_event)
/*
* On INIT, only select XSTATE components are zeroed, most components
* are unchanged. Currently, the only components that are zeroed and
- * supported by KVM are MPX related.
+ * supported by KVM are MPX and CET related.
*/
xfeatures_mask = (kvm_caps.supported_xcr0 | kvm_caps.supported_xss) &
- (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+ (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR |
+ XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL);
if (!xfeatures_mask)
return;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 8d2049a1f41b..e54c779e74a3 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -647,6 +647,9 @@ static inline bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
__reserved_bits |= X86_CR4_PCIDE; \
if (!__cpu_has(__c, X86_FEATURE_LAM)) \
__reserved_bits |= X86_CR4_LAM_SUP; \
+ if (!__cpu_has(__c, X86_FEATURE_SHSTK) && \
+ !__cpu_has(__c, X86_FEATURE_IBT)) \
+ __reserved_bits |= X86_CR4_CET; \
__reserved_bits; \
})
--
2.47.1
Powered by blists - more mailing lists