lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZM1tNJ9ZdQb+VZVo@google.com>
Date:   Fri, 4 Aug 2023 14:27:16 -0700
From:   Sean Christopherson <seanjc@...gle.com>
To:     Chao Gao <chao.gao@...el.com>
Cc:     Yang Weijiang <weijiang.yang@...el.com>, pbonzini@...hat.com,
        peterz@...radead.org, john.allen@....com, kvm@...r.kernel.org,
        linux-kernel@...r.kernel.org, rick.p.edgecombe@...el.com,
        binbin.wu@...ux.intel.com
Subject: Re: [PATCH v5 11/19] KVM:VMX: Emulate read and write to CET MSRs

On Fri, Aug 04, 2023, Chao Gao wrote:
> On Thu, Aug 03, 2023 at 12:27:24AM -0400, Yang Weijiang wrote:
> >Add emulation interface for CET MSR read and write.
> >The emulation code is split into common part and vendor specific
> >part, the former resides in x86.c to benefic different x86 CPU
> >vendors, the latter for VMX is implemented in this patch.
> >
> >Signed-off-by: Yang Weijiang <weijiang.yang@...el.com>
> >---
> > arch/x86/kvm/vmx/vmx.c |  27 +++++++++++
> > arch/x86/kvm/x86.c     | 104 +++++++++++++++++++++++++++++++++++++----
> > arch/x86/kvm/x86.h     |  18 +++++++
> > 3 files changed, 141 insertions(+), 8 deletions(-)
> >
> >diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> >index 6aa76124e81e..ccf750e79608 100644
> >--- a/arch/x86/kvm/vmx/vmx.c
> >+++ b/arch/x86/kvm/vmx/vmx.c
> >@@ -2095,6 +2095,18 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> > 		else
> > 			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
> > 		break;
> >+	case MSR_IA32_S_CET:
> >+	case MSR_KVM_GUEST_SSP:
> >+	case MSR_IA32_INT_SSP_TAB:
> >+		if (kvm_get_msr_common(vcpu, msr_info))
> >+			return 1;
> >+		if (msr_info->index == MSR_KVM_GUEST_SSP)
> >+			msr_info->data = vmcs_readl(GUEST_SSP);
> >+		else if (msr_info->index == MSR_IA32_S_CET)
> >+			msr_info->data = vmcs_readl(GUEST_S_CET);
> >+		else if (msr_info->index == MSR_IA32_INT_SSP_TAB)
> >+			msr_info->data = vmcs_readl(GUEST_INTR_SSP_TABLE);
> 
> This if-else-if suggests that they are focibly grouped together to just
> share the call of kvm_get_msr_common(). For readability, I think it is better
> to handle them separately.
> 
> e.g.,
> 	case MSR_IA32_S_CET:
> 		if (kvm_get_msr_common(vcpu, msr_info))
> 			return 1;
> 		msr_info->data = vmcs_readl(GUEST_S_CET);
> 		break;
> 
> 	case MSR_KVM_GUEST_SSP:
> 		if (kvm_get_msr_common(vcpu, msr_info))
> 			return 1;
> 		msr_info->data = vmcs_readl(GUEST_SSP);
> 		break;

Actually, we can do even better.  We have an existing framework for these types
of prechecks, I just completely forgot about it :-(  (my "look at PAT" was a bad
suggestion).

Handle the checks in __kvm_set_msr() and __kvm_get_msr(), i.e. *before* calling
into vendor code.  Then vendor code doesn't need to make weird callbacks.

> > int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> > {
> > 	u32 msr = msr_info->index;
> >@@ -3981,6 +4014,45 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> > 		vcpu->arch.guest_fpu.xfd_err = data;
> > 		break;
> > #endif
> >+#define CET_EXCLUSIVE_BITS		(CET_SUPPRESS | CET_WAIT_ENDBR)
> >+#define CET_CTRL_RESERVED_BITS		GENMASK(9, 6)

Please use a single namespace for these #defines, e.g. CET_CTRL_* or maybe
CET_US_* for everything.

> >+#define CET_SHSTK_MASK_BITS		GENMASK(1, 0)
> >+#define CET_IBT_MASK_BITS		(GENMASK_ULL(5, 2) | \
> >+					 GENMASK_ULL(63, 10))
> >+#define CET_LEG_BITMAP_BASE(data)	((data) >> 12)

Bah, stupid SDM.  Please spell out "LEGACY", I thought "LEG" was short for "LEGAL"
since this looks a lot like a page shift, i.e. getting a pfn.

> >+static bool kvm_cet_is_msr_accessible(struct kvm_vcpu *vcpu,
> >+				      struct msr_data *msr)
> >+{
> >+	if (is_shadow_stack_msr(msr->index)) {
> >+		if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK))
> >+			return false;
> >+
> >+		if (msr->index == MSR_KVM_GUEST_SSP)
> >+			return msr->host_initiated;
> >+
> >+		return msr->host_initiated ||
> >+			guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
> >+	}
> >+
> >+	if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
> >+	    !kvm_cpu_cap_has(X86_FEATURE_IBT))
> >+		return false;
> >+
> >+	return msr->host_initiated ||
> >+		guest_cpuid_has(vcpu, X86_FEATURE_IBT) ||
> >+		guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);

Similar to my suggestion for XSS, I think we should drop the waiver for host_initiated
accesses, i.e. require the feature to be enabled and exposed to the guest, even
for the host.

> >diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> >index c69fc027f5ec..3b79d6db2f83 100644
> >--- a/arch/x86/kvm/x86.h
> >+++ b/arch/x86/kvm/x86.h
> >@@ -552,4 +552,22 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
> > 			 unsigned int port, void *data,  unsigned int count,
> > 			 int in);
> > 
> >+/*
> >+ * Guest xstate MSRs have been loaded in __msr_io(), disable preemption before
> >+ * access the MSRs to avoid MSR content corruption.
> >+ */
> 
> I think it is better to describe what the function does prior to jumping into
> details like where guest FPU is loaded.
> 
> /*
>  * Lock and/or reload guest FPU and access xstate MSRs. For accesses initiated
>  * by host, guest FPU is loaded in __msr_io(). For accesses initiated by guest,
>  * guest FPU should have been loaded already.
>  */
> >+static inline void kvm_get_xsave_msr(struct msr_data *msr_info)
> >+{
> >+	kvm_fpu_get();
> >+	rdmsrl(msr_info->index, msr_info->data);
> >+	kvm_fpu_put();
> >+}
> >+
> >+static inline void kvm_set_xsave_msr(struct msr_data *msr_info)
> >+{
> >+	kvm_fpu_get();
> >+	wrmsrl(msr_info->index, msr_info->data);
> >+	kvm_fpu_put();
> >+}
> 
> Can you rename functions to kvm_get/set_xstate_msr() to align with the comment
> and patch 6? And if there is no user outside x86.c, you can just put these two
> functions right after the is_xstate_msr() added in patch 6.

+1.  These should also assert that (a) guest FPU state is loaded and (b) the MSR
is passed through to the guest.  I might be ok dropping (b) if both VMX and SVM
passthrough all MSRs if they're exposed to the guest, i.e. not lazily passed
through.

Sans any changes to kvm_{g,s}et_xsave_msr(), I think this?  (completely untested)


---
 arch/x86/kvm/vmx/vmx.c |  34 +++-------
 arch/x86/kvm/x86.c     | 151 +++++++++++++++--------------------------
 2 files changed, 64 insertions(+), 121 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 491039aeb61b..1211eb469d06 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2100,16 +2100,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
 		break;
 	case MSR_IA32_S_CET:
+		msr_info->data = vmcs_readl(GUEST_S_CET);
+		break;
 	case MSR_KVM_GUEST_SSP:
+		msr_info->data = vmcs_readl(GUEST_SSP);
+		break;
 	case MSR_IA32_INT_SSP_TAB:
-		if (kvm_get_msr_common(vcpu, msr_info))
-			return 1;
-		if (msr_info->index == MSR_KVM_GUEST_SSP)
-			msr_info->data = vmcs_readl(GUEST_SSP);
-		else if (msr_info->index == MSR_IA32_S_CET)
-			msr_info->data = vmcs_readl(GUEST_S_CET);
-		else if (msr_info->index == MSR_IA32_INT_SSP_TAB)
-			msr_info->data = vmcs_readl(GUEST_INTR_SSP_TABLE);
+		msr_info->data = vmcs_readl(GUEST_INTR_SSP_TABLE);
 		break;
 	case MSR_IA32_DEBUGCTLMSR:
 		msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
@@ -2432,25 +2429,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		else
 			vmx->pt_desc.guest.addr_a[index / 2] = data;
 		break;
-	case MSR_IA32_PL0_SSP ... MSR_IA32_PL2_SSP:
-		if (kvm_set_msr_common(vcpu, msr_info))
-			return 1;
-		if (data) {
-			vmx_disable_write_intercept_sss_msr(vcpu);
-			wrmsrl(msr_index, data);
-		}
-		break;
 	case MSR_IA32_S_CET:
+		vmcs_writel(GUEST_S_CET, data);
+		break;
 	case MSR_KVM_GUEST_SSP:
+		vmcs_writel(GUEST_SSP, data);
+		break;
 	case MSR_IA32_INT_SSP_TAB:
-		if (kvm_set_msr_common(vcpu, msr_info))
-			return 1;
-		if (msr_index == MSR_KVM_GUEST_SSP)
-			vmcs_writel(GUEST_SSP, data);
-		else if (msr_index == MSR_IA32_S_CET)
-			vmcs_writel(GUEST_S_CET, data);
-		else if (msr_index == MSR_IA32_INT_SSP_TAB)
-			vmcs_writel(GUEST_INTR_SSP_TABLE, data);
+		vmcs_writel(GUEST_INTR_SSP_TABLE, data);
 		break;
 	case MSR_IA32_PERF_CAPABILITIES:
 		if (data && !vcpu_to_pmu(vcpu)->version)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7385fc25a987..75e6de7c9268 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1838,6 +1838,11 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
 }
 EXPORT_SYMBOL_GPL(kvm_msr_allowed);
 
+#define CET_US_RESERVED_BITS		GENMASK(9, 6)
+#define CET_US_SHSTK_MASK_BITS		GENMASK(1, 0)
+#define CET_US_IBT_MASK_BITS		(GENMASK_ULL(5, 2) | GENMASK_ULL(63, 10))
+#define CET_US_LEGACY_BITMAP_BASE(data)	((data) >> 12)
+
 /*
  * Write @data into the MSR specified by @index.  Select MSR specific fault
  * checks are bypassed if @host_initiated is %true.
@@ -1897,6 +1902,35 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
 
 		data = (u32)data;
 		break;
+	case MSR_IA32_U_CET:
+	case MSR_IA32_S_CET:
+		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK) &&
+		    !guest_can_use(vcpu, X86_FEATURE_IBT))
+		    	return 1;
+		if (data & CET_US_RESERVED_BITS)
+			return 1;
+		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK) &&
+		    (data & CET_US_SHSTK_MASK_BITS))
+			return 1;
+		if (!guest_can_use(vcpu, X86_FEATURE_IBT) &&
+		    (data & CET_US_IBT_MASK_BITS))
+			return 1;
+		if (!IS_ALIGNED(CET_US_LEGACY_BITMAP_BASE(data), 4))
+			return 1;
+
+		/* IBT can be suppressed iff the TRACKER isn't WAIT_ENDBR. */
+		if ((data & CET_SUPPRESS) && (data & CET_WAIT_ENDBR))
+			return 1;
+		break;
+	case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB:
+	case MSR_KVM_GUEST_SSP:
+		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
+			return 1;
+		if (is_noncanonical_address(data, vcpu))
+			return 1;
+		if (!IS_ALIGNED(data, 4))
+			return 1;
+		break;
 	}
 
 	msr.data = data;
@@ -1940,6 +1974,17 @@ static int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
 		    !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
 			return 1;
 		break;
+	case MSR_IA32_U_CET:
+	case MSR_IA32_S_CET:
+		if (!guest_can_use(vcpu, X86_FEATURE_IBT) &&
+		    !guest_can_use(vcpu, X86_FEATURE_SHSTK))
+			return 1;
+		break;
+	case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB:
+	case MSR_KVM_GUEST_SSP:
+		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK))
+			return 1;
+		break;
 	}
 
 	msr.index = index;
@@ -3640,47 +3685,6 @@ static bool kvm_is_msr_to_save(u32 msr_index)
 	return false;
 }
 
-static inline bool is_shadow_stack_msr(u32 msr)
-{
-	return msr == MSR_IA32_PL0_SSP ||
-		msr == MSR_IA32_PL1_SSP ||
-		msr == MSR_IA32_PL2_SSP ||
-		msr == MSR_IA32_PL3_SSP ||
-		msr == MSR_IA32_INT_SSP_TAB ||
-		msr == MSR_KVM_GUEST_SSP;
-}
-
-static bool kvm_cet_is_msr_accessible(struct kvm_vcpu *vcpu,
-				      struct msr_data *msr)
-{
-	if (is_shadow_stack_msr(msr->index)) {
-		if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK))
-			return false;
-
-		/*
-		 * This MSR is synthesized mainly for userspace access during
-		 * Live Migration, it also can be accessed in SMM mode by VMM.
-		 * Guest is not allowed to access this MSR.
-		 */
-		if (msr->index == MSR_KVM_GUEST_SSP) {
-			if (IS_ENABLED(CONFIG_X86_64) && is_smm(vcpu))
-				return true;
-
-			return msr->host_initiated;
-		}
-
-		return msr->host_initiated ||
-			guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
-	}
-
-	if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
-	    !kvm_cpu_cap_has(X86_FEATURE_IBT))
-		return false;
-
-	return msr->host_initiated ||
-		guest_cpuid_has(vcpu, X86_FEATURE_IBT) ||
-		guest_cpuid_has(vcpu, X86_FEATURE_SHSTK);
-}
 
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
@@ -4036,46 +4040,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vcpu->arch.guest_fpu.xfd_err = data;
 		break;
 #endif
-#define CET_EXCLUSIVE_BITS		(CET_SUPPRESS | CET_WAIT_ENDBR)
-#define CET_CTRL_RESERVED_BITS		GENMASK(9, 6)
-#define CET_SHSTK_MASK_BITS		GENMASK(1, 0)
-#define CET_IBT_MASK_BITS		(GENMASK_ULL(5, 2) | \
-					 GENMASK_ULL(63, 10))
-#define CET_LEG_BITMAP_BASE(data)	((data) >> 12)
 	case MSR_IA32_U_CET:
-	case MSR_IA32_S_CET:
-		if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
-			return 1;
-		if (!!(data & CET_CTRL_RESERVED_BITS))
-			return 1;
-		if (!guest_can_use(vcpu, X86_FEATURE_SHSTK) &&
-		    (data & CET_SHSTK_MASK_BITS))
-			return 1;
-		if (!guest_can_use(vcpu, X86_FEATURE_IBT) &&
-		    (data & CET_IBT_MASK_BITS))
-			return 1;
-		if (!IS_ALIGNED(CET_LEG_BITMAP_BASE(data), 4) ||
-		    (data & CET_EXCLUSIVE_BITS) == CET_EXCLUSIVE_BITS)
-			return 1;
-		if (msr == MSR_IA32_U_CET)
-			kvm_set_xsave_msr(msr_info);
-		break;
-	case MSR_KVM_GUEST_SSP:
-	case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB:
-		if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
-			return 1;
-		if (is_noncanonical_address(data, vcpu))
-			return 1;
-		if (!IS_ALIGNED(data, 4))
-			return 1;
-		if (msr == MSR_IA32_PL0_SSP || msr == MSR_IA32_PL1_SSP ||
-		    msr == MSR_IA32_PL2_SSP) {
-			vcpu->arch.cet_s_ssp[msr - MSR_IA32_PL0_SSP] = data;
-			if (!vcpu->arch.cet_sss_active && data)
-				vcpu->arch.cet_sss_active = true;
-		} else if (msr == MSR_IA32_PL3_SSP) {
-			kvm_set_xsave_msr(msr_info);
-		}
+	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+		kvm_set_xsave_msr(msr_info);
 		break;
 	default:
 		if (kvm_pmu_is_valid_msr(vcpu, msr))
@@ -4436,17 +4403,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 #endif
 	case MSR_IA32_U_CET:
-	case MSR_IA32_S_CET:
-	case MSR_KVM_GUEST_SSP:
-	case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB:
-		if (!kvm_cet_is_msr_accessible(vcpu, msr_info))
-			return 1;
-		if (msr == MSR_IA32_PL0_SSP || msr == MSR_IA32_PL1_SSP ||
-		    msr == MSR_IA32_PL2_SSP) {
-			msr_info->data = vcpu->arch.cet_s_ssp[msr - MSR_IA32_PL0_SSP];
-		} else if (msr == MSR_IA32_U_CET || msr == MSR_IA32_PL3_SSP) {
-			kvm_get_xsave_msr(msr_info);
-		}
+	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+		kvm_get_xsave_msr(msr_info);
 		break;
 	default:
 		if (kvm_pmu_is_valid_msr(vcpu, msr))
@@ -7330,9 +7288,13 @@ static void kvm_probe_msr_to_save(u32 msr_index)
 		break;
 	case MSR_IA32_U_CET:
 	case MSR_IA32_S_CET:
+		if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+		    !kvm_cpu_cap_has(X86_FEATURE_IBT))
+			return;
+		break;
 	case MSR_KVM_GUEST_SSP:
 	case MSR_IA32_PL0_SSP ... MSR_IA32_INT_SSP_TAB:
-		if (!kvm_is_cet_supported())
+		if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK))
 			return;
 		break;
 	default:
@@ -9664,13 +9626,8 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 		kvm_caps.supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
 	}
 	if (boot_cpu_has(X86_FEATURE_XSAVES)) {
-		u32 eax, ebx, ecx, edx;
-
-		cpuid_count(0xd, 1, &eax, &ebx, &ecx, &edx);
 		rdmsrl(MSR_IA32_XSS, host_xss);
 		kvm_caps.supported_xss = host_xss & KVM_SUPPORTED_XSS;
-		if (ecx & XFEATURE_MASK_CET_KERNEL)
-			kvm_caps.supported_xss |= XFEATURE_MASK_CET_KERNEL;
 	}
 
 	rdmsrl_safe(MSR_EFER, &host_efer);

base-commit: efb9177acd7a4df5883b844e1ec9c69ef0899c9c
-- 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ