lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240226143630.33643-25-jiangshanlai@gmail.com>
Date: Mon, 26 Feb 2024 22:35:41 +0800
From: Lai Jiangshan <jiangshanlai@...il.com>
To: linux-kernel@...r.kernel.org
Cc: Lai Jiangshan <jiangshan.ljs@...group.com>,
	Hou Wenlong <houwenlong.hwl@...group.com>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Peter Zijlstra <peterz@...radead.org>,
	Sean Christopherson <seanjc@...gle.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Borislav Petkov <bp@...en8.de>,
	Ingo Molnar <mingo@...hat.com>,
	kvm@...r.kernel.org,
	Paolo Bonzini <pbonzini@...hat.com>,
	x86@...nel.org,
	Kees Cook <keescook@...omium.org>,
	Juergen Gross <jgross@...e.com>,
	Dave Hansen <dave.hansen@...ux.intel.com>,
	"H. Peter Anvin" <hpa@...or.com>
Subject: [RFC PATCH 24/73] KVM: x86/PVM: Introduce PVM mode switching

From: Lai Jiangshan <jiangshan.ljs@...group.com>

In PVM ABI, CPL is not used directly. Instead, supervisor mode and user
mode are used to represent the original CPL0/CPL3 concept. It is assumed
that the kernel runs in supervisor mode and userspace runs in user mode.
From the x86 operating modes perspective, the PVM supervisor mode is a
modified 64-bit long mode. Therefore, 32-bit compatibility mode is not
allowed for the supervisor mode, and its hardware CS must be __USER_CS.

When switching to user mode, the stack and GS base of supervisor mode
are saved into the associated MSRs. When switching back from user mode,
the stack and GS base of supervisor mode are automatically restored from
the MSRs. Therefore, in PVM ABI, the value of MSR_KERNEL_GS_BASE in
supervisor mode is the same as the value of MSR_GS_BASE in supervisor
mode, which does not follow the x86 ABI.

Signed-off-by: Lai Jiangshan <jiangshan.ljs@...group.com>
Signed-off-by: Hou Wenlong <houwenlong.hwl@...group.com>
---
 arch/x86/kvm/pvm/pvm.c | 129 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/pvm/pvm.h |   1 +
 2 files changed, 130 insertions(+)

diff --git a/arch/x86/kvm/pvm/pvm.c b/arch/x86/kvm/pvm/pvm.c
index 69f8fbbb6176..3735baee1d5f 100644
--- a/arch/x86/kvm/pvm/pvm.c
+++ b/arch/x86/kvm/pvm/pvm.c
@@ -31,6 +31,22 @@ static bool __read_mostly is_intel;
 
 static unsigned long host_idt_base;
 
+static inline bool is_smod(struct vcpu_pvm *pvm)
+{
+	unsigned long switch_flags = pvm->switch_flags;
+
+	if ((switch_flags & SWITCH_FLAGS_MOD_TOGGLE) == SWITCH_FLAGS_SMOD)
+		return true;
+
+	WARN_ON_ONCE((switch_flags & SWITCH_FLAGS_MOD_TOGGLE) != SWITCH_FLAGS_UMOD);
+	return false;
+}
+
+static inline void pvm_switch_flags_toggle_mod(struct vcpu_pvm *pvm)
+{
+	pvm->switch_flags ^= SWITCH_FLAGS_MOD_TOGGLE;
+}
+
 static inline u16 kernel_cs_by_msr(u64 msr_star)
 {
 	// [47..32]
@@ -80,6 +96,82 @@ static inline void __load_fs_base(struct vcpu_pvm *pvm)
 	wrmsrl(MSR_FS_BASE, pvm->segments[VCPU_SREG_FS].base);
 }
 
+static u64 pvm_read_guest_gs_base(struct vcpu_pvm *pvm)
+{
+	preempt_disable();
+	if (pvm->loaded_cpu_state)
+		__save_gs_base(pvm);
+	preempt_enable();
+
+	return pvm->segments[VCPU_SREG_GS].base;
+}
+
+static u64 pvm_read_guest_fs_base(struct vcpu_pvm *pvm)
+{
+	preempt_disable();
+	if (pvm->loaded_cpu_state)
+		__save_fs_base(pvm);
+	preempt_enable();
+
+	return pvm->segments[VCPU_SREG_FS].base;
+}
+
+static u64 pvm_read_guest_kernel_gs_base(struct vcpu_pvm *pvm)
+{
+	return pvm->msr_kernel_gs_base;
+}
+
+static void pvm_write_guest_gs_base(struct vcpu_pvm *pvm, u64 data)
+{
+	preempt_disable();
+	pvm->segments[VCPU_SREG_GS].base = data;
+	if (pvm->loaded_cpu_state)
+		__load_gs_base(pvm);
+	preempt_enable();
+}
+
+static void pvm_write_guest_fs_base(struct vcpu_pvm *pvm, u64 data)
+{
+	preempt_disable();
+	pvm->segments[VCPU_SREG_FS].base = data;
+	if (pvm->loaded_cpu_state)
+		__load_fs_base(pvm);
+	preempt_enable();
+}
+
+static void pvm_write_guest_kernel_gs_base(struct vcpu_pvm *pvm, u64 data)
+{
+	pvm->msr_kernel_gs_base = data;
+}
+
+// switch_to_smod() and switch_to_umod() switch the mode (smod/umod) and
+// the CR3.  No vTLB flushing when switching the CR3 per PVM Spec.
+static inline void switch_to_smod(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_pvm *pvm = to_pvm(vcpu);
+
+	pvm_switch_flags_toggle_mod(pvm);
+	kvm_mmu_new_pgd(vcpu, pvm->msr_switch_cr3);
+	swap(pvm->msr_switch_cr3, vcpu->arch.cr3);
+
+	pvm_write_guest_gs_base(pvm, pvm->msr_kernel_gs_base);
+	kvm_rsp_write(vcpu, pvm->msr_supervisor_rsp);
+
+	pvm->hw_cs = __USER_CS;
+	pvm->hw_ss = __USER_DS;
+}
+
+static inline void switch_to_umod(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_pvm *pvm = to_pvm(vcpu);
+
+	pvm->msr_supervisor_rsp = kvm_rsp_read(vcpu);
+
+	pvm_switch_flags_toggle_mod(pvm);
+	kvm_mmu_new_pgd(vcpu, pvm->msr_switch_cr3);
+	swap(pvm->msr_switch_cr3, vcpu->arch.cr3);
+}
+
 /*
  * Test whether DS, ES, FS and GS need to be reloaded.
  *
@@ -309,6 +401,15 @@ static int pvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	int ret = 0;
 
 	switch (msr_info->index) {
+	case MSR_FS_BASE:
+		msr_info->data = pvm_read_guest_fs_base(pvm);
+		break;
+	case MSR_GS_BASE:
+		msr_info->data = pvm_read_guest_gs_base(pvm);
+		break;
+	case MSR_KERNEL_GS_BASE:
+		msr_info->data = pvm_read_guest_kernel_gs_base(pvm);
+		break;
 	case MSR_STAR:
 		msr_info->data = pvm->msr_star;
 		break;
@@ -352,6 +453,9 @@ static int pvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_PVM_RETS_RIP:
 		msr_info->data = pvm->msr_rets_rip_plus2 - 2;
 		break;
+	case MSR_PVM_SWITCH_CR3:
+		msr_info->data = pvm->msr_switch_cr3;
+		break;
 	default:
 		ret = kvm_get_msr_common(vcpu, msr_info);
 	}
@@ -372,6 +476,15 @@ static int pvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	u64 data = msr_info->data;
 
 	switch (msr_index) {
+	case MSR_FS_BASE:
+		pvm_write_guest_fs_base(pvm, data);
+		break;
+	case MSR_GS_BASE:
+		pvm_write_guest_gs_base(pvm, data);
+		break;
+	case MSR_KERNEL_GS_BASE:
+		pvm_write_guest_kernel_gs_base(pvm, data);
+		break;
 	case MSR_STAR:
 		/*
 		 * Guest KERNEL_CS/DS shouldn't be NULL and guest USER_CS/DS
@@ -436,6 +549,9 @@ static int pvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_PVM_RETS_RIP:
 		pvm->msr_rets_rip_plus2 = msr_info->data + 2;
 		break;
+	case MSR_PVM_SWITCH_CR3:
+		pvm->msr_switch_cr3 = msr_info->data;
+		break;
 	default:
 		ret = kvm_set_msr_common(vcpu, msr_info);
 	}
@@ -443,6 +559,13 @@ static int pvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	return ret;
 }
 
+static int pvm_get_cpl(struct kvm_vcpu *vcpu)
+{
+	if (is_smod(to_pvm(vcpu)))
+		return 0;
+	return 3;
+}
+
 static void pvm_setup_mce(struct kvm_vcpu *vcpu)
 {
 }
@@ -683,6 +806,11 @@ static fastpath_t pvm_vcpu_run(struct kvm_vcpu *vcpu)
 
 	pvm_vcpu_run_noinstr(vcpu);
 
+	if (is_smod(pvm)) {
+		if (pvm->hw_cs != __USER_CS || pvm->hw_ss != __USER_DS)
+			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+	}
+
 	pvm_load_host_xsave_state(vcpu);
 
 	return EXIT_FASTPATH_NONE;
@@ -949,6 +1077,7 @@ static struct kvm_x86_ops pvm_x86_ops __initdata = {
 	.get_msr_feature = pvm_get_msr_feature,
 	.get_msr = pvm_get_msr,
 	.set_msr = pvm_set_msr,
+	.get_cpl = pvm_get_cpl,
 	.load_mmu_pgd = pvm_load_mmu_pgd,
 
 	.vcpu_pre_run = pvm_vcpu_pre_run,
diff --git a/arch/x86/kvm/pvm/pvm.h b/arch/x86/kvm/pvm/pvm.h
index 57ca2e901e0d..b0c633ce2987 100644
--- a/arch/x86/kvm/pvm/pvm.h
+++ b/arch/x86/kvm/pvm/pvm.h
@@ -61,6 +61,7 @@ struct vcpu_pvm {
 	u64 unused_MSR_IA32_SYSENTER_CS;
 	u64 unused_MSR_IA32_SYSENTER_EIP;
 	u64 unused_MSR_IA32_SYSENTER_ESP;
+	u64 msr_kernel_gs_base;
 	u64 msr_tsc_aux;
 	/*
 	 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
-- 
2.19.1.6.gb485710b


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ