Message-Id: <20240226143630.33643-24-jiangshanlai@gmail.com>
Date: Mon, 26 Feb 2024 22:35:40 +0800
From: Lai Jiangshan <jiangshanlai@...il.com>
To: linux-kernel@...r.kernel.org
Cc: Lai Jiangshan <jiangshan.ljs@...group.com>,
	Hou Wenlong <houwenlong.hwl@...group.com>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Peter Zijlstra <peterz@...radead.org>,
	Sean Christopherson <seanjc@...gle.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Borislav Petkov <bp@...en8.de>,
	Ingo Molnar <mingo@...hat.com>,
	kvm@...r.kernel.org,
	Paolo Bonzini <pbonzini@...hat.com>,
	x86@...nel.org,
	Kees Cook <keescook@...omium.org>,
	Juergen Gross <jgross@...e.com>,
	Dave Hansen <dave.hansen@...ux.intel.com>,
	"H. Peter Anvin" <hpa@...or.com>
Subject: [RFC PATCH 23/73] KVM: x86/PVM: Handle event handling related MSR read/write operation

From: Lai Jiangshan <jiangshan.ljs@...group.com>

Per the PVM event handling specification, the guest must register its event
entry points in the associated MSRs before any event can be delivered.
Therefore, handle these MSRs in the get_msr()/set_msr() callbacks so that the
state needed for later event delivery is in place. Additionally, the user-mode
syscall event still uses the original SYSCALL entry, but only MSR_LSTAR is
honored; the other SYSCALL-related MSRs are ignored.
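
For context, a minimal guest-side sketch (not part of this patch) of how these
MSRs might be programmed at boot. The MSR indices come from this series;
pvm_user_event_entry and pvm_vcpu_struct_page are hypothetical guest symbols,
and the actual entry layout is defined by the PVM specification rather than by
this patch:

	/* Illustrative only: hypothetical guest-side MSR setup for PVM events. */
	static void __init pvm_register_event_entries(void)
	{
		/* Base of the event entry block; must be a canonical address. */
		wrmsrl(MSR_PVM_EVENT_ENTRY, (u64)pvm_user_event_entry);

		/* Page-aligned guest physical address of the per-vCPU shared page. */
		wrmsrl(MSR_PVM_VCPU_STRUCT, __pa(pvm_vcpu_struct_page));

		/* User-mode syscalls reuse the SYSCALL entry; only LSTAR matters. */
		wrmsrl(MSR_LSTAR, (u64)entry_SYSCALL_64);
	}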

Signed-off-by: Lai Jiangshan <jiangshan.ljs@...group.com>
Signed-off-by: Hou Wenlong <houwenlong.hwl@...group.com>
---
 arch/x86/kvm/pvm/pvm.c | 188 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/pvm/pvm.h |   7 ++
 2 files changed, 195 insertions(+)

diff --git a/arch/x86/kvm/pvm/pvm.c b/arch/x86/kvm/pvm/pvm.c
index 29c6d8da7c19..69f8fbbb6176 100644
--- a/arch/x86/kvm/pvm/pvm.c
+++ b/arch/x86/kvm/pvm/pvm.c
@@ -31,6 +31,33 @@ static bool __read_mostly is_intel;
 
 static unsigned long host_idt_base;
 
+static inline u16 kernel_cs_by_msr(u64 msr_star)
+{
+	// [47..32]
+	// and force rpl=0
+	return ((msr_star >> 32) & ~0x3);
+}
+
+static inline u16 kernel_ds_by_msr(u64 msr_star)
+{
+	// [47..32] + 8
+	// and force rpl=0
+	return ((msr_star >> 32) & ~0x3) + 8;
+}
+
+static inline u16 user_cs32_by_msr(u64 msr_star)
+{
+	// [63..48] is user_cs32 and force rpl=3
+	return ((msr_star >> 48) | 0x3);
+}
+
+static inline u16 user_cs_by_msr(u64 msr_star)
+{
+	// [63..48] is user_cs32, and [63..48] + 16 is user_cs
+	// and force rpl=3
+	return ((msr_star >> 48) | 0x3) + 16;
+}
+
 static inline void __save_gs_base(struct vcpu_pvm *pvm)
 {
 	// switcher will do a real hw swapgs, so use hw MSR_KERNEL_GS_BASE
@@ -261,6 +288,161 @@ static void pvm_sched_in(struct kvm_vcpu *vcpu, int cpu)
 {
 }
 
+static int pvm_get_msr_feature(struct kvm_msr_entry *msr)
+{
+	return 1;
+}
+
+static void pvm_msr_filter_changed(struct kvm_vcpu *vcpu)
+{
+	/* Accesses to MSRs are emulated in hypervisor, nothing to do here. */
+}
+
+/*
+ * Reads an msr value (of 'msr_index') into 'msr_info'.
+ * Returns 0 on success, non-0 otherwise.
+ * Assumes vcpu_load() was already called.
+ */
+static int pvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+	struct vcpu_pvm *pvm = to_pvm(vcpu);
+	int ret = 0;
+
+	switch (msr_info->index) {
+	case MSR_STAR:
+		msr_info->data = pvm->msr_star;
+		break;
+	case MSR_LSTAR:
+		msr_info->data = pvm->msr_lstar;
+		break;
+	case MSR_SYSCALL_MASK:
+		msr_info->data = pvm->msr_syscall_mask;
+		break;
+	case MSR_CSTAR:
+		msr_info->data = pvm->unused_MSR_CSTAR;
+		break;
+	/*
+	 * Since SYSENTER is not supported for the guest, we return a bad
+	 * segment to the emulator when emulating the instruction for #GP.
+	 */
+	case MSR_IA32_SYSENTER_CS:
+		msr_info->data = GDT_ENTRY_INVALID_SEG;
+		break;
+	case MSR_IA32_SYSENTER_EIP:
+		msr_info->data = pvm->unused_MSR_IA32_SYSENTER_EIP;
+		break;
+	case MSR_IA32_SYSENTER_ESP:
+		msr_info->data = pvm->unused_MSR_IA32_SYSENTER_ESP;
+		break;
+	case MSR_PVM_VCPU_STRUCT:
+		msr_info->data = pvm->msr_vcpu_struct;
+		break;
+	case MSR_PVM_SUPERVISOR_RSP:
+		msr_info->data = pvm->msr_supervisor_rsp;
+		break;
+	case MSR_PVM_SUPERVISOR_REDZONE:
+		msr_info->data = pvm->msr_supervisor_redzone;
+		break;
+	case MSR_PVM_EVENT_ENTRY:
+		msr_info->data = pvm->msr_event_entry;
+		break;
+	case MSR_PVM_RETU_RIP:
+		msr_info->data = pvm->msr_retu_rip_plus2 - 2;
+		break;
+	case MSR_PVM_RETS_RIP:
+		msr_info->data = pvm->msr_rets_rip_plus2 - 2;
+		break;
+	default:
+		ret = kvm_get_msr_common(vcpu, msr_info);
+	}
+
+	return ret;
+}
+
+/*
+ * Writes msr value into the appropriate "register".
+ * Returns 0 on success, non-0 otherwise.
+ * Assumes vcpu_load() was already called.
+ */
+static int pvm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+	struct vcpu_pvm *pvm = to_pvm(vcpu);
+	int ret = 0;
+	u32 msr_index = msr_info->index;
+	u64 data = msr_info->data;
+
+	switch (msr_index) {
+	case MSR_STAR:
+		/*
+		 * Guest KERNEL_CS/DS shouldn't be NULL and guest USER_CS/DS
+		 * must be the same as the host USER_CS/DS.
+		 */
+		if (!msr_info->host_initiated) {
+			if (!kernel_cs_by_msr(data))
+				return 1;
+			if (user_cs_by_msr(data) != __USER_CS)
+				return 1;
+		}
+		pvm->msr_star = data;
+		break;
+	case MSR_LSTAR:
+		if (is_noncanonical_address(msr_info->data, vcpu))
+			return 1;
+		pvm->msr_lstar = data;
+		break;
+	case MSR_SYSCALL_MASK:
+		pvm->msr_syscall_mask = data;
+		break;
+	case MSR_CSTAR:
+		pvm->unused_MSR_CSTAR = data;
+		break;
+	case MSR_IA32_SYSENTER_CS:
+		pvm->unused_MSR_IA32_SYSENTER_CS = data;
+		break;
+	case MSR_IA32_SYSENTER_EIP:
+		pvm->unused_MSR_IA32_SYSENTER_EIP = data;
+		break;
+	case MSR_IA32_SYSENTER_ESP:
+		pvm->unused_MSR_IA32_SYSENTER_ESP = data;
+		break;
+	case MSR_PVM_VCPU_STRUCT:
+		if (!PAGE_ALIGNED(data))
+			return 1;
+		if (!data)
+			kvm_gpc_deactivate(&pvm->pvcs_gpc);
+		else if (kvm_gpc_activate(&pvm->pvcs_gpc, data, PAGE_SIZE))
+			return 1;
+
+		pvm->msr_vcpu_struct = data;
+		break;
+	case MSR_PVM_SUPERVISOR_RSP:
+		pvm->msr_supervisor_rsp = msr_info->data;
+		break;
+	case MSR_PVM_SUPERVISOR_REDZONE:
+		pvm->msr_supervisor_redzone = msr_info->data;
+		break;
+	case MSR_PVM_EVENT_ENTRY:
+		if (is_noncanonical_address(data, vcpu) ||
+		    is_noncanonical_address(data + 256, vcpu) ||
+		    is_noncanonical_address(data + 512, vcpu)) {
+			kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+			return 1;
+		}
+		pvm->msr_event_entry = msr_info->data;
+		break;
+	case MSR_PVM_RETU_RIP:
+		pvm->msr_retu_rip_plus2 = msr_info->data + 2;
+		break;
+	case MSR_PVM_RETS_RIP:
+		pvm->msr_rets_rip_plus2 = msr_info->data + 2;
+		break;
+	default:
+		ret = kvm_set_msr_common(vcpu, msr_info);
+	}
+
+	return ret;
+}
+
 static void pvm_setup_mce(struct kvm_vcpu *vcpu)
 {
 }
@@ -764,6 +946,9 @@ static struct kvm_x86_ops pvm_x86_ops __initdata = {
 	.vcpu_load = pvm_vcpu_load,
 	.vcpu_put = pvm_vcpu_put,
 
+	.get_msr_feature = pvm_get_msr_feature,
+	.get_msr = pvm_get_msr,
+	.set_msr = pvm_set_msr,
 	.load_mmu_pgd = pvm_load_mmu_pgd,
 
 	.vcpu_pre_run = pvm_vcpu_pre_run,
@@ -779,6 +964,9 @@ static struct kvm_x86_ops pvm_x86_ops __initdata = {
 	.nested_ops = &pvm_nested_ops,
 
 	.setup_mce = pvm_setup_mce,
+
+	.msr_filter_changed = pvm_msr_filter_changed,
+	.complete_emulated_msr = kvm_complete_insn_gp,
 };
 
 static struct kvm_x86_init_ops pvm_init_ops __initdata = {
diff --git a/arch/x86/kvm/pvm/pvm.h b/arch/x86/kvm/pvm/pvm.h
index 123cfe1c3c6a..57ca2e901e0d 100644
--- a/arch/x86/kvm/pvm/pvm.h
+++ b/arch/x86/kvm/pvm/pvm.h
@@ -54,6 +54,13 @@ struct vcpu_pvm {
 	struct gfn_to_pfn_cache pvcs_gpc;
 
 	// emulated x86 msrs
+	u64 msr_lstar;
+	u64 msr_syscall_mask;
+	u64 msr_star;
+	u64 unused_MSR_CSTAR;
+	u64 unused_MSR_IA32_SYSENTER_CS;
+	u64 unused_MSR_IA32_SYSENTER_EIP;
+	u64 unused_MSR_IA32_SYSENTER_ESP;
 	u64 msr_tsc_aux;
 	/*
 	 * Only bits masked by msr_ia32_feature_control_valid_bits can be set in
-- 
2.19.1.6.gb485710b

