Message-Id: <20240226143630.33643-23-jiangshanlai@gmail.com>
Date: Mon, 26 Feb 2024 22:35:39 +0800
From: Lai Jiangshan <jiangshanlai@...il.com>
To: linux-kernel@...r.kernel.org
Cc: Lai Jiangshan <jiangshan.ljs@...group.com>,
	Hou Wenlong <houwenlong.hwl@...group.com>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Peter Zijlstra <peterz@...radead.org>,
	Sean Christopherson <seanjc@...gle.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Borislav Petkov <bp@...en8.de>,
	Ingo Molnar <mingo@...hat.com>,
	kvm@...r.kernel.org,
	Paolo Bonzini <pbonzini@...hat.com>,
	x86@...nel.org,
	Kees Cook <keescook@...omium.org>,
	Juergen Gross <jgross@...e.com>,
	Dave Hansen <dave.hansen@...ux.intel.com>,
	"H. Peter Anvin" <hpa@...or.com>
Subject: [RFC PATCH 22/73] KVM: x86/PVM: Handle some VM exits before enabling interrupts

From: Lai Jiangshan <jiangshan.ljs@...group.com>

Similar to VMX, NMIs should be handled in non-instrumented code early
after VM exit, and #PF, #VE, #VC, and #DB also need this early handling
in non-instrumented code. Host interrupts and #MC need to be handled
before interrupts are enabled.

Signed-off-by: Lai Jiangshan <jiangshan.ljs@...group.com>
Signed-off-by: Hou Wenlong <houwenlong.hwl@...group.com>
---
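Note for reviewers (not part of the changelog): below is a self-contained,
user-space C sketch of how the exit vectors end up split across the three
handling stages in this patch. The enum, the classify() helper and the
hard-coded vector numbers are illustrative only and not part of the patch;
the routing loosely mirrors pvm_vcpu_run_noinstr(), pvm_handle_exit_irqoff()
and pvm_handle_exit() in the hunks below, with the stats and failed-vmentry
details omitted.

#include <stdio.h>

/* x86 vector numbers: #DB=1, NMI=2, #PF=14, #MC=18, #VE=20, #VC=29. */
enum stage { STAGE_NOINSTR, STAGE_IRQOFF, STAGE_IRQS_ENABLED };

static enum stage classify(unsigned int vector)
{
	switch (vector) {
	case 1:  /* #DB: save DR6 */
	case 2:  /* NMI */
	case 14: /* #PF: save CR2 and async-#PF flags */
	case 20: /* #VE (TODO in this patch) */
	case 29: /* #VC (TODO in this patch) */
		return STAGE_NOINSTR;		/* pvm_vcpu_run_noinstr() */
	case 18: /* #MC */
		return STAGE_IRQOFF;		/* pvm_handle_exit_irqoff() */
	default:
		/* Host interrupts, excluding the 0x80 syscall vector. */
		if (vector >= 32 && vector < 256 && vector != 0x80)
			return STAGE_IRQOFF;
		return STAGE_IRQS_ENABLED;	/* pvm_handle_exit() */
	}
}

int main(void)
{
	printf("#PF      -> stage %d\n", classify(14));
	printf("#MC      -> stage %d\n", classify(18));
	printf("IRQ 0xec -> stage %d\n", classify(0xec));
	return 0;
}
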
 arch/x86/kvm/pvm/pvm.c | 89 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/pvm/pvm.h |  8 ++++
 2 files changed, 97 insertions(+)

diff --git a/arch/x86/kvm/pvm/pvm.c b/arch/x86/kvm/pvm/pvm.c
index 00a50ed0c118..29c6d8da7c19 100644
--- a/arch/x86/kvm/pvm/pvm.c
+++ b/arch/x86/kvm/pvm/pvm.c
@@ -265,6 +265,58 @@ static void pvm_setup_mce(struct kvm_vcpu *vcpu)
 {
 }
 
+static int handle_exit_external_interrupt(struct kvm_vcpu *vcpu)
+{
+	++vcpu->stat.irq_exits;
+	return 1;
+}
+
+static int handle_exit_failed_vmentry(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_pvm *pvm = to_pvm(vcpu);
+	u32 error_code = pvm->exit_error_code;
+
+	kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
+	return 1;
+}
+
+/*
+ * The guest has exited.  See if we can fix it or if we need userspace
+ * assistance.
+ */
+static int pvm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
+{
+	struct vcpu_pvm *pvm = to_pvm(vcpu);
+	u32 exit_reason = pvm->exit_vector;
+
+	if (exit_reason >= FIRST_EXTERNAL_VECTOR && exit_reason < NR_VECTORS)
+		return handle_exit_external_interrupt(vcpu);
+	else if (exit_reason == PVM_FAILED_VMENTRY_VECTOR)
+		return handle_exit_failed_vmentry(vcpu);
+
+	vcpu_unimpl(vcpu, "pvm: unexpected exit reason 0x%x\n", exit_reason);
+	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+	vcpu->run->internal.suberror =
+		KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
+	vcpu->run->internal.ndata = 2;
+	vcpu->run->internal.data[0] = exit_reason;
+	vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
+	return 0;
+}
+
+static void pvm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_pvm *pvm = to_pvm(vcpu);
+	u32 vector = pvm->exit_vector;
+	gate_desc *desc = (gate_desc *)host_idt_base + vector;
+
+	if (vector >= FIRST_EXTERNAL_VECTOR && vector < NR_VECTORS &&
+	    vector != IA32_SYSCALL_VECTOR)
+		kvm_do_interrupt_irqoff(vcpu, gate_offset(desc));
+	else if (vector == MC_VECTOR)
+		kvm_machine_check();
+}
+
 static bool pvm_has_emulated_msr(struct kvm *kvm, u32 index)
 {
 	switch (index) {
@@ -369,6 +421,40 @@ static noinstr void pvm_vcpu_run_noinstr(struct kvm_vcpu *vcpu)
 	pvm->exit_vector = (ret_regs->orig_ax >> 32);
 	pvm->exit_error_code = (u32)ret_regs->orig_ax;
 
+	// Handle VM-exit reasons that require noinstr handling.
+	switch (pvm->exit_vector) {
+	case PF_VECTOR:
+		// If the exit is due to #PF, save CR2 and check for async #PF.
+		pvm->exit_cr2 = read_cr2();
+		vcpu->arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags();
+		break;
+	case NMI_VECTOR:
+		kvm_do_nmi_irqoff(vcpu);
+		break;
+	case VE_VECTOR:
+		// TODO: handle #VE when the PVM host is itself a TDX guest.
+		// tdx_get_ve_info(&pvm->exit_ve);
+		break;
+	case X86_TRAP_VC:
+		/*
+		 * TODO: handle #VC when the PVM host is itself an SEV-ES guest.
+		 * if (!vc_is_db(error_code)) {
+		 *      collect info and handle the first part for #VC
+		 *      break;
+		 * } else {
+		 *      get_debugreg(pvm->exit_dr6, 6);
+		 *      set_debugreg(DR6_RESERVED, 6);
+		 * }
+		 */
+		break;
+	case DB_VECTOR:
+		get_debugreg(pvm->exit_dr6, 6);
+		set_debugreg(DR6_RESERVED, 6);
+		break;
+	default:
+		break;
+	}
+
 	guest_state_exit_irqoff();
 }
 
@@ -682,9 +768,12 @@ static struct kvm_x86_ops pvm_x86_ops __initdata = {
 
 	.vcpu_pre_run = pvm_vcpu_pre_run,
 	.vcpu_run = pvm_vcpu_run,
+	.handle_exit = pvm_handle_exit,
 
 	.vcpu_after_set_cpuid = pvm_vcpu_after_set_cpuid,
 
+	.handle_exit_irqoff = pvm_handle_exit_irqoff,
+
 	.sched_in = pvm_sched_in,
 
 	.nested_ops = &pvm_nested_ops,
diff --git a/arch/x86/kvm/pvm/pvm.h b/arch/x86/kvm/pvm/pvm.h
index 349f4eac98ec..123cfe1c3c6a 100644
--- a/arch/x86/kvm/pvm/pvm.h
+++ b/arch/x86/kvm/pvm/pvm.h
@@ -7,6 +7,8 @@
 
 #define SWITCH_FLAGS_INIT	(SWITCH_FLAGS_SMOD)
 
+#define PVM_FAILED_VMENTRY_VECTOR	SWITCH_EXIT_REASONS_FAILED_VMETNRY
+
 #define PT_L4_SHIFT		39
 #define PT_L4_SIZE		(1UL << PT_L4_SHIFT)
 #define DEFAULT_RANGE_L4_SIZE	(32 * PT_L4_SIZE)
@@ -35,6 +37,12 @@ struct vcpu_pvm {
 
 	u16 host_ds_sel, host_es_sel;
 
+	union {
+		unsigned long exit_extra;
+		unsigned long exit_cr2;
+		unsigned long exit_dr6;
+		struct ve_info exit_ve;
+	};
 	u32 exit_vector;
 	u32 exit_error_code;
 	u32 hw_cs, hw_ss;
-- 
2.19.1.6.gb485710b

