Message-Id: <20240226143630.33643-39-jiangshanlai@gmail.com>
Date: Mon, 26 Feb 2024 22:35:55 +0800
From: Lai Jiangshan <jiangshanlai@...il.com>
To: linux-kernel@...r.kernel.org
Cc: Lai Jiangshan <jiangshan.ljs@...group.com>,
	Hou Wenlong <houwenlong.hwl@...group.com>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Peter Zijlstra <peterz@...radead.org>,
	Sean Christopherson <seanjc@...gle.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Borislav Petkov <bp@...en8.de>,
	Ingo Molnar <mingo@...hat.com>,
	kvm@...r.kernel.org,
	Paolo Bonzini <pbonzini@...hat.com>,
	x86@...nel.org,
	Kees Cook <keescook@...omium.org>,
	Juergen Gross <jgross@...e.com>,
	Dave Hansen <dave.hansen@...ux.intel.com>,
	"H. Peter Anvin" <hpa@...or.com>
Subject: [RFC PATCH 38/73] KVM: x86/PVM: Handle hypercalls for privileged instruction emulation

From: Lai Jiangshan <jiangshan.ljs@...group.com>

Privileged instructions in the PVM guest are trapped and emulated. To
reduce emulation overhead, some privileged instructions on the hot path,
such as RDMSR/WRMSR and TLB-flushing instructions, are replaced with
hypercalls. Each hypercall is handled in the same way as the emulation
of the corresponding privileged instruction.
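
For reference, a minimal guest-side sketch (not part of this patch) of
what such a replacement could look like, assuming the hypercall ABI
inferred from the host-side dispatch added below: hypercall number in
RAX, arguments in RBX and R10, entry via SYSCALL from supervisor mode,
result returned in RAX. The pvm_hypercall2() and pvm_wrmsr() helper
names are hypothetical; the PVM_HC_* numbers are defined elsewhere in
this series.

  #include <linux/types.h>

  static inline long pvm_hypercall2(unsigned long nr, unsigned long a0,
                                    unsigned long a1)
  {
          long ret;
          register unsigned long r10 asm("r10") = a1;

          /* SYSCALL from guest supervisor mode traps to the hypervisor. */
          asm volatile("syscall"
                       : "=a" (ret)
                       : "a" (nr), "b" (a0), "r" (r10)
                       : "rcx", "r11", "memory");
          return ret;
  }

  /* Hot-path WRMSR replacement using the PVM_HC_WRMSR hypercall. */
  static inline int pvm_wrmsr(u32 msr, u64 value)
  {
          /* The handler puts 0 in RAX on success, -EIO on failure. */
          return pvm_hypercall2(PVM_HC_WRMSR, msr, value);
  }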

Signed-off-by: Lai Jiangshan <jiangshan.ljs@...group.com>
Signed-off-by: Hou Wenlong <houwenlong.hwl@...group.com>
---
 arch/x86/kvm/pvm/pvm.c | 114 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 113 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/pvm/pvm.c b/arch/x86/kvm/pvm/pvm.c
index 2d3785e7f2f3..8d8c783c72b5 100644
--- a/arch/x86/kvm/pvm/pvm.c
+++ b/arch/x86/kvm/pvm/pvm.c
@@ -1434,6 +1434,96 @@ static int handle_synthetic_instruction_return_supervisor(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static int handle_hc_interrupt_window(struct kvm_vcpu *vcpu)
+{
+	kvm_make_request(KVM_REQ_EVENT, vcpu);
+	pvm_event_flags_update(vcpu, 0, PVM_EVENT_FLAGS_IP);
+
+	++vcpu->stat.irq_window_exits;
+	return 1;
+}
+
+static int handle_hc_irq_halt(struct kvm_vcpu *vcpu)
+{
+	kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) | X86_EFLAGS_IF);
+
+	return kvm_emulate_halt_noskip(vcpu);
+}
+
+static void pvm_flush_tlb_guest_current_kernel_user(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * Sync the current pgd and the user pgd (pvm->msr_switch_cr3),
+	 * which is a subset of the work done for KVM_REQ_TLB_FLUSH_GUEST.
+	 */
+	kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
+}
+
+/*
+ * Hypercall: PVM_HC_TLB_FLUSH
+ *	Flush all TLBs.
+ */
+static int handle_hc_flush_tlb_all(struct kvm_vcpu *vcpu)
+{
+	kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
+
+	return 1;
+}
+
+/*
+ * Hypercall: PVM_HC_TLB_FLUSH_CURRENT
+ *	Flush all TLBs tagged with the current CR3 and MSR_PVM_SWITCH_CR3.
+ */
+static int handle_hc_flush_tlb_current_kernel_user(struct kvm_vcpu *vcpu)
+{
+	pvm_flush_tlb_guest_current_kernel_user(vcpu);
+
+	return 1;
+}
+
+/*
+ * Hypercall: PVM_HC_TLB_INVLPG
+ *	Flush TLBs associated with a single address for all tags.
+ */
+static int handle_hc_invlpg(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	kvm_mmu_invlpg(vcpu, addr);
+
+	return 1;
+}
+
+/*
+ * Hypercall: PVM_HC_RDMSR
+ *	Read MSR.
+ *	Return with RAX = the MSR value on success.
+ *	Return with RAX = 0 on failure.
+ */
+static int handle_hc_rdmsr(struct kvm_vcpu *vcpu, u32 index)
+{
+	u64 value = 0;
+
+	kvm_get_msr(vcpu, index, &value);
+	kvm_rax_write(vcpu, value);
+
+	return 1;
+}
+
+/*
+ * Hypercall: PVM_HC_WRMSR
+ *	Write MSR.
+ *	Return with RAX = 0 on success.
+ *	Return with RAX = -EIO on failure.
+ */
+static int handle_hc_wrmsr(struct kvm_vcpu *vcpu, u32 index, u64 value)
+{
+	if (kvm_set_msr(vcpu, index, value))
+		kvm_rax_write(vcpu, -EIO);
+	else
+		kvm_rax_write(vcpu, 0);
+
+	return 1;
+}
+
 static int handle_kvm_hypercall(struct kvm_vcpu *vcpu)
 {
 	int r;
@@ -1450,6 +1540,7 @@ static int handle_exit_syscall(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_pvm *pvm = to_pvm(vcpu);
 	unsigned long rip = kvm_rip_read(vcpu);
+	unsigned long a0, a1;
 
 	if (!is_smod(pvm))
 		return do_pvm_user_event(vcpu, PVM_SYSCALL_VECTOR, false, 0);
@@ -1459,7 +1550,28 @@ static int handle_exit_syscall(struct kvm_vcpu *vcpu)
 	if (rip == pvm->msr_rets_rip_plus2)
 		return handle_synthetic_instruction_return_supervisor(vcpu);
 
-	return handle_kvm_hypercall(vcpu);
+	a0 = kvm_rbx_read(vcpu);
+	a1 = kvm_r10_read(vcpu);
+
+	/* Try PVM hypercalls first; fall back to the KVM hypercall path otherwise. */
+	switch (kvm_rax_read(vcpu)) {
+	case PVM_HC_IRQ_WIN:
+		return handle_hc_interrupt_window(vcpu);
+	case PVM_HC_IRQ_HALT:
+		return handle_hc_irq_halt(vcpu);
+	case PVM_HC_TLB_FLUSH:
+		return handle_hc_flush_tlb_all(vcpu);
+	case PVM_HC_TLB_FLUSH_CURRENT:
+		return handle_hc_flush_tlb_current_kernel_user(vcpu);
+	case PVM_HC_TLB_INVLPG:
+		return handle_hc_invlpg(vcpu, a0);
+	case PVM_HC_RDMSR:
+		return handle_hc_rdmsr(vcpu, a0);
+	case PVM_HC_WRMSR:
+		return handle_hc_wrmsr(vcpu, a0, a1);
+	default:
+		return handle_kvm_hypercall(vcpu);
+	}
 }
 
 static int handle_exit_debug(struct kvm_vcpu *vcpu)
-- 
2.19.1.6.gb485710b