Message-Id: <20240226143630.33643-68-jiangshanlai@gmail.com>
Date: Mon, 26 Feb 2024 22:36:24 +0800
From: Lai Jiangshan <jiangshanlai@...il.com>
To: linux-kernel@...r.kernel.org
Cc: Lai Jiangshan <jiangshan.ljs@...group.com>,
	Hou Wenlong <houwenlong.hwl@...group.com>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Peter Zijlstra <peterz@...radead.org>,
	Sean Christopherson <seanjc@...gle.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Borislav Petkov <bp@...en8.de>,
	Ingo Molnar <mingo@...hat.com>,
	kvm@...r.kernel.org,
	Paolo Bonzini <pbonzini@...hat.com>,
	x86@...nel.org,
	Kees Cook <keescook@...omium.org>,
	Juergen Gross <jgross@...e.com>,
	Dave Hansen <dave.hansen@...ux.intel.com>,
	"H. Peter Anvin" <hpa@...or.com>
Subject: [RFC PATCH 67/73] x86/pvm: Implement CPU-related PVOPS

From: Lai Jiangshan <jiangshan.ljs@...group.com>

MSR read and write operations are on hot paths, so implement their
PVOPS with hypercalls to improve performance. Additionally, the PVOPS
for load_gs_index() and load_tls() must notify the hypervisor of the
new user GS base and TLS entries.

Signed-off-by: Lai Jiangshan <jiangshan.ljs@...group.com>
Signed-off-by: Hou Wenlong <houwenlong.hwl@...group.com>
---
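[Illustrative note, not part of the commit message: with
CONFIG_PARAVIRT_XXL, the generic MSR helpers dispatch through pv_ops,
so once pvm_early_setup() installs the callbacks below, a plain
wrmsrl() in common code ends up in pvm_write_msr(). A minimal sketch
of that dispatch, with a direct function-pointer call standing in for
the real PVOP_VCALL*() patching macros in
arch/x86/include/asm/paravirt.h, and example_wrmsrl() being a made-up
name:

	/* Sketch only: simplified pv_ops dispatch for an MSR write. */
	static inline void example_wrmsrl(unsigned int msr, u64 val)
	{
		/* pv_ops.cpu.write_msr == pvm_write_msr after pvm_early_setup() */
		pv_ops.cpu.write_msr(msr, (u32)val, (u32)(val >> 32));
	}

The hypercall presumably replaces the fault-and-emulate path a raw
WRMSR would otherwise take in a PVM guest, which is what makes it
cheaper on these hot paths.]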
 arch/x86/Kconfig      |  1 +
 arch/x86/kernel/pvm.c | 85 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 32a2ab49752b..60e28727580a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -855,6 +855,7 @@ config PVM_GUEST
 	bool "PVM Guest support"
 	depends on X86_64 && KVM_GUEST && X86_PIE && !KASAN
 	select PAGE_TABLE_ISOLATION
+	select PARAVIRT_XXL
 	select RANDOMIZE_MEMORY
 	select RELOCATABLE_UNCOMPRESSED_KERNEL
 	default n
diff --git a/arch/x86/kernel/pvm.c b/arch/x86/kernel/pvm.c
index d39550a8159f..12a35bef9bb8 100644
--- a/arch/x86/kernel/pvm.c
+++ b/arch/x86/kernel/pvm.c
@@ -73,6 +73,81 @@ static __always_inline long pvm_hypercall3(unsigned int nr, unsigned long p1,
 	return ret;
 }
 
+static void pvm_load_gs_index(unsigned int sel)
+{
+	if (sel & 4) {
+		pr_warn_once("pvm guest doesn't support LDT");
+		this_cpu_write(pvm_vcpu_struct.user_gsbase, 0);
+	} else {
+		unsigned long base;
+
+		preempt_disable();
+		base = pvm_hypercall1(PVM_HC_LOAD_GS, sel);
+		__this_cpu_write(pvm_vcpu_struct.user_gsbase, base);
+		preempt_enable();
+	}
+}
+
+static unsigned long long pvm_read_msr_safe(unsigned int msr, int *err)
+{
+	switch (msr) {
+	case MSR_FS_BASE:
+		*err = 0;
+		return rdfsbase();
+	case MSR_KERNEL_GS_BASE:
+		*err = 0;
+		return this_cpu_read(pvm_vcpu_struct.user_gsbase);
+	default:
+		return native_read_msr_safe(msr, err);
+	}
+}
+
+static unsigned long long pvm_read_msr(unsigned int msr)
+{
+	switch (msr) {
+	case MSR_FS_BASE:
+		return rdfsbase();
+	case MSR_KERNEL_GS_BASE:
+		return this_cpu_read(pvm_vcpu_struct.user_gsbase);
+	default:
+		return pvm_hypercall1(PVM_HC_RDMSR, msr);
+	}
+}
+
+static int notrace pvm_write_msr_safe(unsigned int msr, u32 low, u32 high)
+{
+	unsigned long base = ((u64)high << 32) | low;
+
+	switch (msr) {
+	case MSR_FS_BASE:
+		wrfsbase(base);
+		return 0;
+	case MSR_KERNEL_GS_BASE:
+		this_cpu_write(pvm_vcpu_struct.user_gsbase, base);
+		return 0;
+	default:
+		return pvm_hypercall2(PVM_HC_WRMSR, msr, base);
+	}
+}
+
+static void notrace pvm_write_msr(unsigned int msr, u32 low, u32 high)
+{
+	pvm_write_msr_safe(msr, low, high);
+}
+
+static void pvm_load_tls(struct thread_struct *t, unsigned int cpu)
+{
+	struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
+	unsigned long *tls_array = (unsigned long *)gdt;
+
+	if (memcmp(&gdt[GDT_ENTRY_TLS_MIN], &t->tls_array[0], sizeof(t->tls_array))) {
+		native_load_tls(t, cpu);
+		pvm_hypercall3(PVM_HC_LOAD_TLS, tls_array[GDT_ENTRY_TLS_MIN],
+			       tls_array[GDT_ENTRY_TLS_MIN + 1],
+			       tls_array[GDT_ENTRY_TLS_MIN + 2]);
+	}
+}
+
 void __init pvm_early_event(struct pt_regs *regs)
 {
 	int vector = regs->orig_ax >> 32;
@@ -302,6 +377,16 @@ void __init pvm_early_setup(void)
 	setup_force_cpu_cap(X86_FEATURE_KVM_PVM_GUEST);
 	setup_force_cpu_cap(X86_FEATURE_PV_GUEST);
 
+	/* PVM takes care of %gs when switching to usermode for us */
+	pv_ops.cpu.load_gs_index = pvm_load_gs_index;
+	pv_ops.cpu.cpuid = pvm_cpuid;
+
+	pv_ops.cpu.read_msr = pvm_read_msr;
+	pv_ops.cpu.write_msr = pvm_write_msr;
+	pv_ops.cpu.read_msr_safe = pvm_read_msr_safe;
+	pv_ops.cpu.write_msr_safe = pvm_write_msr_safe;
+	pv_ops.cpu.load_tls = pvm_load_tls;
+
 	wrmsrl(MSR_PVM_VCPU_STRUCT, __pa(this_cpu_ptr(&pvm_vcpu_struct)));
 	wrmsrl(MSR_PVM_EVENT_ENTRY, (unsigned long)(void *)pvm_early_kernel_event_entry - 256);
 	wrmsrl(MSR_PVM_SUPERVISOR_REDZONE, PVM_SUPERVISOR_REDZONE_SIZE);
-- 
2.19.1.6.gb485710b
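
[Also illustrative, not part of the patch: with the callbacks above
installed, reading MSR_KERNEL_GS_BASE never leaves the guest, since
pvm_read_msr() answers it from the per-CPU
pvm_vcpu_struct.user_gsbase cache that pvm_load_gs_index() and
pvm_write_msr_safe() keep up to date; any other MSR falls back to the
PVM_HC_RDMSR hypercall. example_read_user_gsbase() is a made-up name:

	/* Sketch only: fast-path MSR read under the PVM PVOPS. */
	static u64 example_read_user_gsbase(void)
	{
		/* No hypercall here -- just a this_cpu_read() underneath. */
		return pv_ops.cpu.read_msr(MSR_KERNEL_GS_BASE);
	}
]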

