Message-Id: <20240226143630.33643-59-jiangshanlai@gmail.com>
Date: Mon, 26 Feb 2024 22:36:15 +0800
From: Lai Jiangshan <jiangshanlai@...il.com>
To: linux-kernel@...r.kernel.org
Cc: Hou Wenlong <houwenlong.hwl@...group.com>,
	Lai Jiangshan <jiangshan.ljs@...group.com>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Peter Zijlstra <peterz@...radead.org>,
	Sean Christopherson <seanjc@...gle.com>,
	Thomas Gleixner <tglx@...utronix.de>,
	Borislav Petkov <bp@...en8.de>,
	Ingo Molnar <mingo@...hat.com>,
	kvm@...r.kernel.org,
	Paolo Bonzini <pbonzini@...hat.com>,
	x86@...nel.org,
	Kees Cook <keescook@...omium.org>,
	Juergen Gross <jgross@...e.com>,
	Dave Hansen <dave.hansen@...ux.intel.com>,
	"H. Peter Anvin" <hpa@...or.com>,
	Andy Lutomirski <luto@...nel.org>
Subject: [RFC PATCH 58/73] x86/pvm: Relocate kernel address space layout

From: Hou Wenlong <houwenlong.hwl@...group.com>

Relocate the kernel address space layout into the virtual address range
provided by the hypervisor, similar to what KASLR does. Since there is
not enough room for KASAN in that range, KASAN is not supported for PVM
guests.

Suggested-by: Lai Jiangshan <jiangshan.ljs@...group.com>
Signed-off-by: Hou Wenlong <houwenlong.hwl@...group.com>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@...group.com>
---
 arch/x86/Kconfig                  |  3 +-
 arch/x86/include/asm/pvm_para.h   |  6 +++
 arch/x86/kernel/head64_identity.c |  6 +++
 arch/x86/kernel/pvm.c             | 64 +++++++++++++++++++++++++++++++
 arch/x86/mm/kaslr.c               |  4 ++
 5 files changed, 82 insertions(+), 1 deletion(-)
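
[Note to readers, not part of the patch: the base-address arithmetic in
pvm_kernel_layout_relocate() can be replayed with the small standalone
userspace sketch below. The range start/end values are only the example
ones from the layout comment in pvm.c, and the program itself is purely
illustrative; compiled as-is it should print the same bases that the
comment lists.]

/*
 * Illustrative sketch only: replays the layout arithmetic of
 * pvm_kernel_layout_relocate() for the example range
 * 0xffffd90000000000 - 0xffffe8ffffffffff from the comment in pvm.c.
 * The range values are assumptions for the example, not
 * hypervisor-provided ones.
 */
#include <stdio.h>
#include <stdint.h>

#define TB_SHIFT		40
#define HOLE_SIZE		(1ULL << 39)		/* 512 GB */
#define DIRECT_MAPPING_SIZE	(8ULL << TB_SHIFT)	/* 8 TB */
#define VMALLOC_SIZE		(5ULL << TB_SHIFT)	/* 5 TB */
#define VMEM_MAPPING_SIZE	(1ULL << TB_SHIFT)	/* 1 TB */

int main(void)
{
	uint64_t range_start = 0xffffd90000000000ULL;	/* example start */
	uint64_t range_end   = 0xffffe90000000000ULL;	/* example end */

	uint64_t page_offset_base    = range_start;
	uint64_t vmalloc_base        = page_offset_base + DIRECT_MAPPING_SIZE + HOLE_SIZE;
	uint64_t vmemory_end         = vmalloc_base + VMALLOC_SIZE;
	uint64_t vmemmap_base        = vmemory_end + HOLE_SIZE;
	uint64_t cpu_entry_area_base = vmemmap_base + VMEM_MAPPING_SIZE;

	printf("direct mapping : %016llx\n", (unsigned long long)page_offset_base);
	printf("vmalloc        : %016llx\n", (unsigned long long)vmalloc_base);
	printf("vmemmap        : %016llx\n", (unsigned long long)vmemmap_base);
	printf("cpu_entry_area : %016llx\n", (unsigned long long)cpu_entry_area_base);

	/* 512 GB cpu_entry_area slot + 510 GB hole + 2 GB kernel image */
	if (cpu_entry_area_base + (2ULL << 39) > range_end)
		printf("allowed range too small\n");
	return 0;
}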

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1b4bea3db53d..ded687cc23ad 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -853,7 +853,8 @@ config KVM_GUEST
 
 config PVM_GUEST
 	bool "PVM Guest support"
-	depends on X86_64 && KVM_GUEST && X86_PIE
+	depends on X86_64 && KVM_GUEST && X86_PIE && !KASAN
+	select RANDOMIZE_MEMORY
 	select RELOCATABLE_UNCOMPRESSED_KERNEL
 	default n
 	help
diff --git a/arch/x86/include/asm/pvm_para.h b/arch/x86/include/asm/pvm_para.h
index efd7afdf9be9..ff0bf0fe7dc4 100644
--- a/arch/x86/include/asm/pvm_para.h
+++ b/arch/x86/include/asm/pvm_para.h
@@ -10,6 +10,7 @@
 #include <uapi/asm/kvm_para.h>
 
 void __init pvm_early_setup(void);
+bool __init pvm_kernel_layout_relocate(void);
 
 static inline void pvm_cpuid(unsigned int *eax, unsigned int *ebx,
 			     unsigned int *ecx, unsigned int *edx)
@@ -64,6 +65,11 @@ static inline bool pvm_detect(void)
 static inline void pvm_early_setup(void)
 {
 }
+
+static inline bool pvm_kernel_layout_relocate(void)
+{
+	return false;
+}
 #endif /* CONFIG_PVM_GUEST */
 
 #endif /* _ASM_X86_PVM_PARA_H */
diff --git a/arch/x86/kernel/head64_identity.c b/arch/x86/kernel/head64_identity.c
index f69f9904003c..467fe493c9ba 100644
--- a/arch/x86/kernel/head64_identity.c
+++ b/arch/x86/kernel/head64_identity.c
@@ -396,6 +396,12 @@ static void __head detect_pvm_range(void)
 	pml4_index_end = (msr_val >> 16) & 0x1ff;
 	pvm_range_start = (0x1fffe00 | pml4_index_start) * P4D_SIZE;
 	pvm_range_end = (0x1fffe00 | pml4_index_end) * P4D_SIZE;
+
+	/*
+	 * An early page fault would map pages into the direct mapping area,
+	 * so 'page_offset_base' must be updated this early.
+	 */
+	page_offset_base = pvm_range_start;
 }
 
 void __head pvm_relocate_kernel(unsigned long physbase)
diff --git a/arch/x86/kernel/pvm.c b/arch/x86/kernel/pvm.c
index fc82c71b305b..9cdfbaa15dbb 100644
--- a/arch/x86/kernel/pvm.c
+++ b/arch/x86/kernel/pvm.c
@@ -10,7 +10,10 @@
  */
 #define pr_fmt(fmt) "pvm-guest: " fmt
 
+#include <linux/mm_types.h>
+
 #include <asm/cpufeature.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/pvm_para.h>
 
 unsigned long pvm_range_start __initdata;
@@ -23,3 +26,64 @@ void __init pvm_early_setup(void)
 
 	setup_force_cpu_cap(X86_FEATURE_KVM_PVM_GUEST);
 }
+
+#define TB_SHIFT	40
+#define HOLE_SIZE	(1UL << 39)
+
+#define PVM_DIRECT_MAPPING_SIZE		(8UL << TB_SHIFT)
+#define PVM_VMALLOC_SIZE		(5UL << TB_SHIFT)
+#define PVM_VMEM_MAPPING_SIZE		(1UL << TB_SHIFT)
+
+/*
+ * For a PVM guest, the hypervisor provides one valid virtual address
+ * range for the guest kernel. The guest kernel needs to adjust its layout,
+ * including the direct mapping area, vmalloc area, vmemmap area, and CPU entry
+ * area, to be within this range. If the range start is 0xffffd90000000000, the
+ * PVM guest kernel with 4-level page tables could arrange its layout as
+ * follows:
+ *
+ * ffff800000000000 - ffff87ffffffffff (=43 bits) guard hole, reserved for hypervisor
+ * ... host kernel used ...  guest kernel range start
+ * ffffd90000000000 - ffffe0ffffffffff (=8 TB) direct mapping of all physical memory
+ * ffffe10000000000 - ffffe17fffffffff (=39 bits) hole
+ * ffffe18000000000 - ffffe67fffffffff (=5 TB) vmalloc/ioremap space
+ * ffffe68000000000 - ffffe6ffffffffff (=39 bits) hole
+ * ffffe70000000000 - ffffe7ffffffffff (=40 bits) virtual memory map (1 TB)
+ * ffffe80000000000 - ffffe87fffffffff (=39 bits) cpu_entry_area mapping
+ * ffffe88000000000 - ffffe8ff7fffffff (=510 GB) hole
+ * ffffe8ff80000000 - ffffe8ffffffffff (=2 GB) kernel image
+ * ... host kernel used ... guest kernel range end
+ *
+ */
+bool __init pvm_kernel_layout_relocate(void)
+{
+	unsigned long area_size;
+
+	if (!boot_cpu_has(X86_FEATURE_KVM_PVM_GUEST)) {
+		vmemory_end = VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1;
+		return false;
+	}
+
+	if (!IS_ALIGNED(pvm_range_start, PGDIR_SIZE))
+		panic("The start of the allowed range is not aligned");
+
+	area_size = max_pfn << PAGE_SHIFT;
+	if (area_size > PVM_DIRECT_MAPPING_SIZE)
+		panic("The memory size is too large for the direct mapping area");
+
+	vmalloc_base = page_offset_base + PVM_DIRECT_MAPPING_SIZE + HOLE_SIZE;
+	vmemory_end = vmalloc_base + PVM_VMALLOC_SIZE;
+
+	vmemmap_base = vmemory_end + HOLE_SIZE;
+	area_size = max_pfn * sizeof(struct page);
+	if (area_size > PVM_VMEM_MAPPING_SIZE)
+		panic("The memory size is too large for the virtual memory mapping area");
+
+	cpu_entry_area_base = vmemmap_base + PVM_VMEM_MAPPING_SIZE;
+	BUILD_BUG_ON(CPU_ENTRY_AREA_MAP_SIZE > (1UL << 39));
+
+	if (cpu_entry_area_base + (2UL << 39) > pvm_range_end)
+		panic("The size of the allowed range is too small");
+
+	return true;
+}
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index e3825c7542a3..f6f332abf515 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -28,6 +28,7 @@
 
 #include <asm/setup.h>
 #include <asm/kaslr.h>
+#include <asm/pvm_para.h>
 
 #include "mm_internal.h"
 
@@ -82,6 +83,9 @@ void __init kernel_randomize_memory(void)
 	BUILD_BUG_ON(vaddr_end != RAW_CPU_ENTRY_AREA_BASE);
 	BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
 
+	if (pvm_kernel_layout_relocate())
+		return;
+
 	if (!kaslr_memory_enabled())
 		return;
 
-- 
2.19.1.6.gb485710b

