lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20180209142228.21231-4-kirill.shutemov@linux.intel.com>
Date:   Fri,  9 Feb 2018 17:22:27 +0300
From:   "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
To:     Ingo Molnar <mingo@...hat.com>, x86@...nel.org,
        Thomas Gleixner <tglx@...utronix.de>,
        "H. Peter Anvin" <hpa@...or.com>
Cc:     Linus Torvalds <torvalds@...ux-foundation.org>,
        Andy Lutomirski <luto@...capital.net>,
        Cyrill Gorcunov <gorcunov@...nvz.org>,
        Borislav Petkov <bp@...e.de>, Andi Kleen <ak@...ux.intel.com>,
        Matthew Wilcox <willy@...radead.org>, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org,
        "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
Subject: [PATCHv9 3/4] x86/boot/compressed/64: Prepare trampoline memory

If a bootloader enables 64-bit mode with 4-level paging, we might need to
switch over to 5-level paging. The switching requires the disabling
paging. It works fine if kernel itself is loaded below 4G.

But if the bootloader put the kernel above 4G (not sure if anybody does
this), we would lose control as soon as paging is disabled, because the
code becomes unreachable to the CPU.

To handle the situation, we need a trampoline in lower memory that would
take care of switching on 5-level paging.

Apart from the trampoline code itself we also need a place to store
top-level page table in lower memory as we don't have a way to load
64-bit values into CR3 in 32-bit mode. We only really need 8 bytes there
as we only use the very first entry of the page table. But we allocate a
whole page anyway.

We cannot have the code in the same page as the page table because there's
a risk that a CPU would read the page table speculatively and get confused
by seeing garbage. It's never a good idea to have junk in PTE entries
visible to the CPU.

We also need a small stack in the trampoline to re-enable long mode via
long return. But stack and code can share the page just fine.

The same trampoline can be used to switch from 5- to 4-level paging
mode, like when starting 4-level paging kernel via kexec() when original
kernel worked in 5-level paging mode.

This patch changes paging_prepare() to find a right spot in lower memory
for the trampoline. Then it copies the trampoline code there and sets up
the new top-level page table for 5-level paging.

We also add cleanup_trampoline() that restores the trampoline memory
back once we've done.

At this point we do all the preparation, but don't use trampoline yet.
It will be done in the following patch.

The trampoline will be used even on 4-level paging machines. This way we
will get better test coverage and the keep the trampoline code in shape.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@...ux.intel.com>
---
 arch/x86/boot/compressed/head_64.S    | 24 ++++++++++-
 arch/x86/boot/compressed/pgtable.h    | 18 ++++++++
 arch/x86/boot/compressed/pgtable_64.c | 79 +++++++++++++++++++++++++++++++++++
 3 files changed, 120 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/boot/compressed/pgtable.h

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 10b4df46de84..74026c3da4c3 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -33,6 +33,7 @@
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
 #include <asm/bootparam.h>
+#include "pgtable.h"
 
 /*
  * Locally defined symbols should be marked hidden:
@@ -355,6 +356,17 @@ ENTRY(startup_64)
 	lretq
 lvl5:
 
+	/*
+	 * cleanup_trampoline() would restore trampoline memory.
+	 *
+	 * RSI holds real mode data and need to be preserved across
+	 * a function call.
+	 */
+	pushq	%rsi
+	movq	%rcx, %rdi
+	call	cleanup_trampoline
+	popq	%rsi
+
 	/* Zero EFLAGS */
 	pushq	$0
 	popfq
@@ -491,8 +503,9 @@ relocated:
 	jmp	*%rax
 
 	.code32
+ENTRY(trampoline_32bit_src)
 compatible_mode:
-	/* Setup data and stack segments */
+	/* Set up data and stack segments */
 	movl	$__KERNEL_DS, %eax
 	movl	%eax, %ds
 	movl	%eax, %ss
@@ -577,6 +590,11 @@ boot_stack:
 	.fill BOOT_STACK_SIZE, 1, 0
 boot_stack_end:
 
+/* Space to preserve trampoline memory */
+	.global trampoline_save
+trampoline_save:
+	.fill TRAMPOLINE_32BIT_SIZE, 1, 0
+
 /*
  * Space for page tables (not in .bss so not zeroed)
  */
@@ -586,3 +604,7 @@ pgtable:
 	.fill BOOT_PGT_SIZE, 1, 0
 lvl5_pgtable:
 	.fill PAGE_SIZE, 1, 0
+
+	.global pgtable_trampoline
+pgtable_trampoline:
+	.fill 4096, 1, 0
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
new file mode 100644
index 000000000000..6e0db2260147
--- /dev/null
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -0,0 +1,18 @@
+#ifndef BOOT_COMPRESSED_PAGETABLE_H
+#define BOOT_COMPRESSED_PAGETABLE_H
+
+#define TRAMPOLINE_32BIT_SIZE		(2 * PAGE_SIZE)
+
+#define TRAMPOLINE_32BIT_PGTABLE_OFFSET	0
+
+#define TRAMPOLINE_32BIT_CODE_OFFSET	PAGE_SIZE
+#define TRAMPOLINE_32BIT_CODE_SIZE	0x60
+
+#define TRAMPOLINE_32BIT_STACK_END	TRAMPOLINE_32BIT_SIZE
+
+#ifndef __ASSEMBLER__
+
+extern void (*trampoline_32bit_src)(void *return_ptr);
+
+#endif /* __ASSEMBLER__ */
+#endif /* BOOT_COMPRESSED_PAGETABLE_H */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 3f1697fcc7a8..dad5da7b4c1a 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -1,4 +1,6 @@
 #include <asm/processor.h>
+#include "pgtable.h"
+#include "../string.h"
 
 /*
  * __force_order is used by special_insns.h asm code to force instruction
@@ -9,19 +11,96 @@
  */
 unsigned long __force_order;
 
+#define BIOS_START_MIN		0x20000U	/* 128K, less than this is insane */
+#define BIOS_START_MAX		0x9f000U	/* 640K, absolute maximum */
+
 struct paging_config {
 	unsigned long trampoline_start;
 	unsigned long l5_required;
 };
 
+extern void *trampoline_save;
+extern void *pgtable_trampoline;
+
 struct paging_config paging_prepare(void)
 {
 	struct paging_config paging_config = {};
+	unsigned long bios_start, ebda_start, *trampoline;
 
 	/* Check if LA57 is desired and supported */
 	if (IS_ENABLED(CONFIG_X86_5LEVEL) && native_cpuid_eax(0) >= 7 &&
 			(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
 		paging_config.l5_required = 1;
 
+	/*
+	 * Find a suitable spot for the trampoline.
+	 * This code is based on reserve_bios_regions().
+	 */
+
+	ebda_start = *(unsigned short *)0x40e << 4;
+	bios_start = *(unsigned short *)0x413 << 10;
+
+	if (bios_start < BIOS_START_MIN || bios_start > BIOS_START_MAX)
+		bios_start = BIOS_START_MAX;
+
+	if (ebda_start > BIOS_START_MIN && ebda_start < bios_start)
+		bios_start = ebda_start;
+
+	/* Place the trampoline just below the end of low memory, aligned to 4k */
+	paging_config.trampoline_start = bios_start - TRAMPOLINE_32BIT_SIZE;
+	paging_config.trampoline_start = round_down(paging_config.trampoline_start, PAGE_SIZE);
+
+	trampoline = (unsigned long *)paging_config.trampoline_start;
+
+	/* Preserve trampoline memory */
+	memcpy(trampoline_save, trampoline, TRAMPOLINE_32BIT_SIZE);
+
+	/* Clear trampoline memory first */
+	memset(trampoline, 0, TRAMPOLINE_32BIT_SIZE);
+
+	/* Copy trampoline code in place */
+	memcpy(trampoline + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
+			&trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
+
+	/*
+	 * Set up a new page table that will be used for switching from 4-
+	 * to 5-level paging or vice versa. In other cases trampoline
+	 * wouldn't touch CR3.
+	 *
+	 * For 4- to 5-level paging transition, set up current CR3 as the
+	 * first and the only entry in a new top-level page table.
+	 *
+	 * For 5- to 4-level paging transition, copy page table pointed by
+	 * first entry in the current top-level page table as our new
+	 * top-level page table. We just cannot point to the page table
+	 * from trampoline as it may be above 4G.
+	 */
+	if (paging_config.l5_required) {
+		trampoline[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() + _PAGE_TABLE_NOENC;
+	} else if (native_read_cr4() & X86_CR4_LA57) {
+		unsigned long src;
+
+		src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
+		memcpy(trampoline + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
+		       (void *)src, PAGE_SIZE);
+	}
+
 	return paging_config;
 }
+
+void cleanup_trampoline(void *trampoline)
+{
+	void *cr3 = (void *)__native_read_cr3();
+
+	/*
+	 * Move the top level page table out of trampoline memory,
+	 * if it's there.
+	 */
+	if (cr3 == trampoline + TRAMPOLINE_32BIT_PGTABLE_OFFSET) {
+		memcpy(pgtable_trampoline, trampoline + TRAMPOLINE_32BIT_PGTABLE_OFFSET, PAGE_SIZE);
+		native_write_cr3((unsigned long)pgtable_trampoline);
+	}
+
+	/* Restore trampoline memory */
+	memcpy(trampoline, trampoline_save, TRAMPOLINE_32BIT_SIZE);
+}
-- 
2.15.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ