lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1481294518-29595-3-git-send-email-bhe@redhat.com>
Date:   Fri,  9 Dec 2016 22:41:58 +0800
From:   Baoquan He <bhe@...hat.com>
To:     linux-kernel@...r.kernel.org
Cc:     tglx@...utronix.de, hpa@...or.com, mingo@...hat.com,
        x86@...nel.org, keescook@...omium.org, yinghai@...nel.org,
        bp@...e.de, thgarnie@...gle.com, kuleshovmail@...il.com,
        luto@...nel.org, mcgrof@...nel.org, anderson@...hat.com,
        dyoung@...hat.com, xlpang@...hat.com, Baoquan He <bhe@...hat.com>
Subject: [PATCH v2 2/2] x86/KASLR/64: Determine kernel text mapping size at runtime

X86 64 kernel takes KERNEL_IMAGE_SIZE as the kernel text mapping size,
and it's fixed as compiling time, changing from 512M to 1G as long as
CONFIG_RANDOMIZE_BASE is enabled, though people specify kernel option
"nokaslr" explicitly.

This could be a wrong behaviour. CONFIG_RANDOMIZE_BASE should only decide
if the KASLR code need be compiled in. If user specify "nokaslr", the
kernel should behave as no KASLR code compiled in at all.

So in this patch, define a new MACRO KERNEL_MAPPING_SIZE to represent the
size of kernel text mapping area, and let KERNEL_IMAGE_SIZE limit the size
of kernel runtime space. And change to determine the size of kernel text
mapping area at runtime. Though KASLR code compiled in, if "nokaslr"
specified, still set kernel mapping size to be 512M.

Signed-off-by: Baoquan He <bhe@...hat.com>
Acked-by: Kees Cook <keescook@...omium.org>
---
v1->v2:
  - Move declaration of kernel_mapping_size out of #ifdef CONFIG_RANDOMIZE_MEMORY
    scope of <asm/kaslr.h> to remove compiling error if CONFIG_RANDOMIZE_MEMORY is
    disabled.

  - Change to define _kernel_mapping_size as a static variable inside
    boot/compressed/kaslr.c file.

 arch/x86/boot/compressed/kaslr.c        | 20 +++++++++++++++-----
 arch/x86/include/asm/kaslr.h            |  1 +
 arch/x86/include/asm/page_64_types.h    | 19 +++++++++++--------
 arch/x86/include/asm/pgtable_64_types.h |  2 +-
 arch/x86/kernel/head64.c                | 11 ++++++-----
 arch/x86/kernel/head_64.S               |  3 ++-
 arch/x86/mm/dump_pagetables.c           |  3 ++-
 arch/x86/mm/physaddr.c                  |  6 +++---
 8 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index a66854d..823f294 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -22,6 +22,12 @@
 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
 		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
 
+/*
+ * By default, the size of kernel text mapping equals KERNEL_IMAGE_SIZE.
+ * While x86_64 may extend it to 1G if KASLR is enabled.
+ */
+static unsigned long _kernel_mapping_size = KERNEL_IMAGE_SIZE;
+
 static unsigned long rotate_xor(unsigned long hash, const void *area,
 				size_t size)
 {
@@ -311,7 +317,7 @@ static void process_e820_entry(struct e820entry *entry,
 		return;
 
 	/* On 32-bit, ignore entries entirely above our maximum. */
-	if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= KERNEL_IMAGE_SIZE)
+	if (IS_ENABLED(CONFIG_X86_32) && entry->addr >= _kernel_mapping_size)
 		return;
 
 	/* Ignore entries entirely below our minimum. */
@@ -341,8 +347,8 @@ static void process_e820_entry(struct e820entry *entry,
 
 		/* On 32-bit, reduce region size to fit within max size. */
 		if (IS_ENABLED(CONFIG_X86_32) &&
-		    region.start + region.size > KERNEL_IMAGE_SIZE)
-			region.size = KERNEL_IMAGE_SIZE - region.start;
+		    region.start + region.size > _kernel_mapping_size)
+			region.size = _kernel_mapping_size - region.start;
 
 		/* Return if region can't contain decompressed kernel */
 		if (region.size < image_size)
@@ -408,9 +414,9 @@ static unsigned long find_random_virt_addr(unsigned long minimum,
 	/*
 	 * There are how many CONFIG_PHYSICAL_ALIGN-sized slots
 	 * that can hold image_size within the range of minimum to
-	 * KERNEL_IMAGE_SIZE?
+	 * _kernel_mapping_size?
 	 */
-	slots = (KERNEL_IMAGE_SIZE - minimum - image_size) /
+	slots = (_kernel_mapping_size - minimum - image_size) /
 		 CONFIG_PHYSICAL_ALIGN + 1;
 
 	random_addr = kaslr_get_random_long("Virtual") % slots;
@@ -438,6 +444,10 @@ void choose_random_location(unsigned long input,
 		return;
 	}
 
+#ifdef CONFIG_X86_64
+	_kernel_mapping_size = KERNEL_MAPPING_SIZE_EXT;
+#endif
+
 	boot_params->hdr.loadflags |= KASLR_FLAG;
 
 	/* Prepare to add new identity pagetables on demand. */
diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index 1052a79..093935d 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -2,6 +2,7 @@
 #define _ASM_KASLR_H_
 
 unsigned long kaslr_get_random_long(const char *purpose);
+extern unsigned long kernel_mapping_size;
 
 #ifdef CONFIG_RANDOMIZE_MEMORY
 extern unsigned long page_offset_base;
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 62a20ea..b8e79d7 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -49,18 +49,21 @@
 #define __PHYSICAL_MASK_SHIFT	46
 #define __VIRTUAL_MASK_SHIFT	47
 
+
+/*
+ * Kernel image size is limited to 512 MB. The kernel code+data+bss
+ * must not be bigger than that.
+ */
+#define KERNEL_IMAGE_SIZE	(512 * 1024 * 1024)
+
 /*
- * Kernel image size is limited to 1GiB due to the fixmap living in the
- * next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). Use
- * 512MiB by default, leaving 1.5GiB for modules once the page tables
+ * Kernel mapping size is limited to 1GiB due to the fixmap living in
+ * the next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S).
+ * Use 512MiB by default, leaving 1.5GiB for modules once the page tables
  * are fully set up. If kernel ASLR is configured, it can extend the
  * kernel page table mapping, reducing the size of the modules area.
  */
 #define KERNEL_MAPPING_SIZE_EXT	(1024 * 1024 * 1024)
-#if defined(CONFIG_RANDOMIZE_BASE)
-#define KERNEL_IMAGE_SIZE	KERNEL_MAPPING_SIZE_EXT
-#else
-#define KERNEL_IMAGE_SIZE	(512 * 1024 * 1024)
-#endif
+#define KERNEL_MAPPING_SIZE	kernel_mapping_size
 
 #endif /* _ASM_X86_PAGE_64_DEFS_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 3a26420..a357050 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -66,7 +66,7 @@ typedef struct { pteval_t pte; } pte_t;
 #define VMEMMAP_START	__VMEMMAP_BASE
 #endif /* CONFIG_RANDOMIZE_MEMORY */
 #define VMALLOC_END	(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
-#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+#define MODULES_VADDR    (__START_KERNEL_map + KERNEL_MAPPING_SIZE)
 #define MODULES_END      _AC(0xffffffffff000000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
 #define ESPFIX_PGD_ENTRY _AC(-2, UL)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 54a2372..46d2bd2 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -28,6 +28,7 @@
 #include <asm/bootparam_utils.h>
 #include <asm/microcode.h>
 #include <asm/kasan.h>
+#include <asm/cmdline.h>
 
 /*
  * Manage page tables very early on.
@@ -36,6 +37,7 @@ extern pgd_t early_level4_pgt[PTRS_PER_PGD];
 extern pmd_t early_dynamic_pgts[EARLY_DYNAMIC_PAGE_TABLES][PTRS_PER_PMD];
 static unsigned int __initdata next_early_pgt = 2;
 pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
+unsigned long kernel_mapping_size = KERNEL_IMAGE_SIZE;
 
 /* Wipe all early page tables except for the kernel symbol map */
 static void __init reset_early_page_tables(void)
@@ -138,12 +140,7 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 	 * Build-time sanity checks on the kernel image and module
 	 * area mappings. (these are purely build-time and produce no code)
 	 */
-	BUILD_BUG_ON(MODULES_VADDR < __START_KERNEL_map);
-	BUILD_BUG_ON(MODULES_VADDR - __START_KERNEL_map < KERNEL_IMAGE_SIZE);
-	BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
 	BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
-	BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
-	BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
 	BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
 				(__START_KERNEL & PGDIR_MASK)));
 	BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
@@ -165,6 +162,10 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
 
 	copy_bootdata(__va(real_mode_data));
 
+	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) &&
+		!cmdline_find_option_bool(boot_command_line, "nokaslr"))
+		kernel_mapping_size = KERNEL_MAPPING_SIZE_EXT;
+
 	/*
 	 * Load microcode early on BSP.
 	 */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index c4b40e7c9..8bbb29e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -461,7 +461,8 @@ NEXT_PAGE(level2_kernel_pgt)
 	 * 512M if no kaslr, 1G if kaslr enabled. Later cleanup_highmap will
 	 * clean up those unused entries.
 	 *
-	 * The module area starts after kernel mapping area.
+	 * The module area starts after kernel mapping area, see MODULES_VADDR.
+	 * It will vary with KERNEL_MAPPING_SIZE.
 	 */
 	PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
 		PTRS_PER_PMD)
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index ea9c49a..412c3f5 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -82,7 +82,7 @@ static struct addr_marker address_markers[] = {
 	{ EFI_VA_END,		"EFI Runtime Services" },
 # endif
 	{ __START_KERNEL_map,   "High Kernel Mapping" },
-	{ MODULES_VADDR,        "Modules" },
+	{ 0/*MODULES_VADDR*/,        "Modules" },
 	{ MODULES_END,          "End Modules" },
 #else
 	{ PAGE_OFFSET,          "Kernel Mapping" },
@@ -442,6 +442,7 @@ static int __init pt_dump_init(void)
 	address_markers[LOW_KERNEL_NR].start_address = PAGE_OFFSET;
 	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
 	address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
+	address_markers[MODULES_VADDR_NR].start_address = MODULES_VADDR;
 #endif
 #ifdef CONFIG_X86_32
 	address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
diff --git a/arch/x86/mm/physaddr.c b/arch/x86/mm/physaddr.c
index cfc3b91..c0b70fc 100644
--- a/arch/x86/mm/physaddr.c
+++ b/arch/x86/mm/physaddr.c
@@ -18,7 +18,7 @@ unsigned long __phys_addr(unsigned long x)
 	if (unlikely(x > y)) {
 		x = y + phys_base;
 
-		VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
+		VIRTUAL_BUG_ON(y >= KERNEL_MAPPING_SIZE);
 	} else {
 		x = y + (__START_KERNEL_map - PAGE_OFFSET);
 
@@ -35,7 +35,7 @@ unsigned long __phys_addr_symbol(unsigned long x)
 	unsigned long y = x - __START_KERNEL_map;
 
 	/* only check upper bounds since lower bounds will trigger carry */
-	VIRTUAL_BUG_ON(y >= KERNEL_IMAGE_SIZE);
+	VIRTUAL_BUG_ON(y >= KERNEL_MAPPING_SIZE);
 
 	return y + phys_base;
 }
@@ -50,7 +50,7 @@ bool __virt_addr_valid(unsigned long x)
 	if (unlikely(x > y)) {
 		x = y + phys_base;
 
-		if (y >= KERNEL_IMAGE_SIZE)
+		if (y >= KERNEL_MAPPING_SIZE)
 			return false;
 	} else {
 		x = y + (__START_KERNEL_map - PAGE_OFFSET);
-- 
2.5.5

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ