From: Yinghai Lu
Subject: [PATCH v4] x86, kaslr, 64bit: Set new or extra ident_mapping

First, KASLR will support putting the randomized VO above 4G, so we must
set up an identity mapping for that range even when we come from the
startup_32 path.

Second, when booting from a 64-bit bootloader, the bootloader sets up the
identity mapping and boots via the ZO (arch/x86/boot/compressed/vmlinux)
startup_64 entry. The pages used by that page table need to be avoided
when we select the new random VO (vmlinux) base. Otherwise the
decompressor would overwrite them while decompressing.

One solution: walk the page table and find every page it uses for every
mem_avoid check, but that needs extra code.

The other solution: create a new identity mapping instead, so that the
pages for the page table sit in the _pgtable section of ZO, which is
already in the mem_avoid array. This way we can reuse the existing code
for setting up identity mappings.

_pgtable is shared between the 32-bit and 64-bit paths to reduce
init_size, since ZO's _rodata to _end now contributes to init_size.

The pgt buffer size needs to increase. When booting via startup_64, we
need to cover the old VO, params, cmdline and the new VO; in the extreme
case all of them cross a 512G boundary, which needs (2+2)*4 pages with
2M mappings. We also need 2 pages for the first 2M for VGA RAM, plus one
for level4. The total is 19 pages.

When booting via startup_32, KASLR may move the new VO above 4G, so we
need to set up an extra identity mapping for the new VO. The pgt buffer
then starts at _pgtable offset by 6 pages, and needs at most (2+2) pages
when the new VO crosses a 512G boundary. So 19 pages make both paths
happy.

Cc: Kees Cook
Cc: Jiri Kosina
Cc: Borislav Petkov
Cc: Matt Fleming
Signed-off-by: Yinghai Lu
---
-v3: add a mapping for the first 2M with video RAM when
     X86_VERBOSE_BOOTUP is set.
     No need to set up a mapping for setup_data: this late in the boot/ZO
     stage it is not accessed until the VO stage, and the VO stage uses
     early_memmap or kernel addresses to access it.
-v4: link misc_pgt.o instead of including misc_pgt.c in aslr.c, as
     requested by Kees.
---
 arch/x86/boot/compressed/Makefile   |    3 +
 arch/x86/boot/compressed/aslr.c     |   14 ++++
 arch/x86/boot/compressed/head_64.S  |    4 -
 arch/x86/boot/compressed/misc.h     |   11 +++
 arch/x86/boot/compressed/misc_pgt.c |  103 ++++++++++++++++++++++++++++++++++++
 arch/x86/include/asm/boot.h         |   19 ++++++
 6 files changed, 152 insertions(+), 2 deletions(-)

Index: linux-2.6/arch/x86/boot/compressed/misc_pgt.c
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86/boot/compressed/misc_pgt.c
@@ -0,0 +1,103 @@
+#define __pa(x) ((unsigned long)(x))
+#define __va(x) ((void *)((unsigned long)(x)))
+
+#include "misc.h"
+
+#include <asm/init.h>
+#include <asm/pgtable.h>
+
+#include "../../mm/ident_map.c"
+
+/* Allocator state passed to alloc_pgt_page() through mapping_info.context. */
+struct alloc_pgt_data {
+	unsigned char *pgt_buf;		/* start of the page table buffer */
+	unsigned long pgt_buf_size;	/* size of the buffer in bytes */
+	unsigned long pgt_buf_offset;	/* bytes already handed out */
+};
+
+/*
+ * Hand out the next PAGE_SIZE chunk of the buffer described by @context
+ * (a struct alloc_pgt_data). Returns NULL once the buffer is used up.
+ */
+static void *alloc_pgt_page(void *context)
+{
+	struct alloc_pgt_data *d = (struct alloc_pgt_data *)context;
+	unsigned char *p = (unsigned char *)d->pgt_buf;
+
+	if (d->pgt_buf_offset >= d->pgt_buf_size) {
+		debug_putstr("out of pgt_buf in misc_pgt.c\n");
+		return NULL;
+	}
+
+	p += d->pgt_buf_offset;
+	d->pgt_buf_offset += PAGE_SIZE;
+
+	return p;
+}
+
+/*
+ * Use a normal definition of memset() from string.c. There are already
+ * included header files which expect a definition of memset() and by
+ * the time we define memset macro, it is too late.
+ */
+#undef memset
+#define memzero(s, n) memset((s), 0, (n))
+
+unsigned long __force_order;
+static struct alloc_pgt_data pgt_data;
+static struct x86_mapping_info mapping_info;
+static pgd_t *level4p;
+
+/*
+ * Add an identity mapping for [start, start + size), widened to PMD_SIZE
+ * (2M) boundaries, to the page table rooted at level4p. The first call
+ * picks the page table buffer and the level4 page and zeroes the buffer.
+ * When a new level4 page is built (startup_64 case) it only takes effect
+ * once switch_pagetable() loads it into cr3.
+ */
+void fill_pagetable(unsigned long start, unsigned long size)
+{
+	unsigned long end = start + size;
+
+	if (!level4p) {
+		pgt_data.pgt_buf_offset = 0;
+		mapping_info.alloc_pgt_page = alloc_pgt_page;
+		mapping_info.context = &pgt_data;
+		mapping_info.pmd_flag = __PAGE_KERNEL_LARGE_EXEC;
+
+		/*
+		 * Did we come from startup_32? Then cr3 is _pgtable and we
+		 * can reuse it; otherwise allocate a new level4 page.
+		 */
+		level4p = (pgd_t *)read_cr3();
+		if ((unsigned long)level4p == (unsigned long)_pgtable) {
+			pgt_data.pgt_buf = (unsigned char *)_pgtable +
+						 BOOT_INIT_PGT_SIZE;
+			pgt_data.pgt_buf_size = BOOT_PGT_SIZE -
+						 BOOT_INIT_PGT_SIZE;
+
+			debug_putstr("boot via startup_32\n");
+		} else {
+			pgt_data.pgt_buf = (unsigned char *)_pgtable;
+			pgt_data.pgt_buf_size = BOOT_PGT_SIZE;
+
+			debug_putstr("boot via startup_64\n");
+			level4p = (pgd_t *)alloc_pgt_page(&pgt_data);
+		}
+		memset((unsigned char *)pgt_data.pgt_buf, 0,
+			pgt_data.pgt_buf_size);
+	}
+
+	/* align boundary to 2M */
+	start = round_down(start, PMD_SIZE);
+	end = round_up(end, PMD_SIZE);
+	if (start >= end)
+		return;
+
+	kernel_ident_mapping_init(&mapping_info, level4p, start, end);
+}
+
+/* Load level4p into cr3 so the mappings built so far are used. */
+void switch_pagetable(void)
+{
+	write_cr3((unsigned long)level4p);
+}
Index: linux-2.6/arch/x86/boot/compressed/aslr.c
===================================================================
--- linux-2.6.orig/arch/x86/boot/compressed/aslr.c
+++ linux-2.6/arch/x86/boot/compressed/aslr.c
@@ -160,6 +160,7 @@ static void mem_avoid_init(unsigned long
 	unsafe = (unsigned long)input + input_size;
 	mem_avoid[0].start = unsafe;
 	mem_avoid[0].size = unsafe_len;
+	fill_pagetable(output, init_size);
 
 	/* Avoid initrd. */
 	initrd_start = (u64)real_mode->ext_ramdisk_image << 32;
@@ -168,6 +169,7 @@ static void mem_avoid_init(unsigned long
 	initrd_size |= real_mode->hdr.ramdisk_size;
 	mem_avoid[1].start = initrd_start;
 	mem_avoid[1].size = initrd_size;
+	/* don't need to set mapping for initrd */
 
 	/* Avoid kernel command line. */
 	cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32;
@@ -178,10 +180,19 @@ static void mem_avoid_init(unsigned long
 		;
 	mem_avoid[2].start = cmd_line;
 	mem_avoid[2].size = cmd_line_size;
+	fill_pagetable(cmd_line, cmd_line_size);
 
 	/* Avoid params */
 	mem_avoid[3].start = (unsigned long)real_mode;
 	mem_avoid[3].size = sizeof(*real_mode);
+	fill_pagetable((unsigned long)real_mode, sizeof(*real_mode));
+
+	/* don't need to set mapping for setup_data */
+
+#ifdef CONFIG_X86_VERBOSE_BOOTUP
+	/* for video ram */
+	fill_pagetable(0, PMD_SIZE);
+#endif
 }
 
 /* Does this memory vector overlap a known avoided area? */
@@ -362,6 +373,9 @@ unsigned char *choose_kernel_location(st
 		goto out;
 
 	choice = random;
+
+	fill_pagetable(choice, init_size);
+	switch_pagetable();
 out:
 	return (unsigned char *)choice;
 }
Index: linux-2.6/arch/x86/boot/compressed/head_64.S
===================================================================
--- linux-2.6.orig/arch/x86/boot/compressed/head_64.S
+++ linux-2.6/arch/x86/boot/compressed/head_64.S
@@ -125,7 +125,7 @@ ENTRY(startup_32)
 	/* Initialize Page tables to 0 */
 	leal	pgtable(%ebx), %edi
 	xorl	%eax, %eax
-	movl	$((4096*6)/4), %ecx
+	movl	$(BOOT_INIT_PGT_SIZE/4), %ecx
 	rep	stosl
 
 	/* Build Level 4 */
@@ -477,4 +477,4 @@ boot_stack_end:
 	.section ".pgtable","a",@nobits
 	.balign 4096
 pgtable:
-	.fill 6*4096, 1, 0
+	.fill BOOT_PGT_SIZE, 1, 0
Index: linux-2.6/arch/x86/include/asm/boot.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/boot.h
+++ linux-2.6/arch/x86/include/asm/boot.h
@@ -32,7 +32,26 @@
 #endif /* !CONFIG_KERNEL_BZIP2 */
 
 #ifdef CONFIG_X86_64
+
 #define BOOT_STACK_SIZE	0x4000
+
+#define BOOT_INIT_PGT_SIZE (6*4096)
+#ifdef CONFIG_RANDOMIZE_BASE
+/*
+ * 1 page for level4, 2 pages for first 2M (video ram, only mapped with
+ * CONFIG_X86_VERBOSE_BOOTUP), (2+2)*4 pages for kernel, param, cmd_line
+ * and random kernel if all cross 512G boundary.
+ * So total will be 19 pages, or 17 without the first 2M.
+ */
+#ifdef CONFIG_X86_VERBOSE_BOOTUP
+#define BOOT_PGT_SIZE (19*4096)
+#else
+#define BOOT_PGT_SIZE (17*4096)
+#endif
+#else
+#define BOOT_PGT_SIZE BOOT_INIT_PGT_SIZE
+#endif
+
 #else
 #define BOOT_STACK_SIZE	0x1000
 #endif
Index: linux-2.6/arch/x86/boot/compressed/Makefile
===================================================================
--- linux-2.6.orig/arch/x86/boot/compressed/Makefile
+++ linux-2.6/arch/x86/boot/compressed/Makefile
@@ -46,6 +46,9 @@ vmlinux-objs-y := $(obj)/vmlinux.lds $(o
 
 vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
 vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/aslr.o
+ifdef CONFIG_X86_64
+	vmlinux-objs-$(CONFIG_RANDOMIZE_BASE) += $(obj)/misc_pgt.o
+endif
 
 $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
 
Index: linux-2.6/arch/x86/boot/compressed/misc.h
===================================================================
--- linux-2.6.orig/arch/x86/boot/compressed/misc.h
+++ linux-2.6/arch/x86/boot/compressed/misc.h
@@ -76,6 +76,17 @@ unsigned char *choose_kernel_location(st
 }
 #endif
 
+#ifdef CONFIG_X86_64
+void fill_pagetable(unsigned long start, unsigned long size);
+void switch_pagetable(void);
+extern unsigned char _pgtable[];
+#else
+static inline void fill_pagetable(unsigned long start, unsigned long size)
+{ }
+static inline void switch_pagetable(void)
+{ }
+#endif
+
 #ifdef CONFIG_EARLY_PRINTK
 /* early_serial_console.c */
 extern int early_serial_base;