lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20121221222840.GB1102@phenom.dumpdata.com>
Date:	Fri, 21 Dec 2012 17:28:40 -0500
From:	Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>
To:	Yinghai Lu <yinghai@...nel.org>
Cc:	Thomas Gleixner <tglx@...utronix.de>, Ingo Molnar <mingo@...e.hu>,
	"H. Peter Anvin" <hpa@...or.com>,
	"Eric W. Biederman" <ebiederm@...ssion.com>,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH v5 03/13] x86, 64bit: Set extra ident mapping for whole
 kernel range

On Tue, Nov 27, 2012 at 11:50:32PM -0800, Yinghai Lu wrote:
> Current when kernel is loaded above 1G, only [_text, _text+2M] is set
> up with extra ident page table.
> That is not enough, some variables that could be used early are out of
> that range, like BRK for early page table.
> Need to set map for [_text, _end] include text/data/bss/brk...
> 
> Also current kernel is not allowed to be loaded above 512g, it thinks
> that address is too big.
> We need to add one extra spare page for level3 to point that 512g range.
> Need to check _text range and set level4 pg with that spare level3 page,
> and set level3 with level2 page to cover [_text, _end] with extra mapping.
> 
> At last, to handle crossing GB boundary, we need to add another
> level2 spare page. To handle crossing 512GB boundary, we need to
> add another level3 spare page to next 512G range.
> 
> Test on with kexec-tools with local test code to force loading kernel
> cross 1G, 5G, 512g, 513g.
> 
> We need this to put relocatable 64bit bzImage high above 1g.
> 
> -v4: add crossing GB boundary handling.
> -v5: use spare pages from BRK, so could save pages when kernel is not
> 	loaded above 1GB.
> 
> Signed-off-by: Yinghai Lu <yinghai@...nel.org>
> Cc: "Eric W. Biederman" <ebiederm@...ssion.com>
> ---
>  arch/x86/kernel/head_64.S |  203 +++++++++++++++++++++++++++++++++++++++++----
>  1 files changed, 187 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> index 94bf9cc..338799a 100644
> --- a/arch/x86/kernel/head_64.S
> +++ b/arch/x86/kernel/head_64.S
> @@ -20,6 +20,7 @@
>  #include <asm/processor-flags.h>
>  #include <asm/percpu.h>
>  #include <asm/nops.h>
> +#include <asm/setup.h>
>  
>  #ifdef CONFIG_PARAVIRT
>  #include <asm/asm-offsets.h>
> @@ -42,6 +43,13 @@ L3_PAGE_OFFSET = pud_index(__PAGE_OFFSET)
>  L4_START_KERNEL = pgd_index(__START_KERNEL_map)
>  L3_START_KERNEL = pud_index(__START_KERNEL_map)
>  
> +/* two for level3, and two for level2 */
> +SPARE_MAP_SIZE = (4 * PAGE_SIZE)
> +RESERVE_BRK(spare_map, SPARE_MAP_SIZE)

Perhaps 'spare_directory' ? Or 'spare_table' ?


> +
> +#define spare_page(x)	(__brk_base + (x) * PAGE_SIZE)
> +#define add_one_spare_page	addq $PAGE_SIZE, _brk_end(%rip)
> +
>  	.text
>  	__HEAD
>  	.code64
> @@ -78,12 +86,6 @@ startup_64:
>  	testl	%eax, %eax
>  	jnz	bad_address
>  
> -	/* Is the address too large? */
> -	leaq	_text(%rip), %rdx
> -	movq	$PGDIR_SIZE, %rax
> -	cmpq	%rax, %rdx
> -	jae	bad_address
> -
>  	/* Fixup the physical addresses in the page table
>  	 */
>  	addq	%rbp, init_level4_pgt + 0(%rip)
> @@ -97,25 +99,196 @@ startup_64:
>  
>  	addq	%rbp, level2_fixmap_pgt + (506*8)(%rip)
>  
> -	/* Add an Identity mapping if I am above 1G */
> +	/* Add an Identity mapping if _end is above 1G */
> +	leaq	_end(%rip), %r9
> +	decq	%r9
> +	cmp	$PUD_SIZE, %r9
> +	jl	ident_complete
> +
> +	/* Clear spare pages */
> +	leaq	__brk_base(%rip), %rdi
> +	xorq	%rax, %rax
> +	movq	$(SPARE_MAP_SIZE/8), %rcx
> +1:	decq	%rcx
> +	movq	%rax, (%rdi)
> +	leaq	8(%rdi), %rdi
> +	jnz	1b
> +
> +	/* get end */
> +	andq	$PMD_PAGE_MASK, %r9
> +	/* round start to 1G if it is below 1G */
>  	leaq	_text(%rip), %rdi
>  	andq	$PMD_PAGE_MASK, %rdi
> +	cmp	$PUD_SIZE, %rdi
> +	jg	1f
> +	movq	$PUD_SIZE, %rdi
> +1:
> +	/* get 512G index */
> +	movq	%r9, %r8
> +	shrq	$PGDIR_SHIFT, %r8
> +	andq	$(PTRS_PER_PGD - 1), %r8
> +	movq	%rdi, %rax
> +	shrq	$PGDIR_SHIFT, %rax
> +	andq	$(PTRS_PER_PGD - 1), %rax
> +
> +	/* cross two 512G ? */
> +	cmp	%r8, %rax
> +	jne	set_level3_other_512g
> +
> +	/* all in first 512G ? */
> +	cmp	$0, %rax
> +	je	skip_level3_spare
> +
> +	/* same 512G other than first 512g */
> +	/*
> +	 * We need one level3, one or two level 2,
> +	 * so use first one for level3.
> +	 */
> +	leaq    (spare_page(0) - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
> +	leaq    init_level4_pgt(%rip), %rbx
> +	movq    %rdx, 0(%rbx, %rax, 8)
> +	addq    $L4_PAGE_OFFSET, %rax
> +	movq    %rdx, 0(%rbx, %rax, 8)
> +	/* one level3 in BRK */
> +	add_one_spare_page
> +
> +	/* get 1G index */
> +	movq    %r9, %r8
> +	shrq    $PUD_SHIFT, %r8
> +	andq    $(PTRS_PER_PUD - 1), %r8
> +	movq    %rdi, %rax
> +	shrq    $PUD_SHIFT, %rax
> +	andq    $(PTRS_PER_PUD - 1), %rax
> +
> +	/* same 1G ? */
> +	cmp     %r8, %rax
> +	je	set_level2_start_only_not_first_512g
> +
> +	/* set level2 for end */
> +	leaq    spare_page(0)(%rip), %rbx
> +	leaq    (spare_page(2) - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
> +	movq    %rdx, 0(%rbx, %r8, 8)
> +	/* second one level2 in BRK */
> +	add_one_spare_page
> +
> +set_level2_start_only_not_first_512g:
> +	leaq    spare_page(0)(%rip), %rbx
> +	leaq    (spare_page(1) - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
> +	movq    %rdx, 0(%rbx, %rax, 8)
> +	/* first one level2 in BRK */
> +	add_one_spare_page
> +
> +	/* one spare level3 before level2*/
> +	leaq    spare_page(1)(%rip), %rbx
> +	jmp	set_level2_spare
> +
> +set_level3_other_512g:
> +	/*
> +	 * We need one or two level3, and two level2,
> +	 * so use first two for level2.
> +	 */
> +	/* for level2 last on first 512g */
> +	leaq	level3_ident_pgt(%rip), %rcx
> +	/* start is in first 512G ? */
> +	cmp	$0, %rax
> +	je	set_level2_start_other_512g
>  
> +	/* Set level3 for _text */
> +	leaq	(spare_page(3) - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
> +	leaq	init_level4_pgt(%rip), %rbx
> +	movq	%rdx, 0(%rbx, %rax, 8)
> +	addq	$L4_PAGE_OFFSET, %rax
> +	movq	%rdx, 0(%rbx, %rax, 8)
> +	/* first one level3 in BRK */
> +	add_one_spare_page
> +
> +	/* for level2 last not on first 512G */
> +	leaq	spare_page(3)(%rip), %rcx
> +
> +set_level2_start_other_512g:
> +	/* always need to set level2 */
>  	movq	%rdi, %rax
>  	shrq	$PUD_SHIFT, %rax
>  	andq	$(PTRS_PER_PUD - 1), %rax
> -	jz	ident_complete
> +	movq	%rcx, %rbx  /* %rcx : level3 spare or level3_ident_pgt */
> +	leaq	(spare_page(0) - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
> +	movq	%rdx, 0(%rbx, %rax, 8)
> +	/* first one level2 in BRK */
> +	add_one_spare_page
>  
> -	leaq	(level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
> +set_level3_end_other_512g:
> +	leaq	(spare_page(2) - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
> +	leaq	init_level4_pgt(%rip), %rbx
> +	movq	%rdx, 0(%rbx, %r8, 8)
> +	addq	$L4_PAGE_OFFSET, %r8
> +	movq	%rdx, 0(%rbx, %r8, 8)
> +	/* second one level3 in BRK */
> +	add_one_spare_page
> +
> +	/* always need to set level2 */
> +	movq	%r9, %r8
> +	shrq	$PUD_SHIFT, %r8
> +	andq	$(PTRS_PER_PUD - 1), %r8
> +	leaq	spare_page(2)(%rip), %rbx
> +	leaq	(spare_page(1) - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
> +	movq	%rdx, 0(%rbx, %r8, 8)
> +	/* second one level2 in BRK */
> +	add_one_spare_page
> +
> +	/* no spare level3 before level2 */
> +	leaq    spare_page(0)(%rip), %rbx
> +	jmp	set_level2_spare
> +
> +skip_level3_spare:
> +	/* We have one or two level2 */
> +	/* get 1G index */
> +	movq	%r9, %r8
> +	shrq	$PUD_SHIFT, %r8
> +	andq	$(PTRS_PER_PUD - 1), %r8
> +	movq	%rdi, %rax
> +	shrq	$PUD_SHIFT, %rax
> +	andq	$(PTRS_PER_PUD - 1), %rax
> +
> +	/* same 1G ? */
> +	cmp	%r8, %rax
> +	je	set_level2_start_only_first_512g
> +
> +	/* set level2 without level3 spare */
> +	leaq	level3_ident_pgt(%rip), %rbx
> +	leaq	(spare_page(1) - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
> +	movq	%rdx, 0(%rbx, %r8, 8)
> +	/* second one level2 in BRK */
> +	add_one_spare_page
> +
> +set_level2_start_only_first_512g:
> +	/*  set level2 without level3 spare */
>  	leaq	level3_ident_pgt(%rip), %rbx
> +	leaq	(spare_page(0) - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx
>  	movq	%rdx, 0(%rbx, %rax, 8)
> +	/* first one level2 in BRK */
> +	add_one_spare_page
>  
> +	/* no spare level3 */
> +	leaq    spare_page(0)(%rip), %rbx
> +
> +set_level2_spare:
>  	movq	%rdi, %rax
>  	shrq	$PMD_SHIFT, %rax
>  	andq	$(PTRS_PER_PMD - 1), %rax
>  	leaq	__PAGE_KERNEL_IDENT_LARGE_EXEC(%rdi), %rdx
> -	leaq	level2_spare_pgt(%rip), %rbx
> -	movq	%rdx, 0(%rbx, %rax, 8)
> +	/* %rbx is set before */
> +	movq	%r9, %r8
> +	shrq	$PMD_SHIFT, %r8
> +	andq	$(PTRS_PER_PMD - 1), %r8
> +	cmp	%r8, %rax
> +	jl	1f
> +	addq	$PTRS_PER_PMD, %r8
> +1:	movq	%rdx, 0(%rbx, %rax, 8)
> +	addq	$PMD_SIZE, %rdx
> +	incq	%rax
> +	cmp	%r8, %rax
> +	jle	1b
> +
>  ident_complete:
>  
>  	/*
> @@ -423,11 +596,9 @@ NEXT_PAGE(level2_kernel_pgt)
>  	 *  If you want to increase this then increase MODULES_VADDR
>  	 *  too.)
>  	 */
> -	PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
> -		KERNEL_IMAGE_SIZE/PMD_SIZE)
> -
> -NEXT_PAGE(level2_spare_pgt)
> -	.fill   512, 8, 0
> +	PMDS(0, __PAGE_KERNEL_LARGE_EXEC, KERNEL_IMAGE_SIZE/PMD_SIZE)
> +	/* hold the whole page */
> +	.fill (PTRS_PER_PMD - (KERNEL_IMAGE_SIZE/PMD_SIZE)), 8, 0
>  
>  #undef PMDS
>  #undef NEXT_PAGE
> -- 
> 1.7.7
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ