lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Sun, 01 Mar 2009 16:55:05 -0800
From:	Yinghai Lu <yinghai@...nel.org>
To:	"H. Peter Anvin" <hpa@...or.com>
CC:	Ingo Molnar <mingo@...e.hu>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Thomas Gleixner <tglx@...utronix.de>,
	Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
	Jeremy Fitzhardinge <jeremy.fitzhardinge@...rix.com>
Subject: Re: [PATCH] x86: put initial_pg_tables into bss -v2

H. Peter Anvin wrote:
> Yinghai Lu wrote:
>>
>>> No, this is garbage.  If you're insisting on getting rid of the brk-like
>>> allocation patterns,> YH
>  then you have to get an alternative dynamic
>>> allocator available to the pre-paging code.  Now, there is no reason we
>>> couldn't execute C code before enabling paging, although the code would
>>> either have to be PIC or linked at the physical address.
>>
>> you can use find_e820_area()/reserve_early() pair to find right
>> position for that.
>>
> 
> This stuff is currently done before paging is enabled, and existing C
> code can't be run as-is.  There are three ways to deal with that:
> 
> a) compile some of the code with -fPIC/-fPIE.
> b) link some code twice with different offsets.
> c) play really ugly games with segments (thus making the virtualization
>    guys unhappy.)
> 
> Pretty much, these options all suck.  Another option, of course, is to
> generate a fixed amount of page tables just to get us into the C
> environment, generate a new set, *and reclaim the old ones*.  That way
> we're not wasting memory if we're on a small-RAM machine.
> 
> It's still really ugly, though.  A much easier and cleaner way would
> seem to be to calculate a far limit on the brk and then marking it as a
> formal (non-alloc) section in the linker script and vmlinux file.  That
> way anything that examines the vmlinux file will see it as an exclusion
> section.  We can (and should) even verify that we don't overflow the brk
> and panic if we do.
> 
please check

[PATCH] x86: put initial_pg_tables into .data -v3

Impact: cleanup

Don't use ram after _end blindly for pagetables.
put those pg table into .data

also remove init_pg_tables_start/end tricks all around

v2: keep initial page table up to 512M only.
v3: acctually it is in .data.page_aligned
    add KERNEL_IMAGE_SIZE for 32bit, so small set it some small value than 512M
	when installed RAM is smaller than 512M
    initial_pgtable will cover to KERNEL_IMAGE_SIZE to avoid wasting.

Signed-off-by: Yinghai Lu <yinghai@...nel.org>

---
 arch/x86/Kconfig                     |    9 +++++
 arch/x86/include/asm/page_32_types.h |    9 +++++
 arch/x86/include/asm/pgtable_32.h    |    3 -
 arch/x86/include/asm/setup.h         |    3 -
 arch/x86/kernel/head32.c             |    3 -
 arch/x86/kernel/head_32.S            |   53 ++++++++++++-----------------------
 arch/x86/kernel/setup.c              |    9 -----
 arch/x86/kernel/vmlinux_32.lds.S     |    9 +++--
 arch/x86/lguest/boot.c               |    8 -----
 arch/x86/xen/mmu.c                   |    4 --
 10 files changed, 44 insertions(+), 66 deletions(-)

Index: linux-2.6/arch/x86/include/asm/setup.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/setup.h
+++ linux-2.6/arch/x86/include/asm/setup.h
@@ -105,9 +105,6 @@ extern struct boot_params boot_params;
 void __init i386_start_kernel(void);
 extern void probe_roms(void);
 
-extern unsigned long init_pg_tables_start;
-extern unsigned long init_pg_tables_end;
-
 #else
 void __init x86_64_start_kernel(char *real_mode);
 void __init x86_64_start_reservations(char *real_mode_data);
Index: linux-2.6/arch/x86/kernel/head32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head32.c
+++ linux-2.6/arch/x86/kernel/head32.c
@@ -29,9 +29,6 @@ void __init i386_start_kernel(void)
 		reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
 	}
 #endif
-	reserve_early(init_pg_tables_start, init_pg_tables_end,
-			"INIT_PG_TABLE");
-
 	reserve_ebda_region();
 
 	/*
Index: linux-2.6/arch/x86/kernel/head_32.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head_32.S
+++ linux-2.6/arch/x86/kernel/head_32.S
@@ -38,42 +38,30 @@
 #define X86_VENDOR_ID	new_cpu_data+CPUINFO_x86_vendor_id
 
 /*
- * This is how much memory *in addition to the memory covered up to
- * and including _end* we need mapped initially.
- * We need:
- *  - one bit for each possible page, but only in low memory, which means
- *     2^32/4096/8 = 128K worst case (4G/4G split.)
+ * This is how much memory for page table to and including _end
+ * we need mapped initially.
  *  - enough space to map all low memory, which means
- *     (2^32/4096) / 1024 pages (worst case, non PAE)
- *     (2^32/4096) / 512 + 4 pages (worst case for PAE)
- *  - a few pages for allocator use before the kernel pagetable has
- *     been set up
+ *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
+ *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
  *
  * Modulo rounding, each megabyte assigned here requires a kilobyte of
  * memory, which is currently unreclaimed.
  *
  * This should be a multiple of a page.
+ *
+ * KERNEL_IMAGE_SIZE should be greater than pa(_end)
+ * and small than max_low_pfn, otherwise will waste some page table entries
  */
-LOW_PAGES = 1<<(32-PAGE_SHIFT_asm)
-
-/*
- * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate
- * pagetables from above the 16MB DMA limit, so we'll have to set
- * up pagetables 16MB more (worst-case):
- */
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
-LOW_PAGES = LOW_PAGES + 0x1000000
-#endif
+LOW_PAGES = (KERNEL_IMAGE_SIZE + PAGE_SIZE_asm - 1)>>PAGE_SHIFT
 
 #if PTRS_PER_PMD > 1
 PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD
 #else
 PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD)
 #endif
-BOOTBITMAP_SIZE = LOW_PAGES / 8
 ALLOCATOR_SLOP = 4
 
-INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm
+INIT_MAP_SIZE = (PAGE_TABLE_SIZE + ALLOCATOR_SLOP) * PAGE_SIZE_asm
 
 /*
  * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,
@@ -166,10 +154,9 @@ num_subarch_entries = (. - subarch_entri
 
 /*
  * Initialize page tables.  This creates a PDE and a set of page
- * tables, which are located immediately beyond _end.  The variable
- * init_pg_tables_end is set up to point to the first "safe" location.
+ * tables, which are located immediately beyond _end. 
  * Mappings are created both at virtual address 0 (identity mapping)
- * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END.
+ * and PAGE_OFFSET for up to _end
  *
  * Note that the stack is not yet set up!
  */
@@ -191,7 +178,6 @@ default_entry:
 	xorl %ebx,%ebx				/* %ebx is kept at zero */
 
 	movl $pa(pg0), %edi
-	movl %edi, pa(init_pg_tables_start)
 	movl $pa(swapper_pg_pmd), %edx
 	movl $PTE_IDENT_ATTR, %eax
 10:
@@ -209,14 +195,13 @@ default_entry:
 	loop 11b
 
 	/*
-	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
-	 * bytes beyond the end of our own page tables.
+	 * End condition: we must map up to the end.
 	 */
-	leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+	movl $KERNEL_IMAGE_SIZE, %ebp
+	addl PTE_IDENT_ATTR, %ebp
 	cmpl %ebp,%eax
 	jb 10b
 1:
-	movl %edi,pa(init_pg_tables_end)
 	shrl $12, %eax
 	movl %eax, pa(max_pfn_mapped)
 
@@ -228,7 +213,6 @@ default_entry:
 page_pde_offset = (__PAGE_OFFSET >> 20);
 
 	movl $pa(pg0), %edi
-	movl %edi, pa(init_pg_tables_start)
 	movl $pa(swapper_pg_dir), %edx
 	movl $PTE_IDENT_ATTR, %eax
 10:
@@ -242,14 +226,13 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
 	addl $0x1000,%eax
 	loop 11b
 	/*
-	 * End condition: we must map up to and including INIT_MAP_BEYOND_END
-	 * bytes beyond the end of our own page tables; the +0x007 is
+	 * End condition: we must map up to end, the +0x007 is
 	 * the attribute bits
 	 */
-	leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp
+	movl $KERNEL_IMAGE_SIZE, %ebp
+	addl PTE_IDENT_ATTR, %ebp
 	cmpl %ebp,%eax
 	jb 10b
-	movl %edi,pa(init_pg_tables_end)
 	shrl $12, %eax
 	movl %eax, pa(max_pfn_mapped)
 
@@ -662,6 +645,8 @@ ENTRY(swapper_pg_dir)
 # endif
 	.align PAGE_SIZE_asm		/* needs to be page-sized too */
 #endif
+ENTRY(pg0)
+	.fill INIT_MAP_SIZE,1,0
 
 .data
 ENTRY(stack_start)
Index: linux-2.6/arch/x86/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup.c
+++ linux-2.6/arch/x86/kernel/setup.c
@@ -158,11 +158,6 @@ static struct resource bss_resource = {
 
 
 #ifdef CONFIG_X86_32
-/* This value is set up by the early boot code to point to the value
-   immediately after the boot time page tables.  It contains a *physical*
-   address, and must not be in the .bss segment! */
-unsigned long init_pg_tables_start __initdata = ~0UL;
-unsigned long init_pg_tables_end __initdata = ~0UL;
 
 static struct resource video_ram_resource = {
 	.name	= "Video RAM area",
@@ -715,11 +710,7 @@ void __init setup_arch(char **cmdline_p)
 	init_mm.start_code = (unsigned long) _text;
 	init_mm.end_code = (unsigned long) _etext;
 	init_mm.end_data = (unsigned long) _edata;
-#ifdef CONFIG_X86_32
-	init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
-#else
 	init_mm.brk = (unsigned long) &_end;
-#endif
 
 	code_resource.start = virt_to_phys(_text);
 	code_resource.end = virt_to_phys(_etext)-1;
Index: linux-2.6/arch/x86/kernel/vmlinux_32.lds.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/vmlinux_32.lds.S
+++ linux-2.6/arch/x86/kernel/vmlinux_32.lds.S
@@ -190,9 +190,6 @@ SECTIONS
 	. = ALIGN(4);
 	__bss_stop = .;
   	_end = . ;
-	/* This is where the kernel creates the early boot page tables */
-	. = ALIGN(PAGE_SIZE);
-	pg0 = . ;
   }
 
   /* Sections to be discarded */
@@ -205,6 +202,12 @@ SECTIONS
   DWARF_DEBUG
 }
 
+/*
+ * Build-time check on the image size:
+ */
+ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
+	"kernel image bigger than KERNEL_IMAGE_SIZE")
+
 #ifdef CONFIG_KEXEC
 /* Link time checks */
 #include <asm/kexec.h>
Index: linux-2.6/arch/x86/lguest/boot.c
===================================================================
--- linux-2.6.orig/arch/x86/lguest/boot.c
+++ linux-2.6/arch/x86/lguest/boot.c
@@ -1051,14 +1051,6 @@ __init void lguest_init(void)
 	 * lguest_init() where the rest of the fairly chaotic boot setup
 	 * occurs. */
 
-	/* The native boot code sets up initial page tables immediately after
-	 * the kernel itself, and sets init_pg_tables_end so they're not
-	 * clobbered.  The Launcher places our initial pagetables somewhere at
-	 * the top of our physical memory, so we don't need extra space: set
-	 * init_pg_tables_end to the end of the kernel. */
-	init_pg_tables_start = __pa(pg0);
-	init_pg_tables_end = __pa(pg0);
-
 	/* As described in head_32.S, we map the first 128M of memory. */
 	max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
 
Index: linux-2.6/arch/x86/xen/mmu.c
===================================================================
--- linux-2.6.orig/arch/x86/xen/mmu.c
+++ linux-2.6/arch/x86/xen/mmu.c
@@ -1716,9 +1716,7 @@ __init pgd_t *xen_setup_kernel_pagetable
 {
 	pmd_t *kernel_pmd;
 
-	init_pg_tables_start = __pa(pgd);
-	init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
-	max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024);
+	max_pfn_mapped = PFN_DOWN(__pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE + 512*1024);
 
 	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
 	memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
Index: linux-2.6/arch/x86/include/asm/pgtable_32.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/pgtable_32.h
+++ linux-2.6/arch/x86/include/asm/pgtable_32.h
@@ -42,9 +42,6 @@ extern void set_pmd_pfn(unsigned long, u
  */
 #undef TEST_ACCESS_OK
 
-/* The boot page tables (all created as a single array) */
-extern unsigned long pg0[];
-
 #ifdef CONFIG_X86_PAE
 # include <asm/pgtable-3level.h>
 #else
Index: linux-2.6/arch/x86/include/asm/page_32_types.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/page_32_types.h
+++ linux-2.6/arch/x86/include/asm/page_32_types.h
@@ -39,6 +39,15 @@
 #define __VIRTUAL_MASK_SHIFT	32
 #endif	/* CONFIG_X86_PAE */
 
+/*
+ * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
+ */
+#ifndef CONFIG_VMLINUX_RAM_SIZE
+# define KERNEL_IMAGE_SIZE	(512 * 1024 * 1024)
+#else
+# define KERNEL_IMAGE_SIZE	CONFIG_VMLINUX_RAM_SIZE
+#endif
+
 #ifndef __ASSEMBLY__
 
 /*
Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig
+++ linux-2.6/arch/x86/Kconfig
@@ -1030,6 +1030,15 @@ config PAGE_OFFSET
 	default 0xC0000000
 	depends on X86_32
 
+config VMLINUX_RAM_SIZE
+	hex "Initial ram size directly mapped"
+	range 0x400000 0x20000000
+	default 0x1000000
+	depends on X86_32 && EMBEDDED
+	---help---
+	  Select ram size that initial page table will cover. for system less 512M ram installed.
+	  the value should be greater than vmlinux and less than 512M
+
 config HIGHMEM
 	def_bool y
 	depends on X86_32 && (HIGHMEM64G || HIGHMEM4G)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists