lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 16 Dec 2010 16:39:43 -0800
From:	Yinghai Lu <yinghai@...nel.org>
To:	"H. Peter Anvin" <hpa@...or.com>
CC:	Vivek Goyal <vgoyal@...hat.com>,
	"Eric W. Biederman" <ebiederm@...ssion.com>,
	Stanislaw Gruszka <sgruszka@...hat.com>,
	Ingo Molnar <mingo@...e.hu>,
	Thomas Gleixner <tglx@...utronix.de>,
	Maxim Uvarov <muvarov@...il.com>, linux-kernel@...r.kernel.org,
	Neil Horman <nhorman@...hat.com>,
	Jeremy Fitzhardinge <jeremy@...p.org>
Subject: Re: kdump broken on 2.6.37-rc4

On 12/16/2010 03:49 PM, Yinghai Lu wrote:
> On 12/16/2010 03:30 PM, Yinghai Lu wrote:
>> On 12/16/2010 11:58 AM, H. Peter Anvin wrote:
>>> On 12/16/2010 09:28 AM, Yinghai Lu wrote:
>>>>
>>>> the brk is complaining if i change that to 
>>>>
>>>>  	if (end > ((-__PAGE_OFFSET-(128 <<20)-1) & 0x7fffffff))
>>>>  		error("Destination address too large");
>>>>
>>>> brk is complaining when try to get more for dmi ...
>>>> ...
>>>> I'm in purgatory
>>>> bootconsole [uart0] enabled
>>>> Kernel Layout:
>>>>   .text: [0x2e000000-0x2e3f08ca]
>>>> .rodata: [0x2e3f2000-0x2e5a2fff]
>>>>   .data: [0x2e5a3000-0x2e5f6467]
>>>>   .init: [0x2e5f7000-0x2e670fff]
>>>>    .bss: [0x2e675000-0x2e76ffff]
>>>>    .brk: [0x2e770000-0x2e894fff]
>>>>     memblock_x86_reserve_range: [0x00001000-0x00001fff]    EX TRAMPOLINE
>>>>     memblock_x86_reserve_range: [0x2e000000-0x2e76ffff]    TEXT DATA BSS
>>>>     memblock_x86_reserve_range: [0x35bdd000-0x35f49fff]          RAMDISK
>>>>     memblock_x86_reserve_range: [0x0009c800-0x000fffff]  * BIOS reserved
>>>> Initializing cgroup subsys cpuset
>>>> Initializing cgroup subsys cpu
>>>> Linux version 2.6.37-rc5-tip+ (root@...12-3214-189-181) (gcc version 4.4.4 20100726 (Red Hat 4.4.4-13) (GCC) ) #4 SMP Wed Dec 15 11:04:32 PST 2010
>>>> KERNEL supported cpus:
>>>>   Intel GenuineIntel
>>>>   AMD AuthenticAMD
>>>>   NSC Geode by NSC
>>>>   Cyrix CyrixInstead
>>>>   Centaur CentaurHauls
>>>>   Transmeta GenuineTMx86
>>>>   Transmeta TransmetaCPU
>>>>   UMC UMC UMC UMC
>>>> BIOS-provided physical RAM map:
>>>>  BIOS-e820: [0x00000000000100-0x0000000009c7ff] (usable)
>>>>  BIOS-e820: [0x0000000009c800-0x0000000009ffff] (reserved)
>>>>  BIOS-e820: [0x000000000e0000-0x000000000fffff] (reserved)
>>>>  BIOS-e820: [0x00000000100000-0x0000007ff9ffff] (usable)
>>>>  BIOS-e820: [0x0000007ffae000-0x0000007ffaffff] (usable)
>>>>  BIOS-e820: [0x0000007ffb0000-0x0000007ffbdfff] (ACPI data)
>>>>  BIOS-e820: [0x0000007ffbe000-0x0000007ffeffff] (ACPI NVS)
>>>>  BIOS-e820: [0x0000007fff0000-0x0000007fffffff] (reserved)
>>>>  BIOS-e820: [0x000000e0000000-0x000000efffffff] (reserved)
>>>>  BIOS-e820: [0x000000fec00000-0x000000fec00fff] (reserved)
>>>>  BIOS-e820: [0x000000fee00000-0x000000feefffff] (reserved)
>>>>  BIOS-e820: [0x000000ff700000-0x000000ffffffff] (reserved)
>>>> last_pfn = 0x7ffb0 max_arch_pfn = 0x1000000
>>>> NX (Execute Disable) protection: active
>>>> user-defined physical RAM map:
>>>>  user: [0x00000000000000-0x0000000009ffff] (usable)
>>>>  user: [0x0000002e000000-0x00000035f59fff] (usable)
>>>>  user: [0x0000007ffb0000-0x0000007ffeffff] (ACPI data)
>>>> DMI present.
>>>> BUG: Int 6: CR2   (null)
>>>>      EDI 00000019  ESI ff940c18  EBP   (null)  ESP ee5a5e84
>>>>      EBX ee5cfb68  EDX 00000006  ECX 00000019  EAX ee8e6019
>>>>      err   (null)  EIP ee5fb4dd   CS 00000060  flg 00010002
>>>> Stack: 00000019 ee62bf45 ff942000 00000563 00000001 ff940c00 000018c7 ee62bf83
>>>>        ff940c00 ee62c063 80000000 ee3e6f2f ee50a3c0 ee5a5ed4 ff940c00 ff940c43
>>>>        000018c7   (null) ee3173d4 000018c8 0000007f ff940c00 ff90b1bf ee5a5f18
>>>> Pid: 0, comm: swapper Not tainted 2.6.37-rc5-tip+ #4
>>>> Call Trace:
>>>>  [<ee3dd1d5>] ? hlt_loop+0x0/0x3
>>>>  [<ee5fb4dd>] ? extend_brk+0x31/0x44
>>>
>>> I'm assuming it bails due to:
>>>
>>> 	BUG_ON((char *)(_brk_end + size) > __brk_limit);
>>>
>>> ... could you find out what _brk_end and __brk_limit are?
>>
>> void __init print_kernel_layout(void)
>> {
>>         printk("Kernel Layout:\n");
>>         printk("  .text: [%#010lx-%#010lx]\n", __pa_symbol(&_text), __pa_symbol(&_etext) - 1);
>>         printk(".rodata: [%#010lx-%#010lx]\n", __pa_symbol(&__start_rodata), __pa_symbol(&__end_rodata) - 1);
>>         printk("  .data: [%#010lx-%#010lx]\n", __pa_symbol(&_sdata), __pa_symbol(&_edata) - 1);
>>         printk("  .init: [%#010lx-%#010lx]\n", __pa_symbol(&__init_begin), __pa_symbol(&__init_end) - 1);
>>         printk("   .bss: [%#010lx-%#010lx]\n", __pa_symbol(&__bss_start), __pa_symbol(&__bss_stop) - 1);
>>         printk("   .brk: [%#010lx-%#010lx]\n", __pa_symbol(&__brk_base), __pa_symbol(&__brk_limit) - 1);
>> }
>>
>>>> Kernel Layout:
>>>>   .text: [0x2e000000-0x2e3f08ca]
>>>> .rodata: [0x2e3f2000-0x2e5a2fff]
>>>>   .data: [0x2e5a3000-0x2e5f6467]
>>>>   .init: [0x2e5f7000-0x2e670fff]
>>>>    .bss: [0x2e675000-0x2e76ffff]
>>>>    .brk: [0x2e770000-0x2e894fff]
>>
>> DMI present.
>> _brk_end: ee8e6000, __brk_limit: ee895000 
>>
> 
> It looks like arch/x86/kernel/head_32.S
> will put the page tables in _brk....
> 
> if the whole range is some high, it will use more buffer in _brk for ...
> 
> The brk pre-calculation could be wrong and too small.

32-bit assumes KERNEL_IMAGE_SIZE is 512M:
arch/x86/include/asm/page_32_types.h:#define KERNEL_IMAGE_SIZE  (512 * 1024 * 1024)
arch/x86/include/asm/page_64_types.h:#define KERNEL_IMAGE_SIZE  (512 * 1024 * 1024)
arch/x86/kernel/head64.c:       BUILD_BUG_ON(MODULES_VADDR-KERNEL_IMAGE_START < KERNEL_IMAGE_SIZE);
arch/x86/kernel/head64.c:       BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
arch/x86/kernel/head64.c:       max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
arch/x86/kernel/head_32.S: *     (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE)
arch/x86/kernel/head_32.S: *     (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE)
arch/x86/kernel/head_32.S: * KERNEL_IMAGE_SIZE should be greater than pa(_end)
arch/x86/kernel/head_32.S:KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT 

and uses that to estimate the BRK size.

So we could either change the BRK size calculation to handle 896M, or just limit crashkernel for 32-bit to 512M...

Patch to handle the 896M case:

---
 arch/x86/boot/compressed/misc.c |    2 +-
 arch/x86/kernel/head_32.S       |    4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

Index: linux-2.6/arch/x86/boot/compressed/misc.c
===================================================================
--- linux-2.6.orig/arch/x86/boot/compressed/misc.c
+++ linux-2.6/arch/x86/boot/compressed/misc.c
@@ -365,7 +365,7 @@ asmlinkage void decompress_kernel(void *
 	if (heap > 0x3fffffffffffUL)
 		error("Destination address too large");
 #else
-	if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
+	if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
 		error("Destination address too large");
 #endif
 #ifndef CONFIG_RELOCATABLE
Index: linux-2.6/arch/x86/kernel/head_32.S
===================================================================
--- linux-2.6.orig/arch/x86/kernel/head_32.S
+++ linux-2.6/arch/x86/kernel/head_32.S
@@ -68,8 +68,10 @@ MAPPING_BEYOND_END = \
  * Worst-case size of the kernel mapping we need to make:
  * the worst-case size of the kernel itself, plus the extra we need
  * to map for the linear map.
+ * To let a crashkernel bzImage stay high, map up to 896M. The extra brk
+ * space is claimed back when brk is concluded, so nothing is wasted.
  */
-KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT
+KERNEL_PAGES = (KERNEL_IMAGE_SIZE + (384<<20) + MAPPING_BEYOND_END)>>PAGE_SHIFT
 
 INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm
 RESERVE_BRK(pagetables, INIT_MAP_SIZE)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ