[PATCH] x86_64: fix setup_node_bootmem to support excluding large memory ranges with memmap. Typical case: a four-socket system where every node has 4G of RAM, and memmap=10g$4g is used to mask out the memory on node1 and node2. When NUMA is enabled, early_node_mem is used to get node_data and node_bootmap. If it cannot get them from the same node with find_e820_area, it will use alloc_bootmem to get a buffer from previous nodes, so check for that case and print info about it. early_res_to_bootmem needs to be moved into every setup_node_bootmem call, and it now takes the range that the node has; otherwise alloc_bootmem could return an address that was reserved early. This patch needs to be applied after [PATCH] mm: make reserve_bootmem can crossed the nodes Signed-off-by: Yinghai Lu Index: linux-2.6/arch/x86/mm/numa_64.c =================================================================== --- linux-2.6.orig/arch/x86/mm/numa_64.c +++ linux-2.6/arch/x86/mm/numa_64.c @@ -188,6 +188,7 @@ void __init setup_node_bootmem(int nodei unsigned long bootmap_start, nodedata_phys; void *bootmap; const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE); + int nid; start = round_up(start, ZONE_ALIGN); @@ -210,9 +211,20 @@ void __init setup_node_bootmem(int nodei NODE_DATA(nodeid)->node_start_pfn = start_pfn; NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn; - /* Find a place for the bootmem map */ + /* + * Find a place for the bootmem map + * nodedata_phys could be on other nodes by alloc_bootmem, + * so need to sure bootmap_start not to be small, otherwise + * early_node_mem will get that with find_e820_area instead + * of alloc_bootmem, that could clash with reserved range + */ bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE); + nid = phys_to_nid(nodedata_phys); + if (nid == nodeid) + bootmap_start = round_up(nodedata_phys + pgdat_size, + PAGE_SIZE); + else + bootmap_start = round_up(start, PAGE_SIZE); /* * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like * to use 
that to align to PAGE_SIZE @@ -237,10 +249,29 @@ void __init setup_node_bootmem(int nodei free_bootmem_with_active_regions(nodeid, end); - reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size, - BOOTMEM_DEFAULT); - reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, - bootmap_pages<name, name, sizeof(r->name) - 1); } -void __init early_res_to_bootmem(void) +void __init early_res_to_bootmem(unsigned long start, unsigned long end) { int i; + unsigned long final_start, final_end; for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { struct early_res *r = &early_res[i]; - printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i, - r->start, r->end - 1, r->name); - reserve_bootmem_generic(r->start, r->end - r->start); + final_start = max(start, r->start); + final_end = min(end, r->end); + if (final_start >= final_end) + continue; + printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, + final_start, final_end - 1, r->name); + reserve_bootmem_generic(final_start, final_end - final_start); } } Index: linux-2.6/include/asm-x86/e820_64.h =================================================================== --- linux-2.6.orig/include/asm-x86/e820_64.h +++ linux-2.6/include/asm-x86/e820_64.h @@ -41,7 +41,7 @@ extern struct e820map e820; extern void update_e820(void); extern void reserve_early(unsigned long start, unsigned long end, char *name); -extern void early_res_to_bootmem(void); +extern void early_res_to_bootmem(unsigned long start, unsigned long end); #endif/*!__ASSEMBLY__*/ Index: linux-2.6/arch/x86/kernel/setup_64.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/setup_64.c +++ linux-2.6/arch/x86/kernel/setup_64.c @@ -190,6 +190,7 @@ contig_initmem_init(unsigned long start_ bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn); e820_register_active_regions(0, start_pfn, end_pfn); free_bootmem_with_active_regions(0, end_pfn); + early_res_to_bootmem(0, end_pfn<