[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20060907142744.GA31799@skynet.ie>
Date: Thu, 7 Sep 2006 15:27:44 +0100
From: mel@...net.ie (Mel Gorman)
To: Paul Jackson <pj@....com>
Cc: linux-kernel@...r.kernel.org, akpm@...l.org, haveblue@...ibm.com,
apw@...dowen.org, ak@....de, benh@...nel.crashing.org,
paulus@...ba.org, kmannth@...il.com, tony.luck@...el.com,
kamezawa.hiroyu@...fujitsu.com, y-goto@...fujitsu.com
Subject: [PATCH] Fix memmap accounting by approximating the map size
Arch-independent zone-sizing uses account_memmap() in an attempt to accurately
account for how much memory was used in a zone by memmap. Watermarks and
per-cpu sizes initialisations then take the memmap into account. However,
the memmap may span multiple zones and in one case, there was an underflow
causing boot failures.
The fix that perfectly accounts for memory consumed by memmap is complicated
with no clear benefit. The architecture-specific code in x86_64 was simpler
because it approximated how much memory was consumed for memmap backing that
zone regardless of where the memmap was really stored.
This patch ditches the account_memmap() complexity and replaces with the
simple approximation used by x86_64 while ensuring no underflow occurs.
Signed-off-by: Mel Gorman <mel@....ul.ie>
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.18-rc4-mm3-clean/mm/page_alloc.c linux-2.6.18-rc4-mm3-101_fix_account_memmap/mm/page_alloc.c
--- linux-2.6.18-rc4-mm3-clean/mm/page_alloc.c 2006-08-28 15:05:30.000000000 +0100
+++ linux-2.6.18-rc4-mm3-101_fix_account_memmap/mm/page_alloc.c 2006-09-07 14:36:05.000000000 +0100
@@ -2364,58 +2364,6 @@ static void __init calculate_node_totalp
realtotalpages);
}
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
-/* Account for mem_map for CONFIG_FLAT_NODE_MEM_MAP */
-unsigned long __meminit account_memmap(struct pglist_data *pgdat,
- int zone_index)
-{
- unsigned long pages = 0;
- if (zone_index == memmap_zone_idx(pgdat->node_mem_map)) {
- pages = pgdat->node_spanned_pages;
- pages = (pages * sizeof(struct page)) >> PAGE_SHIFT;
- printk(KERN_DEBUG "%lu pages used for memmap\n", pages);
- }
- return pages;
-}
-#else
-/* Account for mem_map for CONFIG_SPARSEMEM */
-unsigned long account_memmap(struct pglist_data *pgdat, int zone_index)
-{
- unsigned long pages = 0;
- unsigned long memmap_pfn;
- struct page *memmap_addr;
- int pnum;
- unsigned long pgdat_startpfn, pgdat_endpfn;
- struct mem_section *section;
-
- pgdat_startpfn = pgdat->node_start_pfn;
- pgdat_endpfn = pgdat_startpfn + pgdat->node_spanned_pages;
-
- /* Go through valid sections looking for memmap */
- for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
- if (!valid_section_nr(pnum))
- continue;
-
- section = __nr_to_section(pnum);
- if (!section_has_mem_map(section))
- continue;
-
- memmap_addr = __section_mem_map_addr(section);
- memmap_pfn = (unsigned long)memmap_addr >> PAGE_SHIFT;
-
- if (memmap_pfn < pgdat_startpfn || memmap_pfn >= pgdat_endpfn)
- continue;
-
- if (zone_index == memmap_zone_idx(memmap_addr))
- pages += (PAGES_PER_SECTION * sizeof(struct page));
- }
-
- pages >>= PAGE_SHIFT;
- printk(KERN_DEBUG "%lu pages used for SPARSE memmap\n", pages);
- return pages;
-}
-#endif
-
/*
* Set up the zone data structures:
* - mark all pages reserved
@@ -2437,17 +2385,32 @@ static void __meminit free_area_init_cor
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
- unsigned long size, realsize;
+ unsigned long size, realsize, memmap_pages;
size = zone_spanned_pages_in_node(nid, j, zones_size);
realsize = size - zone_absent_pages_in_node(nid, j,
zholes_size);
- realsize -= account_memmap(pgdat, j);
+ /*
+ * Adjust realsize so that it accounts for how much memory
+ * is used by this zone for memmap. This affects the watermark
+ * and per-cpu initialisations
+ */
+ memmap_pages = (size * sizeof(struct page)) >> PAGE_SHIFT;
+ if (realsize >= memmap_pages) {
+ realsize -= memmap_pages;
+ printk(KERN_DEBUG
+ " %s zone: %lu pages used for memmap\n",
+ zone_names[j], memmap_pages);
+ } else
+ printk(KERN_WARNING
+ " %s zone: %lu pages exceeds realsize %lu\n",
+ zone_names[j], memmap_pages, realsize);
+
/* Account for reserved DMA pages */
if (j == ZONE_DMA && realsize > dma_reserve) {
realsize -= dma_reserve;
- printk(KERN_DEBUG "%lu pages DMA reserved\n",
+ printk(KERN_DEBUG " DMA zone: %lu pages reserved\n",
dma_reserve);
}
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists