Index: linux-2.6-git/mm/page_alloc.c
===================================================================
--- linux-2.6-git.orig/mm/page_alloc.c	2010-02-18 15:58:20.000000000 +0100
+++ linux-2.6-git/mm/page_alloc.c	2010-02-18 16:53:19.000000000 +0100
@@ -1179,7 +1179,7 @@
 static inline
 struct page *buffered_rmqueue(struct zone *preferred_zone,
 			struct zone *zone, int order, gfp_t gfp_flags,
-			int migratetype)
+			int migratetype, int debugme)
 {
 	unsigned long flags;
 	struct page *page;
@@ -1195,14 +1195,19 @@
 		pcp = &zone_pcp(zone, cpu)->pcp;
 		list = &pcp->lists[migratetype];
 		local_irq_save(flags);
+		if (debugme) printk(KERN_INFO "%s - got list %p for migratetype %d\n", __func__, list, migratetype);
 		if (list_empty(list)) {
+			if (debugme) printk(KERN_INFO "%s - list was empty - go for rmqueue_bulk\n", __func__);
 			pcp->count += rmqueue_bulk(zone, 0,
 					pcp->batch, list,
 					migratetype, cold);
-			if (unlikely(list_empty(list)))
+			if (unlikely(list_empty(list))) {
+				if (debugme) printk(KERN_INFO "%s - list still empty after rmqueue_bulk\n", __func__);
 				goto failed;
+			}
 		}
 
+		if (debugme) printk(KERN_INFO "%s - list has pages - under local_irq_save on this cpu, taking one\n", __func__);
 		if (cold)
 			page = list_entry(list->prev, struct page, lru);
 		else
@@ -1238,8 +1243,10 @@
 	put_cpu();
 
 	VM_BUG_ON(bad_range(zone, page));
-	if (prep_new_page(page, order, gfp_flags))
+	if (prep_new_page(page, order, gfp_flags)) {
+		if (debugme) printk(KERN_INFO "%s - prep_new_page triggered - goto again\n", __func__);
 		goto again;
+	}
 	return page;
 
 failed:
@@ -1515,7 +1522,7 @@
 static struct page *
 get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
 		struct zonelist *zonelist, int high_zoneidx, int alloc_flags,
-		struct zone *preferred_zone, int migratetype)
+		struct zone *preferred_zone, int migratetype, int debugme)
 {
 	struct zoneref *z;
 	struct page *page = NULL;
@@ -1533,45 +1540,61 @@
 	 */
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						high_zoneidx, nodemask) {
+		if (debugme) printk(KERN_INFO "%s - zone loop - current zone '%s'\n", __func__, zone->name);
 		if (NUMA_BUILD && zlc_active &&
-			!zlc_zone_worth_trying(zonelist, z, allowednodes))
+			!zlc_zone_worth_trying(zonelist, z, allowednodes)) {
+			if (debugme) printk(KERN_INFO "%s - skip due to NUMA_BUILD && zlc_active\n", __func__);
 				continue;
+		}
 		if ((alloc_flags & ALLOC_CPUSET) &&
-			!cpuset_zone_allowed_softwall(zone, gfp_mask))
+			!cpuset_zone_allowed_softwall(zone, gfp_mask)) {
+			if (debugme) printk(KERN_INFO "%s - skip due to ALLOC_CPUSET && !allowed_softwall\n", __func__);
 				goto try_next_zone;
+		}
 
 		BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
 		if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
 			unsigned long mark;
 			int ret;
 
+			if (debugme) printk(KERN_INFO "%s - watermark check due to !(alloc_flags & ALLOC_NO_WATERMARKS)\n", __func__);
+
 			mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
 			if (zone_watermark_ok(zone, order, mark,
-				    classzone_idx, alloc_flags))
+				    classzone_idx, alloc_flags)) {
+				if (debugme) printk(KERN_INFO "%s - goto this_zone due to zone_watermark_ok\n", __func__);
 				goto try_this_zone;
+			}
 
-			if (zone_reclaim_mode == 0)
+			if (zone_reclaim_mode == 0) {
+				if (debugme) printk(KERN_INFO "%s - goto zone_full due to zone_reclaim_mode==0\n", __func__);
 				goto this_zone_full;
+			}
 
 			ret = zone_reclaim(zone, gfp_mask, order);
 			switch (ret) {
 			case ZONE_RECLAIM_NOSCAN:
 				/* did not scan */
+				if (debugme) printk(KERN_INFO "%s - goto next_zone due to ZONE_RECLAIM_NOSCAN\n", __func__);
 				goto try_next_zone;
 			case ZONE_RECLAIM_FULL:
 				/* scanned but unreclaimable */
+				if (debugme) printk(KERN_INFO "%s - goto zone_full due to ZONE_RECLAIM_FULL\n", __func__);
 				goto this_zone_full;
 			default:
 				/* did we reclaim enough */
 				if (!zone_watermark_ok(zone, order, mark,
-						classzone_idx, alloc_flags))
+						classzone_idx, alloc_flags)) {
+					if (debugme) printk(KERN_INFO "%s - goto zone_full due to !zone_watermark_ok\n", __func__);
 					goto this_zone_full;
+				}
 			}
 		}
 
 try_this_zone:
+		if (debugme) printk(KERN_INFO "%s - reach buffered_rmqueue for zone '%s'\n", __func__, zone->name);
 		page = buffered_rmqueue(preferred_zone, zone, order,
-						gfp_mask, migratetype);
+						gfp_mask, migratetype, debugme);
 		if (page)
 			break;
 this_zone_full:
@@ -1594,6 +1617,8 @@
 		zlc_active = 0;
 		goto zonelist_scan;
 	}
+
+	if (debugme) printk(KERN_INFO "%s - return page '%p'\n", __func__, page);
 	return page;
 }
 
@@ -1655,7 +1680,7 @@
 
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask,
 		order, zonelist, high_zoneidx,
 		ALLOC_WMARK_HIGH|ALLOC_CPUSET,
-		preferred_zone, migratetype);
+		preferred_zone, migratetype, 0);
 	if (page)
 		goto out;
@@ -1979,7 +2004,7 @@
 	page = get_page_from_freelist(gfp_mask, nodemask, order,
 					zonelist, high_zoneidx,
 					alloc_flags, preferred_zone,
-					migratetype);
+					migratetype, 0);
 	t4 = get_clock();
 
 	perf_count_pages_direct_reclaim++;
@@ -1994,11 +2019,12 @@
 	perf_count_direct_reclaim_slow_sum_progress += *did_some_progress;
 	perf_count_direct_reclaim_slow_sum_order += order;
 
-	perf_count_page_stats(nodemask, zonelist, high_zoneidx, preferred_zone, migratetype, *did_some_progress, gfp_mask, order);
+	/* perf_count_page_stats(nodemask, zonelist, high_zoneidx, preferred_zone, migratetype, *did_some_progress, gfp_mask, order); */
+	/* We know the stats look good, but even the second allocation fails (might be racy) - call get_page_from_freelist with debugging enabled instead */
 	page = get_page_from_freelist(gfp_mask, nodemask, order,
 					zonelist, high_zoneidx,
 					alloc_flags, preferred_zone,
-					migratetype);
+					migratetype, 1);
 	if (!page)
 		printk(KERN_DEBUG"second get_page_from_freelist fail\n");
 	else
@@ -2032,7 +2058,7 @@
 	do {
 		page = get_page_from_freelist(gfp_mask, nodemask, order,
 			zonelist, high_zoneidx, ALLOC_NO_WATERMARKS,
-			preferred_zone, migratetype);
+			preferred_zone, migratetype, 0);
 
 		if (!page && gfp_mask & __GFP_NOFAIL) {
 			perf_count_call_congestion_wait_from_alloc_pages_high_priority++;
@@ -2142,7 +2168,7 @@
 	/* This is the last chance, in general, before the goto nopage. */
 	page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
 			high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
-			preferred_zone, migratetype);
+			preferred_zone, migratetype, 0);
 	if (page)
 		goto got_pg;
 
@@ -2268,7 +2294,7 @@
 	/* First allocation attempt */
 	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
 			zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
-			preferred_zone, migratetype);
+			preferred_zone, migratetype, 0);
 	if (unlikely(!page))
 		page = __alloc_pages_slowpath(gfp_mask, order,
 				zonelist, high_zoneidx, nodemask,