[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20101209191033.GK19131@random.random>
Date: Thu, 9 Dec 2010 20:10:33 +0100
From: Andrea Arcangeli <aarcange@...hat.com>
To: Mel Gorman <mel@....ul.ie>
Cc: linux-mm@...ck.org, Linus Torvalds <torvalds@...ux-foundation.org>,
Andrew Morton <akpm@...ux-foundation.org>,
linux-kernel@...r.kernel.org,
Marcelo Tosatti <mtosatti@...hat.com>,
Adam Litke <agl@...ibm.com>, Avi Kivity <avi@...hat.com>,
Hugh Dickins <hugh.dickins@...cali.co.uk>,
Rik van Riel <riel@...hat.com>,
Dave Hansen <dave@...ux.vnet.ibm.com>,
Benjamin Herrenschmidt <benh@...nel.crashing.org>,
Ingo Molnar <mingo@...e.hu>, Mike Travis <travis@....com>,
KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>,
Christoph Lameter <cl@...ux-foundation.org>,
Chris Wright <chrisw@...s-sol.org>, bpicco@...hat.com,
KOSAKI Motohiro <kosaki.motohiro@...fujitsu.com>,
Balbir Singh <balbir@...ux.vnet.ibm.com>,
"Michael S. Tsirkin" <mst@...hat.com>,
Peter Zijlstra <peterz@...radead.org>,
Johannes Weiner <hannes@...xchg.org>,
Daisuke Nishimura <nishimura@....nes.nec.co.jp>,
Chris Mason <chris.mason@...cle.com>,
Borislav Petkov <bp@...en8.de>
Subject: Re: [PATCH 61 of 66] use compaction for GFP_ATOMIC order > 0
On Thu, Nov 18, 2010 at 04:31:24PM +0000, Mel Gorman wrote:
> I don't think this is related to THP although I see what you're doing.
> It should be handled on its own. I'd also wonder if some of the tg3
> failures are due to MIGRATE_RESERVE not being set properly when
> min_free_kbytes is automatically resized.
The failures also happened on older kernels except they wouldn't print
it in the kernel logs, but you're right we may get better with migrate
reserve enabled also on huge systems (it seems hugeadm
--set_recommended-min_free_kbytes was doing a little more than its
kernel counterpart with large systems with tons of ram, now fixed).
My status as far as this patch is concerned:
=========
Subject: use compaction for all allocation orders
From: Andrea Arcangeli <aarcange@...hat.com>
It makes no sense not to enable compaction for small order pages as we don't
want to end up with bad order 2 allocations and good and graceful order 9
allocations.
Signed-off-by: Andrea Arcangeli <aarcange@...hat.com>
---
diff --git a/mm/compaction.c b/mm/compaction.c
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -476,7 +495,7 @@ unsigned long try_to_compact_pages(struc
* made because an assumption is made that the page allocator can satisfy
* the "cheaper" orders without taking special steps
*/
- if (order <= PAGE_ALLOC_COSTLY_ORDER || !may_enter_fs || !may_perform_io)
+ if (!order || !may_enter_fs || !may_perform_io)
return rc;
count_vm_event(COMPACTSTALL);
===========
Subject: use compaction in kswapd for GFP_ATOMIC order > 0
From: Andrea Arcangeli <aarcange@...hat.com>
This takes advantage of memory compaction to properly generate pages of order >
0 if regular page reclaim fails and priority level becomes more severe and we
don't reach the proper watermarks.
Signed-off-by: Andrea Arcangeli <aarcange@...hat.com>
---
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -11,6 +11,9 @@
/* The full zone was compacted */
#define COMPACT_COMPLETE 3
+#define COMPACT_MODE_DIRECT_RECLAIM 0
+#define COMPACT_MODE_KSWAPD 1
+
#ifdef CONFIG_COMPACTION
extern int sysctl_compact_memory;
extern int sysctl_compaction_handler(struct ctl_table *table, int write,
@@ -20,6 +23,9 @@ extern int sysctl_extfrag_handler(struct
void __user *buffer, size_t *length, loff_t *ppos);
extern int fragmentation_index(struct zone *zone, unsigned int order);
+extern unsigned long compact_zone_order(struct zone *zone,
+ int order, gfp_t gfp_mask,
+ int compact_mode);
extern unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *mask);
@@ -59,6 +65,13 @@ static inline unsigned long try_to_compa
return COMPACT_CONTINUE;
}
+static inline unsigned long compact_zone_order(struct zone *zone,
+ int order, gfp_t gfp_mask,
+ int compact_mode)
+{
+ return COMPACT_CONTINUE;
+}
+
static inline void defer_compaction(struct zone *zone)
{
}
diff --git a/mm/compaction.c b/mm/compaction.c
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -38,6 +38,8 @@ struct compact_control {
unsigned int order; /* order a direct compactor needs */
int migratetype; /* MOVABLE, RECLAIMABLE etc */
struct zone *zone;
+
+ int compact_mode;
};
static unsigned long release_freepages(struct list_head *freelist)
@@ -357,10 +359,10 @@ static void update_nr_listpages(struct c
}
static int compact_finished(struct zone *zone,
- struct compact_control *cc)
+ struct compact_control *cc)
{
unsigned int order;
- unsigned long watermark = low_wmark_pages(zone) + (1 << cc->order);
+ unsigned long watermark;
if (fatal_signal_pending(current))
return COMPACT_PARTIAL;
@@ -370,12 +372,27 @@ static int compact_finished(struct zone
return COMPACT_COMPLETE;
/* Compaction run is not finished if the watermark is not met */
+ if (cc->compact_mode != COMPACT_MODE_KSWAPD)
+ watermark = low_wmark_pages(zone);
+ else
+ watermark = high_wmark_pages(zone);
+ watermark += (1 << cc->order);
+
if (!zone_watermark_ok(zone, cc->order, watermark, 0, 0))
return COMPACT_CONTINUE;
if (cc->order == -1)
return COMPACT_CONTINUE;
+ /*
+ * Generating only one page of the right order is not enough
+ * for kswapd, we must continue until we're above the high
+ * watermark as a pool for high order GFP_ATOMIC allocations
+ * too.
+ */
+ if (cc->compact_mode == COMPACT_MODE_KSWAPD)
+ return COMPACT_CONTINUE;
+
/* Direct compactor: Is a suitable page free? */
for (order = cc->order; order < MAX_ORDER; order++) {
/* Job done if page is free of the right migratetype */
@@ -433,8 +450,9 @@ static int compact_zone(struct zone *zon
return ret;
}
-static unsigned long compact_zone_order(struct zone *zone,
- int order, gfp_t gfp_mask)
+unsigned long compact_zone_order(struct zone *zone,
+ int order, gfp_t gfp_mask,
+ int compact_mode)
{
struct compact_control cc = {
.nr_freepages = 0,
@@ -442,6 +460,7 @@ static unsigned long compact_zone_order(
.order = order,
.migratetype = allocflags_to_migratetype(gfp_mask),
.zone = zone,
+ .compact_mode = compact_mode,
};
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);
@@ -517,7 +536,8 @@ unsigned long try_to_compact_pages(struc
break;
}
- status = compact_zone_order(zone, order, gfp_mask);
+ status = compact_zone_order(zone, order, gfp_mask,
+ COMPACT_MODE_DIRECT_RECLAIM);
rc = max(status, rc);
if (zone_watermark_ok(zone, order, watermark, 0, 0))
@@ -547,6 +567,7 @@ static int compact_node(int nid)
.nr_freepages = 0,
.nr_migratepages = 0,
.order = -1,
+ .compact_mode = COMPACT_MODE_DIRECT_RECLAIM,
};
zone = &pgdat->node_zones[zoneid];
diff --git a/mm/vmscan.c b/mm/vmscan.c
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -40,6 +40,7 @@
#include <linux/memcontrol.h>
#include <linux/delayacct.h>
#include <linux/sysctl.h>
+#include <linux/compaction.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -2148,6 +2149,7 @@ loop_again:
* cause too much scanning of the lower zones.
*/
for (i = 0; i <= end_zone; i++) {
+ int compaction;
struct zone *zone = pgdat->node_zones + i;
int nr_slab;
@@ -2177,9 +2179,26 @@ loop_again:
lru_pages);
sc.nr_reclaimed += reclaim_state->reclaimed_slab;
total_scanned += sc.nr_scanned;
+
+ compaction = 0;
+ if (order &&
+ zone_watermark_ok(zone, 0,
+ high_wmark_pages(zone),
+ end_zone, 0) &&
+ !zone_watermark_ok(zone, order,
+ high_wmark_pages(zone),
+ end_zone, 0)) {
+ compact_zone_order(zone,
+ order,
+ sc.gfp_mask,
+ COMPACT_MODE_KSWAPD);
+ compaction = 1;
+ }
+
if (zone->all_unreclaimable)
continue;
- if (nr_slab == 0 && !zone_reclaimable(zone))
+ if (!compaction && nr_slab == 0 &&
+ !zone_reclaimable(zone))
zone->all_unreclaimable = 1;
/*
* If we've done a decent amount of scanning and
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists