[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1462865763-22084-10-git-send-email-vbabka@suse.cz>
Date: Tue, 10 May 2016 09:35:59 +0200
From: Vlastimil Babka <vbabka@...e.cz>
To: Michal Hocko <mhocko@...nel.org>
Cc: linux-mm@...ck.org, Andrew Morton <akpm@...ux-foundation.org>,
Joonsoo Kim <iamjoonsoo.kim@....com>,
Rik van Riel <riel@...hat.com>,
David Rientjes <rientjes@...gle.com>,
Mel Gorman <mgorman@...hsingularity.net>,
Johannes Weiner <hannes@...xchg.org>,
Tetsuo Handa <penguin-kernel@...ove.sakura.ne.jp>,
linux-kernel@...r.kernel.org,
Linus Torvalds <torvalds@...ux-foundation.org>,
Vlastimil Babka <vbabka@...e.cz>
Subject: [RFC 09/13] mm, compaction: make whole_zone flag ignore cached scanner positions
A recent patch has added the whole_zone flag, which compaction sets when scanning
starts from the zone boundary, in order to report that the zone has been fully
scanned in one attempt. For allocations that want to try really hard or cannot
fail, we will want to introduce a mode where scanning the whole zone is guaranteed
regardless of the cached positions.
This patch reuses the whole_zone flag so that, if it is already set to true when
passed to compaction, the cached scanner positions are ignored. Employing this
flag during the reclaim/compaction loop will be done in the next patch. This
patch, however, converts compaction invoked from userspace via procfs to use
this flag.
Before this patch, the cached positions were first reset to zone boundaries and
then read back from struct zone, so there was a window where a parallel
compaction could replace the reset values, making the manual compaction less
effective. Using the flag instead of performing a reset is more robust.
Signed-off-by: Vlastimil Babka <vbabka@...e.cz>
---
mm/compaction.c | 15 +++++----------
mm/internal.h | 2 +-
2 files changed, 6 insertions(+), 11 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c
index f649c7bc6de5..1ce6783d3ead 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1442,11 +1442,13 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
*/
cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
cc->free_pfn = zone->compact_cached_free_pfn;
- if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
+ if (cc->whole_zone || cc->free_pfn < start_pfn ||
+ cc->free_pfn >= end_pfn) {
cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
zone->compact_cached_free_pfn = cc->free_pfn;
}
- if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
+ if (cc->whole_zone || cc->migrate_pfn < start_pfn ||
+ cc->migrate_pfn >= end_pfn) {
cc->migrate_pfn = start_pfn;
zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
@@ -1693,14 +1695,6 @@ static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
INIT_LIST_HEAD(&cc->freepages);
INIT_LIST_HEAD(&cc->migratepages);
- /*
- * When called via /proc/sys/vm/compact_memory
- * this makes sure we compact the whole zone regardless of
- * cached scanner positions.
- */
- if (is_via_compact_memory(cc->order))
- __reset_isolation_suitable(zone);
-
if (is_via_compact_memory(cc->order) ||
!compaction_deferred(zone, cc->order))
compact_zone(zone, cc);
@@ -1736,6 +1730,7 @@ static void compact_node(int nid)
.order = -1,
.mode = MIGRATE_SYNC,
.ignore_skip_hint = true,
+ .whole_zone = true,
};
__compact_pgdat(NODE_DATA(nid), &cc);
diff --git a/mm/internal.h b/mm/internal.h
index 556bc9d0a817..2acdee8ab0e6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -178,7 +178,7 @@ struct compact_control {
enum migrate_mode mode; /* Async or sync migration mode */
bool ignore_skip_hint; /* Scan blocks even if marked skip */
bool direct_compaction; /* False from kcompactd or /proc/... */
- bool whole_zone; /* Whole zone has been scanned */
+ bool whole_zone; /* Whole zone should/has been scanned */
int order; /* order a direct compactor needs */
const gfp_t gfp_mask; /* gfp mask of a direct compactor */
const unsigned int alloc_flags; /* alloc flags of a direct compactor */
--
2.8.2
Powered by blists - more mailing lists