Date:	Mon,  3 Aug 2015 18:25:41 +0200
From:	Vlastimil Babka <vbabka@...e.cz>
To:	linux-mm@...ck.org
Cc:	linux-kernel@...r.kernel.org,
	Andrew Morton <akpm@...ux-foundation.org>,
	Hugh Dickins <hughd@...gle.com>,
	Andrea Arcangeli <aarcange@...hat.com>,
	"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
	Rik van Riel <riel@...hat.com>, Mel Gorman <mgorman@...e.de>,
	David Rientjes <rientjes@...gle.com>,
	Joonsoo Kim <iamjoonsoo.kim@....com>,
	Vlastimil Babka <vbabka@...e.cz>
Subject: [RFC v3 2/2] mm, compaction: make kcompactd rely on sysctl_extfrag_threshold

The previous patch introduced kcompactd kthreads, which are meant to keep
memory fragmentation lower than what kswapd achieves through its
reclaim/compaction activity. To do that, kcompactd needs a stricter criterion
for deciding when to start and stop compacting than the standard criteria,
which only try to satisfy the next single high-order allocation request. This
patch provides such a criterion with minimal changes and no new tunables.

The patch reuses the existing sysctl_extfrag_threshold tunable. This tunable
currently determines when direct compaction should stop trying to satisfy an
allocation: that happens when a page of the desired order has not been made
available, but the fragmentation index has already dropped below the given
threshold, meaning a failure would be due to lack of memory rather than
fragmentation, so further compaction is expected to be too costly and likely
to fail anyway.
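
For context (not part of this patch): the fragmentation index computed in
mm/vmstat.c is scaled by 1000, with values near 0 meaning a failure would be
due to lack of free memory and values near 1000 meaning it would be due to
fragmentation; the threshold (/proc/sys/vm/extfrag_threshold) defaults to 500.
A rough, paraphrased sketch of the calculation:

	/* sketch of __fragmentation_index(), paraphrased from mm/vmstat.c */
	unsigned long requested = 1UL << order;

	if (!info->free_blocks_total)
		return 0;	/* no free memory at all */

	/* a suitable block already exists, the index is meaningless */
	if (info->free_blocks_suitable)
		return -1000;

	/*
	 * 0    => an allocation would fail due to lack of memory
	 * 1000 => an allocation would fail due to fragmentation
	 */
	return 1000 - div_u64(1000 + div_u64(info->free_pages * 1000ULL,
				requested), info->free_blocks_total);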

For kcompactd, we simply ignore whether a page of the desired order is
already available, and continue compacting until the fragmentation index
drops below the threshold (or the whole zone has been scanned).
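
To make the difference concrete, here is a hypothetical, simplified helper
contrasting the two stop conditions (illustration only, not part of the
patch; the real checks in __compact_finished() and __compaction_suitable()
below also walk higher orders and consider migratetypes):

	static bool compaction_done(struct zone *zone, int order, bool kcompactd)
	{
		if (kcompactd)
			/* keep going until the zone is defragmented enough */
			return fragmentation_index(zone, order, true) <=
						sysctl_extfrag_threshold;

		/* direct compaction: one free page of the right order suffices */
		return zone->free_area[order].nr_free != 0;
	}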

Not-yet-signed-off-by: Vlastimil Babka <vbabka@...e.cz>
---
 include/linux/compaction.h |  7 ++++---
 mm/compaction.c            | 37 ++++++++++++++++++++++++++-----------
 mm/internal.h              |  1 +
 mm/vmscan.c                | 10 +++++-----
 mm/vmstat.c                | 12 +++++++-----
 5 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 8cd1fb5..c615465 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -36,14 +36,15 @@ extern int sysctl_extfrag_handler(struct ctl_table *table, int write,
 			void __user *buffer, size_t *length, loff_t *ppos);
 extern int sysctl_compact_unevictable_allowed;
 
-extern int fragmentation_index(struct zone *zone, unsigned int order);
+extern int fragmentation_index(struct zone *zone, unsigned int order,
+							bool ignore_suitable);
 extern unsigned long try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
 			int alloc_flags, const struct alloc_context *ac,
 			enum migrate_mode mode, int *contended);
 extern void compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern unsigned long compaction_suitable(struct zone *zone, int order,
-					int alloc_flags, int classzone_idx);
+			int alloc_flags, int classzone_idx, bool kcompactd);
 
 extern void defer_compaction(struct zone *zone, int order);
 extern bool compaction_deferred(struct zone *zone, int order);
@@ -73,7 +74,7 @@ static inline void reset_isolation_suitable(pg_data_t *pgdat)
 }
 
 static inline unsigned long compaction_suitable(struct zone *zone, int order,
-					int alloc_flags, int classzone_idx)
+			int alloc_flags, int classzone_idx, bool kcompactd)
 {
 	return COMPACT_SKIPPED;
 }
diff --git a/mm/compaction.c b/mm/compaction.c
index b051412..62b9e51 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1183,6 +1183,19 @@ static int __compact_finished(struct zone *zone, struct compact_control *cc,
 							cc->alloc_flags))
 		return COMPACT_CONTINUE;
 
+	if (cc->kcompactd) {
+		/*
+		 * kcompactd continues even if watermarks are met, until the
+		 * fragmentation index is so low that direct compaction
+		 * wouldn't be attempted
+		 */
+		int fragindex = fragmentation_index(zone, cc->order, true);
+		if (fragindex <= sysctl_extfrag_threshold)
+			return COMPACT_NOT_SUITABLE_ZONE;
+		else
+			return COMPACT_CONTINUE;
+	}
+
 	/* Direct compactor: Is a suitable page free? */
 	for (order = cc->order; order < MAX_ORDER; order++) {
 		struct free_area *area = &zone->free_area[order];
@@ -1231,7 +1244,7 @@ static int compact_finished(struct zone *zone, struct compact_control *cc,
  *   COMPACT_CONTINUE - If compaction should run now
  */
 static unsigned long __compaction_suitable(struct zone *zone, int order,
-					int alloc_flags, int classzone_idx)
+			int alloc_flags, int classzone_idx, bool kcompactd)
 {
 	int fragindex;
 	unsigned long watermark;
@@ -1246,10 +1259,10 @@ static unsigned long __compaction_suitable(struct zone *zone, int order,
 	watermark = low_wmark_pages(zone);
 	/*
 	 * If watermarks for high-order allocation are already met, there
-	 * should be no need for compaction at all.
+	 * should be no need for compaction at all, unless it's kcompactd.
 	 */
-	if (zone_watermark_ok(zone, order, watermark, classzone_idx,
-								alloc_flags))
+	if (!kcompactd && zone_watermark_ok(zone, order, watermark,
+						classzone_idx, alloc_flags))
 		return COMPACT_PARTIAL;
 
 	/*
@@ -1272,7 +1285,7 @@ static unsigned long __compaction_suitable(struct zone *zone, int order,
 	 *
 	 * Only compact if a failure would be due to fragmentation.
 	 */
-	fragindex = fragmentation_index(zone, order);
+	fragindex = fragmentation_index(zone, order, kcompactd);
 	if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
 		return COMPACT_NOT_SUITABLE_ZONE;
 
@@ -1280,11 +1293,12 @@ static unsigned long __compaction_suitable(struct zone *zone, int order,
 }
 
 unsigned long compaction_suitable(struct zone *zone, int order,
-					int alloc_flags, int classzone_idx)
+			int alloc_flags, int classzone_idx, bool kcompactd)
 {
 	unsigned long ret;
 
-	ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx);
+	ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx,
+								kcompactd);
 	trace_mm_compaction_suitable(zone, order, ret);
 	if (ret == COMPACT_NOT_SUITABLE_ZONE)
 		ret = COMPACT_SKIPPED;
@@ -1302,7 +1316,7 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 	unsigned long last_migrated_pfn = 0;
 
 	ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
-							cc->classzone_idx);
+					cc->classzone_idx, cc->kcompactd);
 	switch (ret) {
 	case COMPACT_PARTIAL:
 	case COMPACT_SKIPPED:
@@ -1731,8 +1745,8 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat, int order)
 	for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
 		zone = &pgdat->node_zones[zoneid];
 
-		if (compaction_suitable(zone, order, 0, zoneid) ==
-						COMPACT_CONTINUE)
+		if (compaction_suitable(zone, order, 0, zoneid, true) ==
+							COMPACT_CONTINUE)
 			return true;
 	}
 
@@ -1750,6 +1764,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
 	struct compact_control cc = {
 		.order = pgdat->kcompactd_max_order,
 		.mode = MIGRATE_SYNC_LIGHT,
+		.kcompactd = true,
 		//TODO: do this or not?
 		.ignore_skip_hint = true,
 	};
@@ -1760,7 +1775,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
 		if (!populated_zone(zone))
 			continue;
 
-		if (compaction_suitable(zone, cc.order, 0, zoneid) !=
+		if (compaction_suitable(zone, cc.order, 0, zoneid, true) !=
 							COMPACT_CONTINUE)
 			continue;
 
diff --git a/mm/internal.h b/mm/internal.h
index 36b23f1..2cea51a 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -184,6 +184,7 @@ struct compact_control {
 	unsigned long migrate_pfn;	/* isolate_migratepages search base */
 	enum migrate_mode mode;		/* Async or sync migration mode */
 	bool ignore_skip_hint;		/* Scan blocks even if marked skip */
+	bool kcompactd;			/* We are in kcompactd kthread */
 	int order;			/* order a direct compactor needs */
 	const gfp_t gfp_mask;		/* gfp mask of a direct compactor */
 	const int alloc_flags;		/* alloc flags of a direct compactor */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 075f53c..f6582b6 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2339,7 +2339,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 		return true;
 
 	/* If compaction would go ahead or the allocation would succeed, stop */
-	switch (compaction_suitable(zone, sc->order, 0, 0)) {
+	switch (compaction_suitable(zone, sc->order, 0, 0, false)) {
 	case COMPACT_PARTIAL:
 	case COMPACT_CONTINUE:
 		return false;
@@ -2467,7 +2467,7 @@ static inline bool compaction_ready(struct zone *zone, int order)
 	 * If compaction is not ready to start and allocation is not likely
 	 * to succeed without it, then keep reclaiming.
 	 */
-	if (compaction_suitable(zone, order, 0, 0) == COMPACT_SKIPPED)
+	if (compaction_suitable(zone, order, 0, 0, false) == COMPACT_SKIPPED)
 		return false;
 
 	return watermark_ok;
@@ -2941,7 +2941,7 @@ static bool zone_balanced(struct zone *zone, int order,
 		return false;
 
 	if (IS_ENABLED(CONFIG_COMPACTION) && order && compaction_suitable(zone,
-				order, 0, classzone_idx) == COMPACT_SKIPPED)
+			order, 0, classzone_idx, false) == COMPACT_SKIPPED)
 		return false;
 
 	return true;
@@ -3065,8 +3065,8 @@ static bool kswapd_shrink_zone(struct zone *zone,
 	 * from memory. Do not reclaim more than needed for compaction.
 	 */
 	if (IS_ENABLED(CONFIG_COMPACTION) && sc->order &&
-			compaction_suitable(zone, sc->order, 0, classzone_idx)
-							!= COMPACT_SKIPPED)
+			compaction_suitable(zone, sc->order, 0, classzone_idx,
+						false) != COMPACT_SKIPPED)
 		testorder = 0;
 
 	/*
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 4f5cd97..9916110 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -643,7 +643,8 @@ static void fill_contig_page_info(struct zone *zone,
  * The value can be used to determine if page reclaim or compaction
  * should be used
  */
-static int __fragmentation_index(unsigned int order, struct contig_page_info *info)
+static int __fragmentation_index(unsigned int order,
+			struct contig_page_info *info, bool ignore_suitable)
 {
 	unsigned long requested = 1UL << order;
 
@@ -651,7 +652,7 @@ static int __fragmentation_index(unsigned int order, struct contig_page_info *in
 		return 0;
 
 	/* Fragmentation index only makes sense when a request would fail */
-	if (info->free_blocks_suitable)
+	if (!ignore_suitable && info->free_blocks_suitable)
 		return -1000;
 
 	/*
@@ -664,12 +665,13 @@ static int __fragmentation_index(unsigned int order, struct contig_page_info *in
 }
 
 /* Same as __fragmentation index but allocs contig_page_info on stack */
-int fragmentation_index(struct zone *zone, unsigned int order)
+int fragmentation_index(struct zone *zone, unsigned int order,
+							bool ignore_suitable)
 {
 	struct contig_page_info info;
 
 	fill_contig_page_info(zone, order, &info);
-	return __fragmentation_index(order, &info);
+	return __fragmentation_index(order, &info, ignore_suitable);
 }
 #endif
 
@@ -1635,7 +1637,7 @@ static void extfrag_show_print(struct seq_file *m,
 				zone->name);
 	for (order = 0; order < MAX_ORDER; ++order) {
 		fill_contig_page_info(zone, order, &info);
-		index = __fragmentation_index(order, &info);
+		index = __fragmentation_index(order, &info, false);
 		seq_printf(m, "%d.%03d ", index / 1000, index % 1000);
 	}
 
-- 
2.4.6
