Date:   Thu, 22 Sep 2016 17:18:48 +0200
From:   Vlastimil Babka <vbabka@...e.cz>
To:     Michal Hocko <mhocko@...nel.org>
Cc:     Andrew Morton <akpm@...ux-foundation.org>,
        Arkadiusz Miskiewicz <a.miskiewicz@...il.com>,
        Ralf-Peter Rohbeck <Ralf-Peter.Rohbeck@...ntum.com>,
        Olaf Hering <olaf@...fle.de>, linux-kernel@...r.kernel.org,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        linux-mm@...ck.org, David Rientjes <rientjes@...gle.com>,
        Joonsoo Kim <iamjoonsoo.kim@....com>,
        Mel Gorman <mgorman@...hsingularity.net>,
        Rik van Riel <riel@...hat.com>
Subject: Re: [PATCH 0/4] reintroduce compaction feedback for OOM decisions

On 09/21/2016 07:18 PM, Michal Hocko wrote:
> On Tue 06-09-16 15:52:54, Vlastimil Babka wrote:
> 
> We still do not ignore fragindex at the full priority. This part has
> always been quite unclear to me, so I cannot really tell whether that
> makes any difference or not, but just to be on the safe side I would
> prefer to have _all_ the shortcuts out of the way at the highest
> priority. It is true that this will cause COMPACT_NOT_SUITABLE_ZONE
> and so keep retrying, but it is still a complication when trying to
> understand the workflow.
> 
> What do you think?
 
I was thinking that this shouldn't be a problem for non-costly orders and the
default extfrag_threshold, but better safe than sorry. Moreover, I think the
issue is much more dangerous for compaction_zonelist_suitable(), as explained
below.
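
To illustrate (a simplified sketch of the pre-patch __compaction_suitable()
logic, not the exact code): when called from compaction_zonelist_suitable(),
the watermark check can pass thanks to the reclaimable pages included in
wmark_target, while fragmentation_index() only looks at currently free pages:

	/* wmark_target = free + reclaimable when called from
	 * compaction_zonelist_suitable() */
	if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
				 ALLOC_CMA, wmark_target))
		return COMPACT_SKIPPED;	/* passes: reclaimable pages counted */

	/* ...but the index is computed from NR_FREE_PAGES only, so it can
	 * come out close to 0 ("lack of memory") and falsely veto
	 * compaction that reclaim would have made possible */
	fragindex = fragmentation_index(zone, order);
	if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
		return COMPACT_NOT_SUITABLE_ZONE;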

----8<----
From 0e6cb251aa6e3b1be7deff315c0238c4d478f22e Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@...e.cz>
Date: Thu, 22 Sep 2016 15:33:57 +0200
Subject: [PATCH] mm, compaction: ignore fragindex on highest direct compaction
 priority

The fragmentation index check in compaction_suitable() is the last heuristic
that we still allow at the highest compaction priority. Since it can cause a
premature OOM, disable it there as well. Even more problematic is its usage in
compaction_zonelist_suitable() -> __compaction_suitable(), where we check the
order-0 watermark against free plus available-for-reclaim pages, but the
fragindex considers only truly free pages. Thus we can get a result close to 0,
indicating failure due to lack of memory, and wrongly decide that compaction
won't be suitable even after reclaim. The solution is to skip the fragindex
check in this context as well, regardless of priority.

Signed-off-by: Vlastimil Babka <vbabka@...e.cz>
---
 include/linux/compaction.h |  5 +++--
 mm/compaction.c            | 44 +++++++++++++++++++++++---------------------
 mm/internal.h              |  1 +
 mm/vmscan.c                |  6 ++++--
 4 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 0d8415820fc3..3ccf13d57651 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -97,7 +97,8 @@ extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
 		const struct alloc_context *ac, enum compact_priority prio);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern enum compact_result compaction_suitable(struct zone *zone, int order,
-		unsigned int alloc_flags, int classzone_idx);
+		unsigned int alloc_flags, int classzone_idx,
+		bool check_fragindex);
 
 extern void defer_compaction(struct zone *zone, int order);
 extern bool compaction_deferred(struct zone *zone, int order);
@@ -183,7 +184,7 @@ static inline void reset_isolation_suitable(pg_data_t *pgdat)
 }
 
 static inline enum compact_result compaction_suitable(struct zone *zone, int order,
-					int alloc_flags, int classzone_idx)
+		int alloc_flags, int classzone_idx, bool check_fragindex)
 {
 	return COMPACT_SKIPPED;
 }
diff --git a/mm/compaction.c b/mm/compaction.c
index 86d4d0bbfc7c..ae6a115f37b2 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1379,7 +1379,6 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
 					int classzone_idx,
 					unsigned long wmark_target)
 {
-	int fragindex;
 	unsigned long watermark;
 
 	if (is_via_compact_memory(order))
@@ -1415,6 +1414,18 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
 						ALLOC_CMA, wmark_target))
 		return COMPACT_SKIPPED;
 
+	return COMPACT_CONTINUE;
+}
+
+enum compact_result compaction_suitable(struct zone *zone, int order,
+					unsigned int alloc_flags,
+					int classzone_idx, bool check_fragindex)
+{
+	enum compact_result ret;
+	int fragindex;
+
+	ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx,
+			zone_page_state(zone, NR_FREE_PAGES));
 	/*
 	 * fragmentation index determines if allocation failures are due to
 	 * low memory or external fragmentation
@@ -1426,21 +1437,12 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order,
 	 *
 	 * Only compact if a failure would be due to fragmentation.
 	 */
-	fragindex = fragmentation_index(zone, order);
-	if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
-		return COMPACT_NOT_SUITABLE_ZONE;
-
-	return COMPACT_CONTINUE;
-}
-
-enum compact_result compaction_suitable(struct zone *zone, int order,
-					unsigned int alloc_flags,
-					int classzone_idx)
-{
-	enum compact_result ret;
+	if (ret == COMPACT_CONTINUE && check_fragindex) {
+		fragindex = fragmentation_index(zone, order);
+		if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold)
+			ret = COMPACT_NOT_SUITABLE_ZONE;
+	}
 
-	ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx,
-				    zone_page_state(zone, NR_FREE_PAGES));
 	trace_mm_compaction_suitable(zone, order, ret);
 	if (ret == COMPACT_NOT_SUITABLE_ZONE)
 		ret = COMPACT_SKIPPED;
@@ -1473,8 +1475,7 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
 		available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
 		compact_result = __compaction_suitable(zone, order, alloc_flags,
 				ac_classzone_idx(ac), available);
-		if (compact_result != COMPACT_SKIPPED &&
-				compact_result != COMPACT_NOT_SUITABLE_ZONE)
+		if (compact_result != COMPACT_SKIPPED)
 			return true;
 	}
 
@@ -1490,7 +1491,7 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro
 	const bool sync = cc->mode != MIGRATE_ASYNC;
 
 	ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
-							cc->classzone_idx);
+				cc->classzone_idx, !cc->ignore_fragindex);
 	/* Compaction is likely to fail */
 	if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
 		return ret;
@@ -1661,7 +1662,8 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
 		.direct_compaction = true,
 		.whole_zone = (prio == MIN_COMPACT_PRIORITY),
 		.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
-		.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
+		.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY),
+		.ignore_fragindex = (prio == MIN_COMPACT_PRIORITY)
 	};
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
@@ -1869,7 +1871,7 @@ static bool kcompactd_node_suitable(pg_data_t *pgdat)
 			continue;
 
 		if (compaction_suitable(zone, pgdat->kcompactd_max_order, 0,
-					classzone_idx) == COMPACT_CONTINUE)
+				classzone_idx, true) == COMPACT_CONTINUE)
 			return true;
 	}
 
@@ -1905,7 +1907,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
 		if (compaction_deferred(zone, cc.order))
 			continue;
 
-		if (compaction_suitable(zone, cc.order, 0, zoneid) !=
+		if (compaction_suitable(zone, cc.order, 0, zoneid, true) !=
 							COMPACT_CONTINUE)
 			continue;
 
diff --git a/mm/internal.h b/mm/internal.h
index 537ac9951f5f..f18adf559e28 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -179,6 +179,7 @@ struct compact_control {
 	enum migrate_mode mode;		/* Async or sync migration mode */
 	bool ignore_skip_hint;		/* Scan blocks even if marked skip */
 	bool ignore_block_suitable;	/* Scan blocks considered unsuitable */
+	bool ignore_fragindex;		/* Ignore fragmentation index */
 	bool direct_compaction;		/* False from kcompactd or /proc/... */
 	bool whole_zone;		/* Whole zone should/has been scanned */
 	int order;			/* order a direct compactor needs */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 55943a284082..08f16893cb2b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2511,7 +2511,8 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 		if (!managed_zone(zone))
 			continue;
 
-		switch (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx)) {
+		switch (compaction_suitable(zone, sc->order, 0,
+						sc->reclaim_idx, true)) {
 		case COMPACT_SUCCESS:
 		case COMPACT_CONTINUE:
 			return false;
@@ -2624,7 +2625,8 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
 	unsigned long watermark;
 	enum compact_result suitable;
 
-	suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx);
+	suitable = compaction_suitable(zone, sc->order, 0, sc->reclaim_idx,
+									true);
 	if (suitable == COMPACT_SUCCESS)
 		/* Allocation should succeed already. Don't reclaim. */
 		return true;
-- 
2.10.0
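
For readers unfamiliar with the index semantics the commit message relies on:
fragmentation_index() boils down to __fragmentation_index() in mm/vmstat.c,
which looks roughly like the following (a from-memory sketch, not a verbatim
copy of the function):

	static int __fragmentation_index(unsigned int order,
					 struct contig_page_info *info)
	{
		unsigned long requested = 1UL << order;

		if (!info->free_blocks_total)
			return 0;

		/* The index only makes sense when a request would fail */
		if (info->free_blocks_suitable)
			return -1000;

		/*
		 * Index is between 0 and 1000:
		 *    0 -> failure due to lack of memory
		 * 1000 -> failure due to fragmentation
		 */
		return 1000 - div_u64((1000 +
				div_u64(info->free_pages * 1000ULL, requested)),
				info->free_blocks_total);
	}

Since free_pages here comes from the zone's free lists only, pages that are
reclaimable but not yet freed make the index look like "lack of memory", which
is exactly the false negative the patch avoids in
compaction_zonelist_suitable().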

