lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1560468577-101178-8-git-send-email-yang.shi@linux.alibaba.com>
Date:   Fri, 14 Jun 2019 07:29:35 +0800
From:   Yang Shi <yang.shi@...ux.alibaba.com>
To:     mhocko@...e.com, mgorman@...hsingularity.net, riel@...riel.com,
        hannes@...xchg.org, akpm@...ux-foundation.org,
        dave.hansen@...el.com, keith.busch@...el.com,
        dan.j.williams@...el.com, fengguang.wu@...el.com, fan.du@...el.com,
        ying.huang@...el.com, ziy@...dia.com
Cc:     yang.shi@...ux.alibaba.com, linux-mm@...ck.org,
        linux-kernel@...r.kernel.org
Subject: [v3 PATCH 7/9] mm: vmscan: check if the demote target node is contended or not

When demoting to the migration target node, the target node may be
under memory pressure, and that pressure may cause migrate_pages() to
fail.

If the failure is caused by memory pressure (i.e. it returns -ENOMEM),
tag the node with PGDAT_CONTENDED.  The tag is cleared once the
target node is balanced again.

Check whether the target node is tagged PGDAT_CONTENDED; if it is,
skip demotion.

Signed-off-by: Yang Shi <yang.shi@...ux.alibaba.com>
---
 include/linux/mmzone.h |  3 +++
 mm/vmscan.c            | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 70394ca..d4e05c5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -573,6 +573,9 @@ enum pgdat_flags {
 					 * many pages under writeback
 					 */
 	PGDAT_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
+	PGDAT_CONTENDED,		/* the node does not have enough
+					 * free memory available
+					 */
 };
 
 enum zone_flags {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fb931ded..9ec55d7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1126,6 +1126,21 @@ static inline struct page *alloc_demote_page(struct page *page,
 }
 #endif
 
+static inline bool is_migration_target_contended(int nid)
+{
+	int node;
+	nodemask_t used_mask;
+
+
+	nodes_clear(used_mask);
+	node = find_next_best_node(nid, &used_mask, true);
+
+	if (test_bit(PGDAT_CONTENDED, &NODE_DATA(node)->flags))
+		return true;
+
+	return false;
+}
+
 static inline bool is_demote_ok(int nid, struct scan_control *sc)
 {
 	/* Just do demotion with migrate mode of node reclaim */
@@ -1144,6 +1159,10 @@ static inline bool is_demote_ok(int nid, struct scan_control *sc)
 	if (!has_migration_target_node_online())
 		return false;
 
+	/* Check if the demote target node is contended or not */
+	if (is_migration_target_contended(nid))
+		return false;
+
 	return true;
 }
 
@@ -1564,6 +1583,10 @@ static unsigned long shrink_page_list(struct list_head *page_list,
 		nr_reclaimed += nr_succeeded;
 
 		if (err) {
+			if (err == -ENOMEM)
+				set_bit(PGDAT_CONTENDED,
+					&NODE_DATA(target_nid)->flags);
+
 			putback_movable_pages(&demote_pages);
 
 			list_splice(&ret_pages, &demote_pages);
@@ -2597,6 +2620,19 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
 		 * scan target and the percentage scanning already complete
 		 */
 		lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE;
+
+		/*
+		 * The shrink_page_list() may find the demote target node is
+		 * contended, if so it doesn't make sense to scan anonymous
+		 * LRU again.
+		 *
+		 * Need to check whether swap is available too, since demotion
+		 * may happen on a swapless system.
+		 */
+		if (!is_demote_ok(pgdat->node_id, sc) &&
+		    (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0))
+			lru = LRU_FILE;
+
 		nr_scanned = targets[lru] - nr[lru];
 		nr[lru] = targets[lru] * (100 - percentage) / 100;
 		nr[lru] -= min(nr[lru], nr_scanned);
@@ -3447,6 +3483,7 @@ static void clear_pgdat_congested(pg_data_t *pgdat)
 	clear_bit(PGDAT_CONGESTED, &pgdat->flags);
 	clear_bit(PGDAT_DIRTY, &pgdat->flags);
 	clear_bit(PGDAT_WRITEBACK, &pgdat->flags);
+	clear_bit(PGDAT_CONTENDED, &pgdat->flags);
 }
 
 /*
-- 
1.8.3.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ