[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1560468577-101178-8-git-send-email-yang.shi@linux.alibaba.com>
Date: Fri, 14 Jun 2019 07:29:35 +0800
From: Yang Shi <yang.shi@...ux.alibaba.com>
To: mhocko@...e.com, mgorman@...hsingularity.net, riel@...riel.com,
hannes@...xchg.org, akpm@...ux-foundation.org,
dave.hansen@...el.com, keith.busch@...el.com,
dan.j.williams@...el.com, fengguang.wu@...el.com, fan.du@...el.com,
ying.huang@...el.com, ziy@...dia.com
Cc: yang.shi@...ux.alibaba.com, linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Subject: [v3 PATCH 7/9] mm: vmscan: check if the demote target node is contended or not
When demoting pages to the migration target node, the target node may
itself be under memory pressure, and that pressure may cause
migrate_pages() to fail.
If the failure is caused by memory pressure (i.e. migrate_pages() returns
-ENOMEM), tag the target node with PGDAT_CONTENDED. The tag is cleared
once the target node is balanced again.
Before demoting, check whether the target node is tagged PGDAT_CONTENDED;
if it is, skip demotion.
Signed-off-by: Yang Shi <yang.shi@...ux.alibaba.com>
---
include/linux/mmzone.h | 3 +++
mm/vmscan.c | 37 +++++++++++++++++++++++++++++++++++++
2 files changed, 40 insertions(+)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 70394ca..d4e05c5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -573,6 +573,9 @@ enum pgdat_flags {
* many pages under writeback
*/
PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */
+ PGDAT_CONTENDED, /* the node has not enough free memory
+ * available
+ */
};
enum zone_flags {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index fb931ded..9ec55d7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1126,6 +1126,21 @@ static inline struct page *alloc_demote_page(struct page *page,
}
#endif
+static inline bool is_migration_target_contended(int nid)
+{
+ int node;
+ nodemask_t used_mask;
+
+ /* Look up a node for nid (presumably its demotion target - TODO confirm) and report whether PGDAT_CONTENDED is set on it. */
+ nodes_clear(used_mask);
+ node = find_next_best_node(nid, &used_mask, true);
+
+ if (test_bit(PGDAT_CONTENDED, &NODE_DATA(node)->flags))
+ return true;
+
+ return false;
+}
+
static inline bool is_demote_ok(int nid, struct scan_control *sc)
{
/* Just do demotion with migrate mode of node reclaim */
@@ -1144,6 +1159,10 @@ static inline bool is_demote_ok(int nid, struct scan_control *sc)
if (!has_migration_target_node_online())
return false;
+ /* Check if the demote target node is contended or not */
+ if (is_migration_target_contended(nid))
+ return false;
+
return true;
}
@@ -1564,6 +1583,10 @@ static unsigned long shrink_page_list(struct list_head *page_list,
nr_reclaimed += nr_succeeded;
if (err) {
+ if (err == -ENOMEM)
+ set_bit(PGDAT_CONTENDED,
+ &NODE_DATA(target_nid)->flags);
+
putback_movable_pages(&demote_pages);
list_splice(&ret_pages, &demote_pages);
@@ -2597,6 +2620,19 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
* scan target and the percentage scanning already complete
*/
lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE;
+
+ /*
+ * shrink_page_list() may find that the demote target node is
+ * contended; if so, it doesn't make sense to scan the anonymous
+ * LRU again.
+ *
+ * Swap availability must be checked as well, since demotion
+ * may happen on a swapless system.
+ */
+ if (!is_demote_ok(pgdat->node_id, sc) &&
+ (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0))
+ lru = LRU_FILE;
+
nr_scanned = targets[lru] - nr[lru];
nr[lru] = targets[lru] * (100 - percentage) / 100;
nr[lru] -= min(nr[lru], nr_scanned);
@@ -3447,6 +3483,7 @@ static void clear_pgdat_congested(pg_data_t *pgdat)
clear_bit(PGDAT_CONGESTED, &pgdat->flags);
clear_bit(PGDAT_DIRTY, &pgdat->flags);
clear_bit(PGDAT_WRITEBACK, &pgdat->flags);
+ clear_bit(PGDAT_CONTENDED, &pgdat->flags);
}
/*
--
1.8.3.1
Powered by blists - more mailing lists