[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1554955019-29472-8-git-send-email-yang.shi@linux.alibaba.com>
Date: Thu, 11 Apr 2019 11:56:57 +0800
From: Yang Shi <yang.shi@...ux.alibaba.com>
To: mhocko@...e.com, mgorman@...hsingularity.net, riel@...riel.com,
hannes@...xchg.org, akpm@...ux-foundation.org,
dave.hansen@...el.com, keith.busch@...el.com,
dan.j.williams@...el.com, fengguang.wu@...el.com, fan.du@...el.com,
ying.huang@...el.com, ziy@...dia.com
Cc: yang.shi@...ux.alibaba.com, linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Subject: [v2 PATCH 7/9] mm: vmscan: check if the demote target node is contended or not
When demoting to a PMEM node, the target node may be under memory pressure,
and that memory pressure may cause migrate_pages() to fail.
If the failure is caused by memory pressure (i.e. migrate_pages() returns
-ENOMEM), tag the target node with PGDAT_CONTENDED. The tag is cleared once
the target node is balanced again.
Check whether the target node is tagged PGDAT_CONTENDED; if it is, skip
demotion.
Signed-off-by: Yang Shi <yang.shi@...ux.alibaba.com>
---
include/linux/mmzone.h | 3 +++
mm/vmscan.c | 28 ++++++++++++++++++++++++++++
2 files changed, 31 insertions(+)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fba7741..de534db 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -520,6 +520,9 @@ enum pgdat_flags {
* many pages under writeback
*/
PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */
+ PGDAT_CONTENDED, /* the node does not have enough free
+ * memory available
+ */
};
enum zone_flags {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 80cd624..50cde53 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1048,6 +1048,9 @@ static void page_check_dirty_writeback(struct page *page,
static inline bool is_demote_ok(int nid, struct scan_control *sc)
{
+ int node;
+ nodemask_t used_mask;
+
/* It is pointless to do demotion in memcg reclaim */
if (!global_reclaim(sc))
return false;
@@ -1060,6 +1063,13 @@ static inline bool is_demote_ok(int nid, struct scan_control *sc)
if (!has_cpuless_node_online())
return false;
+ /* Check if the demote target node is contended or not */
+ nodes_clear(used_mask);
+ node = find_next_best_node(nid, &used_mask, true);
+
+ if (test_bit(PGDAT_CONTENDED, &NODE_DATA(node)->flags))
+ return false;
+
return true;
}
@@ -1502,6 +1512,10 @@ static unsigned long shrink_page_list(struct list_head *page_list,
nr_reclaimed += nr_succeeded;
if (err) {
+ if (err == -ENOMEM)
+ set_bit(PGDAT_CONTENDED,
+ &NODE_DATA(target_nid)->flags);
+
putback_movable_pages(&demote_pages);
list_splice(&ret_pages, &demote_pages);
@@ -2596,6 +2610,19 @@ static void shrink_node_memcg(struct pglist_data *pgdat, struct mem_cgroup *memc
* scan target and the percentage scanning already complete
*/
lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE;
+
+ /*
+ * shrink_page_list() may find that the demote target node is
+ * contended; if so, it doesn't make sense to scan the anonymous
+ * LRU again.
+ *
+ * Also need to check whether swap is available, since demotion
+ * may happen on a swapless system.
+ */
+ if (!is_demote_ok(pgdat->node_id, sc) &&
+ (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0))
+ lru = LRU_FILE;
+
nr_scanned = targets[lru] - nr[lru];
nr[lru] = targets[lru] * (100 - percentage) / 100;
nr[lru] -= min(nr[lru], nr_scanned);
@@ -3458,6 +3485,7 @@ static void clear_pgdat_congested(pg_data_t *pgdat)
clear_bit(PGDAT_CONGESTED, &pgdat->flags);
clear_bit(PGDAT_DIRTY, &pgdat->flags);
clear_bit(PGDAT_WRITEBACK, &pgdat->flags);
+ clear_bit(PGDAT_CONTENDED, &pgdat->flags);
}
/*
--
1.8.3.1
Powered by blists - more mailing lists