lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 29 Jun 2020 16:45:15 -0700
From:   Dave Hansen <dave.hansen@...ux.intel.com>
To:     linux-kernel@...r.kernel.org
Cc:     linux-mm@...ck.org, Dave Hansen <dave.hansen@...ux.intel.com>,
        yang.shi@...ux.alibaba.com, rientjes@...gle.com,
        ying.huang@...el.com, dan.j.williams@...el.com
Subject: [RFC][PATCH 7/8] mm/vmscan: never demote for memcg reclaim


From: Dave Hansen <dave.hansen@...ux.intel.com>

Global reclaim aims to reduce the amount of memory used on
a given node or set of nodes.  Migrating pages to another
node serves this purpose.

memcg reclaim is different.  Its goal is to reduce the
total memory consumption of the entire memcg, across all
nodes.  Migration does not assist memcg reclaim because
it just moves page contents between nodes rather than
actually reducing memory consumption.

Signed-off-by: Dave Hansen <dave.hansen@...ux.intel.com>
Suggested-by: Yang Shi <yang.shi@...ux.alibaba.com>
Cc: David Rientjes <rientjes@...gle.com>
Cc: Huang Ying <ying.huang@...el.com>
Cc: Dan Williams <dan.j.williams@...el.com>
---

 b/mm/vmscan.c |   61 +++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 42 insertions(+), 19 deletions(-)

diff -puN mm/vmscan.c~never-demote-for-memcg-reclaim mm/vmscan.c
--- a/mm/vmscan.c~never-demote-for-memcg-reclaim	2020-06-29 16:34:44.018312591 -0700
+++ b/mm/vmscan.c	2020-06-29 16:34:44.023312591 -0700
@@ -289,7 +289,8 @@ static bool writeback_throttling_sane(st
 #endif
 
 static inline bool can_reclaim_anon_pages(struct mem_cgroup *memcg,
-					  int node_id)
+					  int node_id,
+					  struct scan_control *sc)
 {
 	/* Always age anon pages when we have swap */
 	if (memcg == NULL) {
@@ -300,8 +301,14 @@ static inline bool can_reclaim_anon_page
 			return true;
 	}
 
-	/* Also age anon pages if we can auto-migrate them */
-	if (next_demotion_node(node_id) >= 0)
+	/*
+	 * Also age anon pages if we can auto-migrate them.
+	 *
+	 * Migrating a page does not reduce comsumption of a
+	 * memcg so should not be performed when in memcg
+	 * reclaim.
+	 */
+	if ((sc && cgroup_reclaim(sc)) && (next_demotion_node(node_id) >= 0))
 		return true;
 
 	/* No way to reclaim anon pages */
@@ -319,7 +326,7 @@ unsigned long zone_reclaimable_pages(str
 
 	nr = zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_FILE) +
 		zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_FILE);
-	if (can_reclaim_anon_pages(NULL, zone_to_nid(zone)))
+	if (can_reclaim_anon_pages(NULL, zone_to_nid(zone), NULL))
 		nr += zone_page_state_snapshot(zone, NR_ZONE_INACTIVE_ANON) +
 			zone_page_state_snapshot(zone, NR_ZONE_ACTIVE_ANON);
 
@@ -1084,6 +1091,32 @@ static void page_check_dirty_writeback(s
 		mapping->a_ops->is_dirty_writeback(page, dirty, writeback);
 }
 
+
+static int shrink_do_demote_mapping(struct page *page,
+				    struct list_head *page_list,
+				    struct scan_control *sc)
+{
+	int rc;
+
+	/* It is pointless to do demotion in memcg reclaim */
+	if (cgroup_reclaim(sc))
+		return -ENOTSUPP;
+
+	rc = migrate_demote_mapping(page);
+	/*
+	 * -ENOMEM on a THP may indicate either migration is
+	 * unsupported or there was not enough contiguous
+	 * space. Split the THP into base pages and retry the
+	 * head immediately. The tail pages will be considered
+	 * individually within the current loop's page list.
+	 */
+	if (rc == -ENOMEM && PageTransHuge(page) &&
+	    !split_huge_page_to_list(page, page_list))
+		rc = migrate_demote_mapping(page);
+
+	return rc;
+}
+
 /*
  * shrink_page_list() returns the number of reclaimed pages
  */
@@ -1251,17 +1284,7 @@ static unsigned long shrink_page_list(st
 			; /* try to reclaim the page below */
 		}
 
-		rc = migrate_demote_mapping(page);
-		/*
-		 * -ENOMEM on a THP may indicate either migration is
-		 * unsupported or there was not enough contiguous
-		 * space. Split the THP into base pages and retry the
-		 * head immediately. The tail pages will be considered
-		 * individually within the current loop's page list.
-		 */
-		if (rc == -ENOMEM && PageTransHuge(page) &&
-		    !split_huge_page_to_list(page, page_list))
-			rc = migrate_demote_mapping(page);
+		rc = shrink_do_demote_mapping(page, page_list, sc);
 
 		if (rc == MIGRATEPAGE_SUCCESS) {
 			unlock_page(page);
@@ -2287,7 +2310,7 @@ static void get_scan_count(struct lruvec
 	enum lru_list lru;
 
 	/* If we have no swap space, do not bother scanning anon pages. */
-	if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id)) {
+	if (!sc->may_swap || !can_reclaim_anon_pages(memcg, pgdat->node_id, sc)) {
 		scan_balance = SCAN_FILE;
 		goto out;
 	}
@@ -2593,7 +2616,7 @@ static void shrink_lruvec(struct lruvec
 	 * rebalance the anon lru active/inactive ratio.
 	 */
 	if (can_reclaim_anon_pages(lruvec_memcg(lruvec),
-			       lruvec_pgdat(lruvec)->node_id) &&
+			       lruvec_pgdat(lruvec)->node_id, sc) &&
 	    inactive_is_low(lruvec, LRU_INACTIVE_ANON))
 		shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
 				   sc, LRU_ACTIVE_ANON);
@@ -2664,7 +2687,7 @@ static inline bool should_continue_recla
 	 */
 	pages_for_compaction = compact_gap(sc->order);
 	inactive_lru_pages = node_page_state(pgdat, NR_INACTIVE_FILE);
-	if (can_reclaim_anon_pages(NULL, pgdat->node_id))
+	if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc))
 		inactive_lru_pages += node_page_state(pgdat, NR_INACTIVE_ANON);
 
 	return inactive_lru_pages > pages_for_compaction;
@@ -3417,7 +3440,7 @@ static void age_active_anon(struct pglis
 	struct mem_cgroup *memcg;
 	struct lruvec *lruvec;
 
-	if (!can_reclaim_anon_pages(NULL, pgdat->node_id))
+	if (!can_reclaim_anon_pages(NULL, pgdat->node_id, sc))
 		return;
 
 	lruvec = mem_cgroup_lruvec(NULL, pgdat);
_

Powered by blists - more mailing lists