Message-Id: <20231109045908.54996-4-byungchul@sk.com>
Date:   Thu,  9 Nov 2023 13:59:08 +0900
From:   Byungchul Park <byungchul@...com>
To:     linux-kernel@...r.kernel.org, linux-mm@...ck.org
Cc:     kernel_team@...ynix.com, akpm@...ux-foundation.org,
        ying.huang@...el.com, namit@...are.com, xhao@...ux.alibaba.com,
        mgorman@...hsingularity.net, hughd@...gle.com, willy@...radead.org,
        david@...hat.com, peterz@...radead.org, luto@...nel.org,
        tglx@...utronix.de, mingo@...hat.com, bp@...en8.de,
        dave.hansen@...ux.intel.com
Subject: [v4 3/3] mm: Pause migrc mechanism at high memory pressure

A regression was observed when the system is under high memory pressure with
swap on: migrc keeps expanding its pending queue while the page allocator
keeps flushing that queue and freeing folios at the same time, which is
pointless work. So temporarily prevent migrc from expanding its pending
queue in that condition.
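
Put differently, the allocator slow path now brackets its reclaim work
with a pause/resume pair, roughly like the following simplified sketch
(only names from the hunks below are used; retry and error paths are
omitted):

	bool migrc_paused = false;
	...
	/* direct reclaim is about to start: stop queueing new entries */
	if (!migrc_paused) {
		migrc_pause();			/* bump the global pause counter */
		migrc_paused = true;
		migrc_flush_free_folios(NULL);	/* flush and free what is already pending */
	}
	...
got_pg:
	if (migrc_paused)
		migrc_resume();			/* drop the counter on the way out */
	return page;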

Signed-off-by: Byungchul Park <byungchul@...com>
---
 mm/internal.h   | 17 ++++++++++++++++
 mm/migrate.c    | 53 ++++++++++++++++++++++++++++++++++++++++++++++++-
 mm/page_alloc.c | 13 ++++++++++++
 3 files changed, 82 insertions(+), 1 deletion(-)
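
The pause state is intentionally a counter rather than a boolean, so
concurrent tasks in the allocator slow path nest cleanly: migrc stays
paused until the last of them resumes. A minimal illustration using the
helpers added to mm/internal.h:

	migrc_pause();	/* task A enters the slow path: 0 -> 1, migrc_paused() == true  */
	migrc_pause();	/* task B enters as well:       1 -> 2, still paused            */
	migrc_resume();	/* task A gets its page:        2 -> 1, still paused            */
	migrc_resume();	/* task B leaves last:          1 -> 0, migrc_paused() == false */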

diff --git a/mm/internal.h b/mm/internal.h
index a2b6f0321729..971f2dded4a6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1159,6 +1159,8 @@ struct vma_prepare {
 	struct vm_area_struct *remove2;
 };
 
+extern atomic_t migrc_pause_cnt;
+
 /*
  * Initialize the page when allocated from buddy allocator.
  */
@@ -1202,6 +1204,21 @@ static inline bool can_migrc_test(void)
 	return current->can_migrc && current->tlb_ubc_ro.flush_required;
 }
 
+static inline void migrc_pause(void)
+{
+	atomic_inc(&migrc_pause_cnt);
+}
+
+static inline void migrc_resume(void)
+{
+	atomic_dec(&migrc_pause_cnt);
+}
+
+static inline bool migrc_paused(void)
+{
+	return !!atomic_read(&migrc_pause_cnt);
+}
+
 /*
  * Return the number of folios pending TLB flush that have yet to get
  * freed in the zone.
diff --git a/mm/migrate.c b/mm/migrate.c
index 9ab7794b0390..bde4f49d0144 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -100,6 +100,16 @@ static void migrc_undo_folios(struct folio *fsrc, struct folio *fdst)
 static void migrc_expand_req(struct folio *fsrc, struct folio *fdst,
 			     struct migrc_req *req)
 {
+	/*
+	 * If migrc has been paused in the middle of unmap because of
+	 * high memory pressure, the folios that have already been
+	 * marked as pending should be undone here.
+	 */
+	if (!req) {
+		migrc_undo_folios(fsrc, fdst);
+		return;
+	}
+
 	if (req->nid == -1)
 		req->nid = folio_nid(fsrc);
 
@@ -147,6 +157,12 @@ static void migrc_req_end(struct migrc_req *req)
 	llist_add(&req->llnode, &NODE_DATA(req->nid)->migrc_reqs);
 }
 
+/*
+ * Increased on entry to high memory pressure handling, e.g. direct
+ * reclaim, and decreased on exit. See __alloc_pages_slowpath().
+ */
+atomic_t migrc_pause_cnt = ATOMIC_INIT(0);
+
 /*
  * Gather folios and architecture specific data to handle.
  */
@@ -213,6 +229,31 @@ static void fold_ubc_ro_to_migrc(struct migrc_req *req)
 	tlb_ubc_ro->flush_required = false;
 }
 
+static void fold_migrc_to_ubc(struct migrc_req *req)
+{
+	struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
+
+	if (!req)
+		return;
+
+	/*
+	 * Fold the req's data to tlb_ubc.
+	 */
+	arch_tlbbatch_fold(&tlb_ubc->arch, &req->arch);
+
+	/*
+	 * Reset the req's data.
+	 */
+	arch_tlbbatch_clear(&req->arch);
+
+	/*
+	 * req->arch might be empty. However, conservatively set
+	 * ->flush_required to true so that try_to_unmap_flush() can
+	 * check it anyway.
+	 */
+	tlb_ubc->flush_required = true;
+}
+
 bool isolate_movable_page(struct page *page, isolate_mode_t mode)
 {
 	struct folio *folio = folio_get_nontail_page(page);
@@ -1791,7 +1832,7 @@ static int migrate_pages_batch(struct list_head *from,
 	/*
 	 * Apply migrc only to numa migration for now.
 	 */
-	if (reason == MR_DEMOTION || reason == MR_NUMA_MISPLACED)
+	if (!migrc_paused() && (reason == MR_DEMOTION || reason == MR_NUMA_MISPLACED))
 		mreq = migrc_req_start();
 
 	for (pass = 0; pass < nr_pass && retry; pass++) {
@@ -1829,6 +1870,16 @@ static int migrate_pages_batch(struct list_head *from,
 				continue;
 			}
 
+			/*
+			 * The system is under high memory pressure;
+			 * give up the migrc mechanism this turn.
+			 */
+			if (unlikely(mreq && migrc_paused())) {
+				fold_migrc_to_ubc(mreq);
+				migrc_req_end(mreq);
+				mreq = NULL;
+			}
+
 			can_migrc_init();
 			rc = migrate_folio_unmap(get_new_folio, put_new_folio,
 					private, folio, &dst, mode, reason,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 914e93ab598e..c920ad48f741 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3926,6 +3926,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	unsigned int cpuset_mems_cookie;
 	unsigned int zonelist_iter_cookie;
 	int reserve_flags;
+	bool migrc_paused = false;
 
 restart:
 	compaction_retries = 0;
@@ -4057,6 +4058,16 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	if (page)
 		goto got_pg;
 
+	/*
+	 * The system is under very high memory pressure. Temporarily
+	 * prevent migrc from expanding its pending queue.
+	 */
+	if (!migrc_paused) {
+		migrc_pause();
+		migrc_paused = true;
+		migrc_flush_free_folios(NULL);
+	}
+
 	/* Caller is not willing to reclaim, we can't balance anything */
 	if (!can_direct_reclaim)
 		goto nopage;
@@ -4184,6 +4195,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	warn_alloc(gfp_mask, ac->nodemask,
 			"page allocation failure: order:%u", order);
 got_pg:
+	if (migrc_paused)
+		migrc_resume();
 	return page;
 }
 
-- 
2.17.1
