lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20251219-thp-thisnode-tweak-v2-2-0c01f231fd1c@suse.cz>
Date: Fri, 19 Dec 2025 18:38:52 +0100
From: Vlastimil Babka <vbabka@...e.cz>
To: Andrew Morton <akpm@...ux-foundation.org>, 
 Suren Baghdasaryan <surenb@...gle.com>, Michal Hocko <mhocko@...e.com>, 
 Brendan Jackman <jackmanb@...gle.com>, Johannes Weiner <hannes@...xchg.org>, 
 Zi Yan <ziy@...dia.com>, David Rientjes <rientjes@...gle.com>, 
 David Hildenbrand <david@...nel.org>, 
 Lorenzo Stoakes <lorenzo.stoakes@...cle.com>, 
 "Liam R. Howlett" <Liam.Howlett@...cle.com>, 
 Mike Rapoport <rppt@...nel.org>, Joshua Hahn <joshua.hahnjy@...il.com>, 
 Pedro Falcato <pfalcato@...e.de>
Cc: linux-mm@...ck.org, linux-kernel@...r.kernel.org, 
 Vlastimil Babka <vbabka@...e.cz>
Subject: [PATCH RFC v2 2/3] mm/page_alloc: refactor the initial compaction
 handling

The initial direct compaction done in some cases in
__alloc_pages_slowpath() stands out from the main retry loop of
reclaim + compaction.

We can simplify this by instead skipping the initial reclaim attempt via
a new local variable compact_first, and handle the compact_priority to
match the original behavior.

Suggested-by: Johannes Weiner <hannes@...xchg.org>
Signed-off-by: Vlastimil Babka <vbabka@...e.cz>
---
 mm/page_alloc.c | 106 +++++++++++++++++++++++++++++---------------------------
 1 file changed, 54 insertions(+), 52 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 9e7b0967f1b5..cb8965fd5e20 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4512,6 +4512,11 @@ static bool oom_reserves_allowed(struct task_struct *tsk)
 	return true;
 }
 
+static inline bool gfp_thisnode_noretry(gfp_t gfp_mask)
+{
+	return (gfp_mask & __GFP_NORETRY) && (gfp_mask & __GFP_THISNODE);
+}
+
 /*
  * Distinguish requests which really need access to full memory
  * reserves from oom victims which can live with a portion of it
@@ -4664,7 +4669,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 						struct alloc_context *ac)
 {
 	bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
-	bool can_compact = gfp_compaction_allowed(gfp_mask);
+	bool can_compact = can_direct_reclaim && gfp_compaction_allowed(gfp_mask);
 	bool nofail = gfp_mask & __GFP_NOFAIL;
 	const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
 	struct page *page = NULL;
@@ -4677,6 +4682,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	unsigned int cpuset_mems_cookie;
 	unsigned int zonelist_iter_cookie;
 	int reserve_flags;
+	bool compact_first = false;
 
 	if (unlikely(nofail)) {
 		/*
@@ -4700,6 +4706,19 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	cpuset_mems_cookie = read_mems_allowed_begin();
 	zonelist_iter_cookie = zonelist_iter_begin();
 
+	/*
+	 * For costly allocations, try direct compaction first, as it's likely
+	 * that we have enough base pages and don't need to reclaim. For non-
+	 * movable high-order allocations, do that as well, as compaction will
+	 * try prevent permanent fragmentation by migrating from blocks of the
+	 * same migratetype.
+	 */
+	if (can_compact && (costly_order || (order > 0 &&
+					ac->migratetype != MIGRATE_MOVABLE))) {
+		compact_first = true;
+		compact_priority = INIT_COMPACT_PRIORITY;
+	}
+
 	/*
 	 * The fast path uses conservative alloc_flags to succeed only until
 	 * kswapd needs to be woken up, and to avoid the cost of setting up
@@ -4742,53 +4761,6 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	if (page)
 		goto got_pg;
 
-	/*
-	 * For costly allocations, try direct compaction first, as it's likely
-	 * that we have enough base pages and don't need to reclaim. For non-
-	 * movable high-order allocations, do that as well, as compaction will
-	 * try prevent permanent fragmentation by migrating from blocks of the
-	 * same migratetype.
-	 * Don't try this for allocations that are allowed to ignore
-	 * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
-	 */
-	if (can_direct_reclaim && can_compact &&
-			(costly_order ||
-			   (order > 0 && ac->migratetype != MIGRATE_MOVABLE))
-			&& !gfp_pfmemalloc_allowed(gfp_mask)) {
-		page = __alloc_pages_direct_compact(gfp_mask, order,
-						alloc_flags, ac,
-						INIT_COMPACT_PRIORITY,
-						&compact_result);
-		if (page)
-			goto got_pg;
-
-		/*
-		 * Checks for costly allocations with __GFP_NORETRY, which
-		 * includes some THP page fault allocations
-		 */
-		if (costly_order && (gfp_mask & __GFP_NORETRY)) {
-			/*
-			 * THP page faults may attempt local node only first,
-			 * but are then allowed to only compact, not reclaim,
-			 * see alloc_pages_mpol().
-			 *
-			 * Compaction has failed above and we don't want such
-			 * THP allocations to put reclaim pressure on a single
-			 * node in a situation where other nodes might have
-			 * plenty of available memory.
-			 */
-			if (gfp_mask & __GFP_THISNODE)
-				goto nopage;
-
-			/*
-			 * Proceed with single round of reclaim/compaction, but
-			 * since sync compaction could be very expensive, keep
-			 * using async compaction.
-			 */
-			compact_priority = INIT_COMPACT_PRIORITY;
-		}
-	}
-
 retry:
 	/*
 	 * Deal with possible cpuset update races or zonelist updates to avoid
@@ -4832,10 +4804,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto nopage;
 
 	/* Try direct reclaim and then allocating */
-	page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags, ac,
-							&did_some_progress);
-	if (page)
-		goto got_pg;
+	if (!compact_first) {
+		page = __alloc_pages_direct_reclaim(gfp_mask, order, alloc_flags,
+							ac, &did_some_progress);
+		if (page)
+			goto got_pg;
+	}
 
 	/* Try direct compaction and then allocating */
 	page = __alloc_pages_direct_compact(gfp_mask, order, alloc_flags, ac,
@@ -4843,6 +4817,34 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	if (page)
 		goto got_pg;
 
+	if (compact_first) {
+		/*
+		 * THP page faults may attempt local node only first, but are
+		 * then allowed to only compact, not reclaim, see
+		 * alloc_pages_mpol().
+		 *
+		 * Compaction has failed above and we don't want such THP
+		 * allocations to put reclaim pressure on a single node in a
+		 * situation where other nodes might have plenty of available
+		 * memory.
+		 */
+		if (gfp_thisnode_noretry(gfp_mask))
+			goto nopage;
+
+		/*
+		 * For the initial compaction attempt we have lowered its
+		 * priority. Restore it for further retries. With __GFP_NORETRY
+		 * there will be a single round of reclaim+compaction with the
+		 * lowered priority.
+		 */
+		if (!(gfp_mask & __GFP_NORETRY)) {
+			compact_priority = DEF_COMPACT_PRIORITY;
+		}
+
+		compact_first = false;
+		goto retry;
+	}
+
 	/* Do not loop if specifically requested */
 	if (gfp_mask & __GFP_NORETRY)
 		goto nopage;

-- 
2.52.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ