Message-ID: <20250403211820.GA447372@cmpxchg.org>
Date: Thu, 3 Apr 2025 17:18:20 -0400
From: Johannes Weiner <hannes@...xchg.org>
To: Carlos Song <carlos.song@....com>
Cc: "baolin.wang@...ux.alibaba.com" <baolin.wang@...ux.alibaba.com>,
	"ying.huang@...el.com" <ying.huang@...el.com>,
	"vbabka@...e.cz" <vbabka@...e.cz>,
	"david@...hat.com" <david@...hat.com>,
	"mgorman@...hsingularity.net" <mgorman@...hsingularity.net>,
	"ziy@...dia.com" <ziy@...dia.com>,
	"akpm@...ux-foundation.org" <akpm@...ux-foundation.org>,
	"linux-mm@...ck.org" <linux-mm@...ck.org>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
	Brendan Jackman <jackmanb@...gle.com>
Subject: Re: Ask help about this patch c0cd6f557b90 "mm: page_alloc: fix
 freelist movement during block conversion"

Hi Carlos,

On Thu, Apr 03, 2025 at 09:23:55AM +0000, Carlos Song wrote:
> Thank you for your quick ack and help! After applying this patch, things improved nicely.
> I applied the patch on top of this HEAD:
> f0a16f536332 (tag: next-20250403, origin/master, origin/HEAD) Add linux-next specific files for 20250403
> 
> and ran the same test 10 times, as I did before, on the IMX7D:
> The irqsoff tracer shows irqs-off times of 7~10 ms. Is this what you expected?

This is great, thank you for testing it!

> # irqsoff latency trace v1.1.5 on 6.14.0-next-20250403-00003-gf9e8473ee91a
> # --------------------------------------------------------------------
> # latency: 8111 us, #4323/4323, CPU#0 | (M:NONE VP:0, KP:0, SP:0 HP:0 #P:2)
> #    -----------------
> #    | task: dd-820 (uid:0 nice:0 policy:0 rt_prio:0)
> #    -----------------
> #  => started at: __rmqueue_pcplist
> #  => ended at:   _raw_spin_unlock_irqrestore
> #
> #
> #                    _------=> CPU#
> #                   / _-----=> irqs-off/BH-disabled
> #                  | / _----=> need-resched
> #                  || / _---=> hardirq/softirq
> #                  ||| / _--=> preempt-depth
> #                  |||| / _-=> migrate-disable
> #                  ||||| /     delay
> #  cmd     pid     |||||| time  |   caller
> #     \   /        ||||||  \    |    /
>       dd-820       0d....    1us : __rmqueue_pcplist
>       dd-820       0d....    3us : _raw_spin_trylock <-__rmqueue_pcplist
>       dd-820       0d....    7us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d....   11us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d....   13us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d....   15us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d....   17us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d....   19us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d....   21us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d....   23us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d....   25us : __mod_zone_page_state <-__rmqueue_pcplist
> ...
>       dd-820       0d.... 1326us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1328us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1330us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1332us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1334us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1336us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1337us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1339us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1341us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1343us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1345us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1347us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1349us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1351us : __mod_zone_page_state <-__rmqueue_pcplist
> ...
>       dd-820       0d.... 1660us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1662us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1664us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 1666us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 1668us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 1670us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 1672us+: try_to_claim_block <-__rmqueue_pcplist
>       dd-820       0d.... 1727us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 1729us+: try_to_claim_block <-__rmqueue_pcplist
>       dd-820       0d.... 1806us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 1807us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 1809us+: try_to_claim_block <-__rmqueue_pcplist
>       dd-820       0d.... 1854us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 1856us+: try_to_claim_block <-__rmqueue_pcplist
>       dd-820       0d.... 1893us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 1895us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 1896us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 1898us+: try_to_claim_block <-__rmqueue_pcplist
>       dd-820       0d.... 1954us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 1956us+: try_to_claim_block <-__rmqueue_pcplist
>       dd-820       0d.... 2012us : find_suitable_fallback <-__rmqueue_pcplist
> ...
>       dd-820       0d.... 8077us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 8079us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 8081us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 8083us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 8084us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 8086us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 8088us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 8089us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 8091us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 8093us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 8095us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 8097us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 8098us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 8100us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 8102us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 8104us : find_suitable_fallback <-__rmqueue_pcplist
>       dd-820       0d.... 8105us : __mod_zone_page_state <-__rmqueue_pcplist
>       dd-820       0d.... 8107us : _raw_spin_unlock_irqrestore <-__rmqueue_pcplist
>       dd-820       0d.... 8110us : _raw_spin_unlock_irqrestore
>       dd-820       0d.... 8113us+: trace_hardirqs_on <-_raw_spin_unlock_irqrestore
>       dd-820       0d.... 8156us : <stack trace>

This pattern looks much better. Once it fails to claim blocks, it goes
straight to single-page stealing.
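
In rough toy form, that behavior amounts to something like the sketch
below (hypothetical helpers standing in for __rmqueue_claim() and
__rmqueue_steal(); a simplified illustration, not the actual
mm/page_alloc.c code):

	#include <stdbool.h>
	#include <stdio.h>

	/* Hypothetical stand-ins for __rmqueue_claim()/__rmqueue_steal(). */
	static bool try_claim_whole_block(int order) { (void)order; return false; }
	static bool steal_single_page(void) { return true; }

	static void allocate_fallback(int order)
	{
		/* First try to convert a whole pageblock to our migratetype. */
		if (try_claim_whole_block(order)) {
			printf("claimed a whole block at order %d\n", order);
			return;
		}
		/* Claiming failed: take single pages without converting blocks. */
		if (steal_single_page())
			printf("stole a single page instead\n");
	}

	int main(void) { allocate_fallback(3); return 0; }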

Another observation is that find_suitable_fallback() is hot. Looking
closer at that function, I think there are a few optimizations we can
make. Attaching another patch below, to go on top of the previous one.

Carlos, would you be able to give this a spin?

Thanks!
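
In caller terms, the change below boils down to a three-way return
value from find_suitable_fallback(): >= 0 is the fallback migratetype
to take from, -1 means this order has no free blocks at all, and -2
means the order/migratetype combination is not claimable. A toy
illustration of how the two rmqueue callers consume that (stubbed
data and a hypothetical helper name, not the kernel code):

	#include <stdio.h>

	#define NO_BLOCK      -1  /* nothing free at this order */
	#define NOT_CLAIMABLE -2  /* order/migratetype can't claim whole blocks */

	/* Stubbed lookup standing in for find_suitable_fallback(). */
	static int suitable_fallback(int order, int claimable)
	{
		if (claimable && order < 2)
			return NOT_CLAIMABLE;	/* too low an order to claim a block */
		if (order >= 4)
			return NO_BLOCK;	/* pretend the higher orders are empty */
		return 1;			/* pretend a movable fallback exists */
	}

	int main(void)
	{
		/* __rmqueue_claim()-style scan: high to low, stop once unclaimable. */
		for (int order = 7; order >= 0; order--) {
			int mt = suitable_fallback(order, 1);
			if (mt == NO_BLOCK)
				continue;		/* try the next lower order */
			if (mt == NOT_CLAIMABLE)
				break;			/* no point scanning further down */
			printf("claim: order %d -> fallback mt %d\n", order, mt);
			break;
		}

		/* __rmqueue_steal()-style scan: low to high, claimability ignored. */
		for (int order = 0; order < 8; order++) {
			int mt = suitable_fallback(order, 0);
			if (mt == NO_BLOCK)
				continue;
			printf("steal: order %d -> fallback mt %d\n", order, mt);
			break;
		}
		return 0;
	}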

---

From 621b1842b9fbbb26848296a5feb4daf5b038ba33 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@...xchg.org>
Date: Thu, 3 Apr 2025 16:44:32 -0400
Subject: [PATCH] mm: page_alloc: tighten up find_suitable_fallback()

find_suitable_fallback() is not as efficient as it could be:

1. should_try_claim_block() is a loop invariant. There is no point in
   checking fallback areas if the caller is interested in claimable
   blocks but the order and the migratetype don't allow for that.

2. __rmqueue_steal() doesn't care about claimability, so it shouldn't
   have to run those tests.

Different callers want different things from this helper:

1. __compact_finished() scans orders up until it finds a claimable block
2. __rmqueue_claim() scans orders down as long as blocks are claimable
3. __rmqueue_steal() doesn't care about claimability at all

Move should_try_claim_block() out of the loop. Only test it for the
two callers who care in the first place. Distinguish "no blocks" from
"order + mt are not claimable" in the return value; __rmqueue_claim()
can stop once order becomes unclaimable, __compact_finished() can keep
advancing until order becomes claimable.

Signed-off-by: Johannes Weiner <hannes@...xchg.org>
---
 mm/compaction.c |  4 +---
 mm/internal.h   |  2 +-
 mm/page_alloc.c | 31 +++++++++++++------------------
 3 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index 139f00c0308a..7462a02802a5 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2348,7 +2348,6 @@ static enum compact_result __compact_finished(struct compact_control *cc)
 	ret = COMPACT_NO_SUITABLE_PAGE;
 	for (order = cc->order; order < NR_PAGE_ORDERS; order++) {
 		struct free_area *area = &cc->zone->free_area[order];
-		bool claim_block;
 
 		/* Job done if page is free of the right migratetype */
 		if (!free_area_empty(area, migratetype))
@@ -2364,8 +2363,7 @@ static enum compact_result __compact_finished(struct compact_control *cc)
 		 * Job done if allocation would steal freepages from
 		 * other migratetype buddy lists.
 		 */
-		if (find_suitable_fallback(area, order, migratetype,
-						true, &claim_block) != -1)
+		if (find_suitable_fallback(area, order, migratetype, true) >= 0)
 			/*
 			 * Movable pages are OK in any pageblock. If we are
 			 * stealing for a non-movable allocation, make sure
diff --git a/mm/internal.h b/mm/internal.h
index 50c2f590b2d0..55384b9971c3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -915,7 +915,7 @@ static inline void init_cma_pageblock(struct page *page)
 
 
 int find_suitable_fallback(struct free_area *area, unsigned int order,
-			int migratetype, bool claim_only, bool *claim_block);
+			   int migratetype, bool claimable);
 
 static inline bool free_area_empty(struct free_area *area, int migratetype)
 {
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 03b0d45ed45a..1522e3a29b16 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2077,31 +2077,25 @@ static bool should_try_claim_block(unsigned int order, int start_mt)
 
 /*
  * Check whether there is a suitable fallback freepage with requested order.
- * Sets *claim_block to instruct the caller whether it should convert a whole
- * pageblock to the returned migratetype.
- * If only_claim is true, this function returns fallback_mt only if
+ * If claimable is true, this function returns fallback_mt only if
  * we would do this whole-block claiming. This would help to reduce
  * fragmentation due to mixed migratetype pages in one pageblock.
  */
 int find_suitable_fallback(struct free_area *area, unsigned int order,
-			int migratetype, bool only_claim, bool *claim_block)
+			   int migratetype, bool claimable)
 {
 	int i;
-	int fallback_mt;
+
+	if (claimable && !should_try_claim_block(order, migratetype))
+		return -2;
 
 	if (area->nr_free == 0)
 		return -1;
 
-	*claim_block = false;
 	for (i = 0; i < MIGRATE_PCPTYPES - 1 ; i++) {
-		fallback_mt = fallbacks[migratetype][i];
-		if (free_area_empty(area, fallback_mt))
-			continue;
+		int fallback_mt = fallbacks[migratetype][i];
 
-		if (should_try_claim_block(order, migratetype))
-			*claim_block = true;
-
-		if (*claim_block || !only_claim)
+		if (!free_area_empty(area, fallback_mt))
 			return fallback_mt;
 	}
 
@@ -2206,7 +2200,6 @@ __rmqueue_claim(struct zone *zone, int order, int start_migratetype,
 	int min_order = order;
 	struct page *page;
 	int fallback_mt;
-	bool claim_block;
 
 	/*
 	 * Do not steal pages from freelists belonging to other pageblocks
@@ -2225,11 +2218,14 @@ __rmqueue_claim(struct zone *zone, int order, int start_migratetype,
 				--current_order) {
 		area = &(zone->free_area[current_order]);
 		fallback_mt = find_suitable_fallback(area, current_order,
-				start_migratetype, false, &claim_block);
+						     start_migratetype, true);
+
+		/* No block in that order */
 		if (fallback_mt == -1)
 			continue;
 
-		if (!claim_block)
+		/* Advanced into orders too low to claim, abort */
+		if (fallback_mt == -2)
 			break;
 
 		page = get_page_from_free_area(area, fallback_mt);
@@ -2254,12 +2250,11 @@ __rmqueue_steal(struct zone *zone, int order, int start_migratetype)
 	int current_order;
 	struct page *page;
 	int fallback_mt;
-	bool claim_block;
 
 	for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) {
 		area = &(zone->free_area[current_order]);
 		fallback_mt = find_suitable_fallback(area, current_order,
-				start_migratetype, false, &claim_block);
+						     start_migratetype, false);
 		if (fallback_mt == -1)
 			continue;
 
-- 
2.49.0


