[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250403211820.GA447372@cmpxchg.org>
Date: Thu, 3 Apr 2025 17:18:20 -0400
From: Johannes Weiner <hannes@...xchg.org>
To: Carlos Song <carlos.song@....com>
Cc: "baolin.wang@...ux.alibaba.com" <baolin.wang@...ux.alibaba.com>,
"ying.huang@...el.com" <ying.huang@...el.com>,
"vbabka@...e.cz" <vbabka@...e.cz>,
"david@...hat.com" <david@...hat.com>,
"mgorman@...hsingularity.net" <mgorman@...hsingularity.net>,
"ziy@...dia.com" <ziy@...dia.com>,
"akpm@...ux-foundation.org" <akpm@...ux-foundation.org>,
"linux-mm@...ck.org" <linux-mm@...ck.org>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
Brendan Jackman <jackmanb@...gle.com>
Subject: Re: Ask help about this patch c0cd6f557b90 "mm: page_alloc: fix
freelist movement during block conversion"
Hi Carlos,
On Thu, Apr 03, 2025 at 09:23:55AM +0000, Carlos Song wrote:
> Thank you for your quick ack and help! After applied this patch, it improved well.
> I apply this patch at this HEAD:
> f0a16f536332 (tag: next-20250403, origin/master, origin/HEAD) Add linux-next specific files for 20250403
>
> and do 10 times same test like what I have done before in IMX7D:
> The IRQ off tracer shows the irq_off time 7~10ms. Is this what you expected?
This is great, thank you for testing it!
> # irqsoff latency trace v1.1.5 on 6.14.0-next-20250403-00003-gf9e8473ee91a
> # --------------------------------------------------------------------
> # latency: 8111 us, #4323/4323, CPU#0 | (M:NONE VP:0, KP:0, SP:0 HP:0 #P:2)
> # -----------------
> # | task: dd-820 (uid:0 nice:0 policy:0 rt_prio:0)
> # -----------------
> # => started at: __rmqueue_pcplist
> # => ended at: _raw_spin_unlock_irqrestore
> #
> #
> # _------=> CPU#
> # / _-----=> irqs-off/BH-disabled
> # | / _----=> need-resched
> # || / _---=> hardirq/softirq
> # ||| / _--=> preempt-depth
> # |||| / _-=> migrate-disable
> # ||||| / delay
> # cmd pid |||||| time | caller
> # \ / |||||| \ | /
> dd-820 0d.... 1us : __rmqueue_pcplist
> dd-820 0d.... 3us : _raw_spin_trylock <-__rmqueue_pcplist
> dd-820 0d.... 7us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 11us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 13us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 15us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 17us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 19us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 21us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 23us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 25us : __mod_zone_page_state <-__rmqueue_pcplist
> ...
> dd-820 0d.... 1326us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1328us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1330us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1332us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1334us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1336us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1337us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1339us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1341us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1343us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1345us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1347us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1349us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1351us : __mod_zone_page_state <-__rmqueue_pcplist
> ...
> dd-820 0d.... 1660us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1662us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1664us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 1666us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 1668us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 1670us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 1672us+: try_to_claim_block <-__rmqueue_pcplist
> dd-820 0d.... 1727us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 1729us+: try_to_claim_block <-__rmqueue_pcplist
> dd-820 0d.... 1806us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 1807us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 1809us+: try_to_claim_block <-__rmqueue_pcplist
> dd-820 0d.... 1854us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 1856us+: try_to_claim_block <-__rmqueue_pcplist
> dd-820 0d.... 1893us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 1895us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 1896us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 1898us+: try_to_claim_block <-__rmqueue_pcplist
> dd-820 0d.... 1954us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 1956us+: try_to_claim_block <-__rmqueue_pcplist
> dd-820 0d.... 2012us : find_suitable_fallback <-__rmqueue_pcplist
> ...
> dd-820 0d.... 8077us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 8079us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 8081us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 8083us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 8084us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 8086us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 8088us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 8089us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 8091us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 8093us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 8095us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 8097us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 8098us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 8100us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 8102us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 8104us : find_suitable_fallback <-__rmqueue_pcplist
> dd-820 0d.... 8105us : __mod_zone_page_state <-__rmqueue_pcplist
> dd-820 0d.... 8107us : _raw_spin_unlock_irqrestore <-__rmqueue_pcplist
> dd-820 0d.... 8110us : _raw_spin_unlock_irqrestore
> dd-820 0d.... 8113us+: trace_hardirqs_on <-_raw_spin_unlock_irqrestore
> dd-820 0d.... 8156us : <stack trace>
This pattern looks much better. Once it fails to claim blocks, it goes
straight to single-page stealing.
Another observation is that find_suitable_callback() is hot. Looking
closer at that function, I think there are a few optimizations we can
do. Attaching another patch below, to go on top of the previous one.
Carlos, would you be able to give this a spin?
Thanks!
---
>From 621b1842b9fbbb26848296a5feb4daf5b038ba33 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@...xchg.org>
Date: Thu, 3 Apr 2025 16:44:32 -0400
Subject: [PATCH] mm: page_alloc: tighten up find_suitable_fallback()
find_suitable_fallback() is not as efficient as it could be:
1. should_try_claim_block() is a loop invariant. There is no point in
checking fallback areas if the caller is interested in claimable
blocks but the order and the migratetype don't allow for that.
2. __rmqueue_steal() doesn't care about claimability, so it shouldn't
have to run those tests.
Different callers want different things from this helper:
1. __compact_finished() scans orders up until it finds a claimable block
2. __rmqueue_claim() scans orders down as long as blocks are claimable
3. __rmqueue_steal() doesn't care about claimability at all
Move should_try_claim_block() out of the loop. Only test it for the
two callers who care in the first place. Distinguish "no blocks" from
"order + mt are not claimable" in the return value; __rmqueue_claim()
can stop once order becomes unclaimable, __compact_finished() can keep
advancing until order becomes claimable.
Signed-off-by: Johannes Weiner <hannes@...xchg.org>
---
mm/compaction.c | 4 +---
mm/internal.h | 2 +-
mm/page_alloc.c | 31 +++++++++++++------------------
3 files changed, 15 insertions(+), 22 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c
index 139f00c0308a..7462a02802a5 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2348,7 +2348,6 @@ static enum compact_result __compact_finished(struct compact_control *cc)
ret = COMPACT_NO_SUITABLE_PAGE;
for (order = cc->order; order < NR_PAGE_ORDERS; order++) {
struct free_area *area = &cc->zone->free_area[order];
- bool claim_block;
/* Job done if page is free of the right migratetype */
if (!free_area_empty(area, migratetype))
@@ -2364,8 +2363,7 @@ static enum compact_result __compact_finished(struct compact_control *cc)
* Job done if allocation would steal freepages from
* other migratetype buddy lists.
*/
- if (find_suitable_fallback(area, order, migratetype,
- true, &claim_block) != -1)
+ if (find_suitable_fallback(area, order, migratetype, true) >= 0)
/*
* Movable pages are OK in any pageblock. If we are
* stealing for a non-movable allocation, make sure
diff --git a/mm/internal.h b/mm/internal.h
index 50c2f590b2d0..55384b9971c3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -915,7 +915,7 @@ static inline void init_cma_pageblock(struct page *page)
int find_suitable_fallback(struct free_area *area, unsigned int order,
- int migratetype, bool claim_only, bool *claim_block);
+ int migratetype, bool claimable);
static inline bool free_area_empty(struct free_area *area, int migratetype)
{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 03b0d45ed45a..1522e3a29b16 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2077,31 +2077,25 @@ static bool should_try_claim_block(unsigned int order, int start_mt)
/*
* Check whether there is a suitable fallback freepage with requested order.
- * Sets *claim_block to instruct the caller whether it should convert a whole
- * pageblock to the returned migratetype.
- * If only_claim is true, this function returns fallback_mt only if
+ * If claimable is true, this function returns fallback_mt only if
* we would do this whole-block claiming. This would help to reduce
* fragmentation due to mixed migratetype pages in one pageblock.
*/
int find_suitable_fallback(struct free_area *area, unsigned int order,
- int migratetype, bool only_claim, bool *claim_block)
+ int migratetype, bool claimable)
{
int i;
- int fallback_mt;
+
+ if (claimable && !should_try_claim_block(order, migratetype))
+ return -2;
if (area->nr_free == 0)
return -1;
- *claim_block = false;
for (i = 0; i < MIGRATE_PCPTYPES - 1 ; i++) {
- fallback_mt = fallbacks[migratetype][i];
- if (free_area_empty(area, fallback_mt))
- continue;
+ int fallback_mt = fallbacks[migratetype][i];
- if (should_try_claim_block(order, migratetype))
- *claim_block = true;
-
- if (*claim_block || !only_claim)
+ if (!free_area_empty(area, fallback_mt))
return fallback_mt;
}
@@ -2206,7 +2200,6 @@ __rmqueue_claim(struct zone *zone, int order, int start_migratetype,
int min_order = order;
struct page *page;
int fallback_mt;
- bool claim_block;
/*
* Do not steal pages from freelists belonging to other pageblocks
@@ -2225,11 +2218,14 @@ __rmqueue_claim(struct zone *zone, int order, int start_migratetype,
--current_order) {
area = &(zone->free_area[current_order]);
fallback_mt = find_suitable_fallback(area, current_order,
- start_migratetype, false, &claim_block);
+ start_migratetype, true);
+
+ /* No block in that order */
if (fallback_mt == -1)
continue;
- if (!claim_block)
+ /* Advanced into orders too low to claim, abort */
+ if (fallback_mt == -2)
break;
page = get_page_from_free_area(area, fallback_mt);
@@ -2254,12 +2250,11 @@ __rmqueue_steal(struct zone *zone, int order, int start_migratetype)
int current_order;
struct page *page;
int fallback_mt;
- bool claim_block;
for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) {
area = &(zone->free_area[current_order]);
fallback_mt = find_suitable_fallback(area, current_order,
- start_migratetype, false, &claim_block);
+ start_migratetype, false);
if (fallback_mt == -1)
continue;
--
2.49.0
Powered by blists - more mailing lists