Message-ID: <c0fc9e59-ba64-4faa-a03f-f8940fb466a1@amd.com>
Date: Thu, 10 Apr 2025 16:20:43 +0530
From: Shivank Garg <shivankg@....com>
To: Johannes Weiner <hannes@...xchg.org>,
 Andrew Morton <akpm@...ux-foundation.org>
Cc: Vlastimil Babka <vbabka@...e.cz>, Brendan Jackman <jackmanb@...gle.com>,
 Mel Gorman <mgorman@...hsingularity.net>, Carlos Song <carlos.song@....com>,
 linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2] mm: page_alloc: tighten up find_suitable_fallback()



On 4/7/2025 11:31 PM, Johannes Weiner wrote:
> find_suitable_fallback() is not as efficient as it could be, and
> somewhat difficult to follow.
> 
> 1. should_try_claim_block() is loop-invariant: its order and
>    migratetype arguments don't change while find_suitable_fallback()
>    walks the fallback list. There is no point in checking fallback
>    areas if the caller is interested in claimable blocks but the order
>    and the migratetype don't allow for that.
> 
> 2. __rmqueue_steal() doesn't care about claimability, so it shouldn't
>    have to run those tests.
> 
> Different callers want different things from this helper:
> 
> 1. __compact_finished() scans orders up until it finds a claimable block
> 2. __rmqueue_claim() scans orders down as long as blocks are claimable
> 3. __rmqueue_steal() doesn't care about claimability at all
> 
> Move should_try_claim_block() out of the loop. Only test it for the
> two callers who care in the first place. Distinguish "no blocks" from
> "order + mt are not claimable" in the return value; __rmqueue_claim()
> can stop once order becomes unclaimable, __compact_finished() can keep
> advancing until order becomes claimable.
> 
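
For reference, the reworked return contract, condensed from the diff
below (an illustrative summary, not text from the patch):

/*
 * find_suitable_fallback(area, order, migratetype, claimable):
 *    -2  order/migratetype is not claimable (only when claimable=true);
 *        __rmqueue_claim() stops descending orders here
 *    -1  no free fallback block of this order; callers keep scanning
 *   >=0  a fallback migratetype with free blocks of this order;
 *        __compact_finished() only needs this, hence its >= 0 test
 */
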
> Before:
> 
>  Performance counter stats for './run case-lru-file-mmap-read' (5 runs):
> 
>          85,294.85 msec task-clock                       #    5.644 CPUs utilized               ( +-  0.32% )
>             15,968      context-switches                 #  187.209 /sec                        ( +-  3.81% )
>                153      cpu-migrations                   #    1.794 /sec                        ( +-  3.29% )
>            801,808      page-faults                      #    9.400 K/sec                       ( +-  0.10% )
>    733,358,331,786      instructions                     #    1.87  insn per cycle              ( +-  0.20% )  (64.94%)
>    392,622,904,199      cycles                           #    4.603 GHz                         ( +-  0.31% )  (64.84%)
>    148,563,488,531      branches                         #    1.742 G/sec                       ( +-  0.18% )  (63.86%)
>        152,143,228      branch-misses                    #    0.10% of all branches             ( +-  1.19% )  (62.82%)
> 
>            15.1128 +- 0.0637 seconds time elapsed  ( +-  0.42% )
> 
> After:
> 
>  Performance counter stats for './run case-lru-file-mmap-read' (5 runs):
> 
>          84,380.21 msec task-clock                       #    5.664 CPUs utilized               ( +-  0.21% )
>             16,656      context-switches                 #  197.392 /sec                        ( +-  3.27% )
>                151      cpu-migrations                   #    1.790 /sec                        ( +-  3.28% )
>            801,703      page-faults                      #    9.501 K/sec                       ( +-  0.09% )
>    731,914,183,060      instructions                     #    1.88  insn per cycle              ( +-  0.38% )  (64.90%)
>    388,673,535,116      cycles                           #    4.606 GHz                         ( +-  0.24% )  (65.06%)
>    148,251,482,143      branches                         #    1.757 G/sec                       ( +-  0.37% )  (63.92%)
>        149,766,550      branch-misses                    #    0.10% of all branches             ( +-  1.22% )  (62.88%)
> 
>            14.8968 +- 0.0486 seconds time elapsed  ( +-  0.33% )
> 
> Signed-off-by: Johannes Weiner <hannes@...xchg.org>
> ---
>  mm/compaction.c |  4 +---
>  mm/internal.h   |  2 +-
>  mm/page_alloc.c | 31 +++++++++++++------------------
>  3 files changed, 15 insertions(+), 22 deletions(-)
> 
> diff --git a/mm/compaction.c b/mm/compaction.c
> index 139f00c0308a..7462a02802a5 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -2348,7 +2348,6 @@ static enum compact_result __compact_finished(struct compact_control *cc)
>  	ret = COMPACT_NO_SUITABLE_PAGE;
>  	for (order = cc->order; order < NR_PAGE_ORDERS; order++) {
>  		struct free_area *area = &cc->zone->free_area[order];
> -		bool claim_block;
>  
>  		/* Job done if page is free of the right migratetype */
>  		if (!free_area_empty(area, migratetype))
> @@ -2364,8 +2363,7 @@ static enum compact_result __compact_finished(struct compact_control *cc)
>  		 * Job done if allocation would steal freepages from
>  		 * other migratetype buddy lists.
>  		 */
> -		if (find_suitable_fallback(area, order, migratetype,
> -						true, &claim_block) != -1)
> +		if (find_suitable_fallback(area, order, migratetype, true) >= 0)
>  			/*
>  			 * Movable pages are OK in any pageblock. If we are
>  			 * stealing for a non-movable allocation, make sure
> diff --git a/mm/internal.h b/mm/internal.h
> index 50c2f590b2d0..55384b9971c3 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -915,7 +915,7 @@ static inline void init_cma_pageblock(struct page *page)
>  
>  
>  int find_suitable_fallback(struct free_area *area, unsigned int order,
> -			int migratetype, bool claim_only, bool *claim_block);
> +			   int migratetype, bool claimable);
>  
>  static inline bool free_area_empty(struct free_area *area, int migratetype)
>  {
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 03b0d45ed45a..1522e3a29b16 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -2077,31 +2077,25 @@ static bool should_try_claim_block(unsigned int order, int start_mt)
>  
>  /*
>   * Check whether there is a suitable fallback freepage with requested order.
> - * Sets *claim_block to instruct the caller whether it should convert a whole
> - * pageblock to the returned migratetype.
> - * If only_claim is true, this function returns fallback_mt only if
> + * If claimable is true, this function returns fallback_mt only if
>   * we would do this whole-block claiming. This would help to reduce
>   * fragmentation due to mixed migratetype pages in one pageblock.
>   */
>  int find_suitable_fallback(struct free_area *area, unsigned int order,
> -			int migratetype, bool only_claim, bool *claim_block)
> +			   int migratetype, bool claimable)
>  {
>  	int i;
> -	int fallback_mt;
> +
> +	if (claimable && !should_try_claim_block(order, migratetype))
> +		return -2;
>  
>  	if (area->nr_free == 0)
>  		return -1;
>  
> -	*claim_block = false;
>  	for (i = 0; i < MIGRATE_PCPTYPES - 1 ; i++) {
> -		fallback_mt = fallbacks[migratetype][i];
> -		if (free_area_empty(area, fallback_mt))
> -			continue;
> +		int fallback_mt = fallbacks[migratetype][i];
>  
> -		if (should_try_claim_block(order, migratetype))
> -			*claim_block = true;
> -
> -		if (*claim_block || !only_claim)
> +		if (!free_area_empty(area, fallback_mt))
>  			return fallback_mt;
>  	}
>  
> @@ -2206,7 +2200,6 @@ __rmqueue_claim(struct zone *zone, int order, int start_migratetype,
>  	int min_order = order;
>  	struct page *page;
>  	int fallback_mt;
> -	bool claim_block;
>  
>  	/*
>  	 * Do not steal pages from freelists belonging to other pageblocks
> @@ -2225,11 +2218,14 @@ __rmqueue_claim(struct zone *zone, int order, int start_migratetype,
>  				--current_order) {
>  		area = &(zone->free_area[current_order]);
>  		fallback_mt = find_suitable_fallback(area, current_order,
> -				start_migratetype, false, &claim_block);
> +						     start_migratetype, true);
> +
> +		/* No block in that order */
>  		if (fallback_mt == -1)
>  			continue;
>  
> -		if (!claim_block)
> +		/* Advanced into orders too low to claim, abort */
> +		if (fallback_mt == -2)
>  			break;
>  
>  		page = get_page_from_free_area(area, fallback_mt);
> @@ -2254,12 +2250,11 @@ __rmqueue_steal(struct zone *zone, int order, int start_migratetype)
>  	int current_order;
>  	struct page *page;
>  	int fallback_mt;
> -	bool claim_block;
>  
>  	for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) {
>  		area = &(zone->free_area[current_order]);
>  		fallback_mt = find_suitable_fallback(area, current_order,
> -				start_migratetype, false, &claim_block);
> +						     start_migratetype, false);
>  		if (fallback_mt == -1)
>  			continue;
>  
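
To see the caller-side split in action, here is a minimal userspace toy
model of the contract (assumptions: the order-2 claim threshold, the
NR_MT/NR_ORDERS sizes and the free-list layout are all invented for the
demo; only the -1-continue vs. -2-break flow mirrors the patch):

#include <stdbool.h>
#include <stdio.h>

#define NR_ORDERS 4
#define NR_MT     3	/* toy stand-in for MIGRATE_PCPTYPES */

struct free_area { int nr_free[NR_MT]; };

/* Toy stand-in: pretend only order >= 2 blocks are worth claiming. */
static bool should_try_claim_block(unsigned int order, int start_mt)
{
	(void)start_mt;
	return order >= 2;
}

/* Same contract as the patched helper: -2 unclaimable, -1 empty. */
static int find_suitable_fallback(struct free_area *area, unsigned int order,
				  int migratetype, bool claimable)
{
	if (claimable && !should_try_claim_block(order, migratetype))
		return -2;

	for (int mt = 0; mt < NR_MT; mt++)
		if (mt != migratetype && area->nr_free[mt])
			return mt;

	return -1;
}

int main(void)
{
	static struct free_area areas[NR_ORDERS];	/* zero-initialized */

	/* Only an order-1 block is free, below the claim threshold. */
	areas[1].nr_free[2] = 1;

	/* __rmqueue_claim() analogue: scan orders downward. */
	for (int order = NR_ORDERS - 1; order >= 0; order--) {
		int mt = find_suitable_fallback(&areas[order], order, 0, true);

		if (mt == -1)		/* no block in this order */
			continue;
		if (mt == -2) {		/* orders too low to claim, abort */
			puts("unclaimable order reached, abort claim scan");
			break;
		}
		printf("claim order %d from migratetype %d\n", order, mt);
	}
	return 0;
}

The claim scan gives up as soon as it hits the unclaimable order 1 even
though a fallback block exists there; __rmqueue_steal() would pass
claimable=false instead, never see -2, and take that block.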


Tested-by: Shivank Garg <shivankg@....com>

Thanks,
Shivank
