linux-kernel - Re: [PATCH 1/2] mm: call back alloc_pages_bulk

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251014114124.145165bc@mordecai.tesarici.cz>
Date: Tue, 14 Oct 2025 11:41:24 +0200
From: Petr Tesarik <ptesarik@...e.com>
To: "zhaoyang.huang" <zhaoyang.huang@...soc.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>, David Hildenbrand
 <david@...hat.com>, Matthew Wilcox <willy@...radead.org>, Mel Gorman
 <mgorman@...hsingularity.net>, Vlastimil Babka <vbabka@...e.cz>, Sumit
 Semwal <sumit.semwal@...aro.org>, Benjamin Gaignard
 <benjamin.gaignard@...labora.com>, Brian Starkey <Brian.Starkey@....com>,
 John Stultz <jstultz@...gle.com>, "T . J . Mercier" <tjmercier@...gle.com>,
 Christian König <christian.koenig@....com>,
 <linux-media@...r.kernel.org>, <dri-devel@...ts.freedesktop.org>,
 <linaro-mm-sig@...ts.linaro.org>, <linux-mm@...ck.org>,
 <linux-kernel@...r.kernel.org>, Zhaoyang Huang <huangzhaoyang@...il.com>,
 <steve.kang@...soc.com>
Subject: Re: [PATCH 1/2] mm: call back alloc_pages_bulk_list since it is
 useful

On Tue, 14 Oct 2025 16:32:29 +0800
"zhaoyang.huang" <zhaoyang.huang@...soc.com> wrote:

> From: Zhaoyang Huang <zhaoyang.huang@...soc.com>
> 
> commit c8b979530f27 ("mm: alloc_pages_bulk_noprof: drop page_list
> argument") dropped alloc_pages_bulk_list. This commit brings it back,
> since it has proved helpful to drivers which allocate pages in bulk
> (see patch 2 in this series).
> I did notice Matthew's comment about the time cost of iterating a list.
> However, I also observed in our test that the extra page_array allocation
> can be more expensive than the CPU iteration when direct reclaim happens
> while RAM is low[1]. IMHO, we could keep the API and let users choose
> between the array and the list according to their scenarios.

OK, so this is more or less a revert of commit c8b979530f27 ("mm:
alloc_pages_bulk_noprof: drop page_list argument")...

I cannot comment on the performance gains, but I dislike the fact that
the patch re-introduces alloc_pages_bulk_noprof() as a function with two
signatures (either page_list is used, or page_array is used).

If we can agree that allocations onto a linked list are useful, then I
suggest splitting the existing function so that the common bits end up
in helper functions, called by both variants (one function using a
list, one using an array).

Petr T

> [1]
> android.hardwar-728     [002] .....   334.573875: system_heap_do_allocate: Execution time: order 0 1 us
> android.hardwar-728     [002] .....   334.573879: system_heap_do_allocate: Execution time: order 0 2 us
> android.hardwar-728     [002] .....   334.574239: system_heap_do_allocate: Execution time: order 0 354 us
> android.hardwar-728     [002] .....   334.574247: system_heap_do_allocate: Execution time: order 0 4 us
> android.hardwar-728     [002] .....   334.574250: system_heap_do_allocate: Execution time: order 0 2 us
> 
> Signed-off-by: Zhaoyang Huang <zhaoyang.huang@...soc.com>
> ---
>  include/linux/gfp.h |  9 +++++++--
>  mm/mempolicy.c      | 14 +++++++-------
>  mm/page_alloc.c     | 39 +++++++++++++++++++++++++++------------
>  3 files changed, 41 insertions(+), 21 deletions(-)
> 
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 5ebf26fcdcfa..f1540c9fcd87 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -231,6 +231,7 @@ struct folio *__folio_alloc_noprof(gfp_t gfp, unsigned int order, int preferred_
>  
>  unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
>  				nodemask_t *nodemask, int nr_pages,
> +				struct list_head *page_list,
>  				struct page **page_array);
>  #define __alloc_pages_bulk(...)			alloc_hooks(alloc_pages_bulk_noprof(__VA_ARGS__))
>  
> @@ -242,7 +243,11 @@ unsigned long alloc_pages_bulk_mempolicy_noprof(gfp_t gfp,
>  
>  /* Bulk allocate order-0 pages */
>  #define alloc_pages_bulk(_gfp, _nr_pages, _page_array)		\
> -	__alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, _page_array)
> +	__alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, NULL, _page_array)
> +
> +#define alloc_pages_bulk_list(_gfp, _nr_pages, _list)			\
> +	__alloc_pages_bulk(_gfp, numa_mem_id(), NULL, _nr_pages, _list, NULL)
> +
>  
>  static inline unsigned long
>  alloc_pages_bulk_node_noprof(gfp_t gfp, int nid, unsigned long nr_pages,
> @@ -251,7 +256,7 @@ alloc_pages_bulk_node_noprof(gfp_t gfp, int nid, unsigned long nr_pages,
>  	if (nid == NUMA_NO_NODE)
>  		nid = numa_mem_id();
>  
> -	return alloc_pages_bulk_noprof(gfp, nid, NULL, nr_pages, page_array);
> +	return alloc_pages_bulk_noprof(gfp, nid, NULL, nr_pages, NULL, page_array);
>  }
>  
>  #define alloc_pages_bulk_node(...)				\
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index eb83cff7db8c..26274302ee01 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -2537,13 +2537,13 @@ static unsigned long alloc_pages_bulk_interleave(gfp_t gfp,
>  		if (delta) {
>  			nr_allocated = alloc_pages_bulk_noprof(gfp,
>  					interleave_nodes(pol), NULL,
> -					nr_pages_per_node + 1,
> +					nr_pages_per_node + 1, NULL,
>  					page_array);
>  			delta--;
>  		} else {
>  			nr_allocated = alloc_pages_bulk_noprof(gfp,
>  					interleave_nodes(pol), NULL,
> -					nr_pages_per_node, page_array);
> +					nr_pages_per_node, NULL, page_array);
>  		}
>  
>  		page_array += nr_allocated;
> @@ -2593,7 +2593,7 @@ static unsigned long alloc_pages_bulk_weighted_interleave(gfp_t gfp,
>  	if (weight && node_isset(node, nodes)) {
>  		node_pages = min(rem_pages, weight);
>  		nr_allocated = __alloc_pages_bulk(gfp, node, NULL, node_pages,
> -						  page_array);
> +						  NULL, page_array);
>  		page_array += nr_allocated;
>  		total_allocated += nr_allocated;
>  		/* if that's all the pages, no need to interleave */
> @@ -2658,7 +2658,7 @@ static unsigned long alloc_pages_bulk_weighted_interleave(gfp_t gfp,
>  		if (!node_pages)
>  			break;
>  		nr_allocated = __alloc_pages_bulk(gfp, node, NULL, node_pages,
> -						  page_array);
> +						  NULL, page_array);
>  		page_array += nr_allocated;
>  		total_allocated += nr_allocated;
>  		if (total_allocated == nr_pages)
> @@ -2682,11 +2682,11 @@ static unsigned long alloc_pages_bulk_preferred_many(gfp_t gfp, int nid,
>  	preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
>  
>  	nr_allocated  = alloc_pages_bulk_noprof(preferred_gfp, nid, &pol->nodes,
> -					   nr_pages, page_array);
> +					   nr_pages, NULL, page_array);
>  
>  	if (nr_allocated < nr_pages)
>  		nr_allocated += alloc_pages_bulk_noprof(gfp, numa_node_id(), NULL,
> -				nr_pages - nr_allocated,
> +				nr_pages - nr_allocated, NULL,
>  				page_array + nr_allocated);
>  	return nr_allocated;
>  }
> @@ -2722,7 +2722,7 @@ unsigned long alloc_pages_bulk_mempolicy_noprof(gfp_t gfp,
>  	nid = numa_node_id();
>  	nodemask = policy_nodemask(gfp, pol, NO_INTERLEAVE_INDEX, &nid);
>  	return alloc_pages_bulk_noprof(gfp, nid, nodemask,
> -				       nr_pages, page_array);
> +				       nr_pages, NULL, page_array);
>  }
>  
>  int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index d1d037f97c5f..a95bdd8cbf5b 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -4940,23 +4940,28 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
>  }
>  
>  /*
> - * __alloc_pages_bulk - Allocate a number of order-0 pages to an array
> + * __alloc_pages_bulk - Allocate a number of order-0 pages to a list or array
>   * @gfp: GFP flags for the allocation
>   * @preferred_nid: The preferred NUMA node ID to allocate from
>   * @nodemask: Set of nodes to allocate from, may be NULL
> - * @nr_pages: The number of pages desired in the array
> - * @page_array: Array to store the pages
> + * @nr_pages: The number of pages desired on the list or array
> + * @page_list: Optional list to store the allocated pages
> + * @page_array: Optional array to store the pages
>   *
>   * This is a batched version of the page allocator that attempts to
> - * allocate nr_pages quickly. Pages are added to the page_array.
> + * allocate nr_pages quickly. Pages are added to page_list if page_list
> + * is not NULL, otherwise it is assumed that the page_array is valid.
>   *
> - * Note that only NULL elements are populated with pages and nr_pages
> + * For lists, nr_pages is the number of pages that should be allocated.
> + *
> + * For arrays, only NULL elements are populated with pages and nr_pages
>   * is the maximum number of pages that will be stored in the array.
>   *
> - * Returns the number of pages in the array.
> + * Returns the number of pages on the list or array.
>   */
>  unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
>  			nodemask_t *nodemask, int nr_pages,
> +			struct list_head *page_list,
>  			struct page **page_array)
>  {
>  	struct page *page;
> @@ -4974,7 +4979,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
>  	 * Skip populated array elements to determine if any pages need
>  	 * to be allocated before disabling IRQs.
>  	 */
> -	while (nr_populated < nr_pages && page_array[nr_populated])
> +	while (page_array && nr_populated < nr_pages && page_array[nr_populated])
>  		nr_populated++;
>  
>  	/* No pages requested? */
> @@ -4982,7 +4987,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
>  		goto out;
>  
>  	/* Already populated array? */
> -	if (unlikely(nr_pages - nr_populated == 0))
> +	if (unlikely(page_array && nr_pages - nr_populated == 0))
>  		goto out;
>  
>  	/* Bulk allocator does not support memcg accounting. */
> @@ -5064,7 +5069,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
>  	while (nr_populated < nr_pages) {
>  
>  		/* Skip existing pages */
> -		if (page_array[nr_populated]) {
> +		if (page_array && page_array[nr_populated]) {
>  			nr_populated++;
>  			continue;
>  		}
> @@ -5083,7 +5088,11 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
>  
>  		prep_new_page(page, 0, gfp, 0);
>  		set_page_refcounted(page);
> -		page_array[nr_populated++] = page;
> +		if (page_list)
> +			list_add(&page->lru, page_list);
> +		else
> +			page_array[nr_populated] = page;
> +		nr_populated++;
>  	}
>  
>  	pcp_spin_unlock(pcp);
> @@ -5100,8 +5109,14 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
>  
>  failed:
>  	page = __alloc_pages_noprof(gfp, 0, preferred_nid, nodemask);
> -	if (page)
> -		page_array[nr_populated++] = page;
> +	if (page) {
> +		if (page_list)
> +			list_add(&page->lru, page_list);
> +		else
> +			page_array[nr_populated] = page;
> +		nr_populated++;
> +	}
> +
>  	goto out;
>  }
>  EXPORT_SYMBOL_GPL(alloc_pages_bulk_noprof);