Message-ID: <c691c9c1-a513-4db3-95f6-3d24111680b7@suse.cz>
Date: Wed, 12 Nov 2025 13:20:21 +0100
From: Vlastimil Babka <vbabka@...e.cz>
To: Christoph Hellwig <hch@....de>, Andrew Morton <akpm@...ux-foundation.org>
Cc: Christoph Lameter <cl@...two.org>, David Rientjes <rientjes@...gle.com>,
Roman Gushchin <roman.gushchin@...ux.dev>, Harry Yoo <harry.yoo@...cle.com>,
Eric Biggers <ebiggers@...nel.org>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH 7/7] mempool: add mempool_{alloc,free}_bulk
On 11/11/25 14:52, Christoph Hellwig wrote:
> Add a version of the mempool allocator that works for batch allocations
> of multiple objects. Calling mempool_alloc in a loop is not safe because
> it could deadlock if multiple threads are performing such an allocation
> at the same time.
>
> As an extra benefit the interface is built so that the same array can be
> used for alloc_pages_bulk / release_pages so that at least for page
> backed mempools the fast path can use a nice batch optimization.
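
Just to spell out the contract for myself (hypothetical caller, untested;
slots left NULL get filled, slots that are already populated are kept as-is):

        void *elems[4] = {};

        /* fill all NULL slots; per the doc below this always returns 0 */
        mempool_alloc_bulk(pool, elems, ARRAY_SIZE(elems), GFP_NOIO);

        /* ... use elems[0..3] ... */

        mempool_free_bulk(pool, elems, ARRAY_SIZE(elems));
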
>
> Signed-off-by: Christoph Hellwig <hch@....de>
> ---
> include/linux/mempool.h | 7 ++
> mm/mempool.c | 182 ++++++++++++++++++++++++++++++----------
> 2 files changed, 145 insertions(+), 44 deletions(-)
>
> diff --git a/include/linux/mempool.h b/include/linux/mempool.h
> index 34941a4b9026..486ed50776db 100644
> --- a/include/linux/mempool.h
> +++ b/include/linux/mempool.h
> @@ -66,9 +66,16 @@ extern void mempool_destroy(mempool_t *pool);
> extern void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask) __malloc;
> #define mempool_alloc(...) \
> alloc_hooks(mempool_alloc_noprof(__VA_ARGS__))
> +int mempool_alloc_bulk_noprof(mempool_t *pool, void **elem,
> + unsigned int count, gfp_t gfp_mask, unsigned long caller_ip);
> +#define mempool_alloc_bulk(pool, elem, count, gfp_mask) \
> + alloc_hooks(mempool_alloc_bulk_noprof(pool, elem, count, gfp_mask, \
> + _RET_IP_))
>
> extern void *mempool_alloc_preallocated(mempool_t *pool) __malloc;
> extern void mempool_free(void *element, mempool_t *pool);
> +unsigned int mempool_free_bulk(mempool_t *pool, void **elem,
> + unsigned int count);
>
> /*
> * A mempool_alloc_t and mempool_free_t that get the memory from
> diff --git a/mm/mempool.c b/mm/mempool.c
> index 8cf3b5705b7f..e2f05bec633b 100644
> --- a/mm/mempool.c
> +++ b/mm/mempool.c
> @@ -21,11 +21,16 @@
> #include "slab.h"
>
> static DECLARE_FAULT_ATTR(fail_mempool_alloc);
> +static DECLARE_FAULT_ATTR(fail_mempool_alloc_bulk);
>
> static int __init mempool_faul_inject_init(void)
> {
> - return PTR_ERR_OR_ZERO(fault_create_debugfs_attr("fail_mempool_alloc",
> - NULL, &fail_mempool_alloc));
> + if (IS_ERR(fault_create_debugfs_attr("fail_mempool_alloc", NULL,
> + &fail_mempool_alloc)) ||
> + IS_ERR(fault_create_debugfs_attr("fail_mempool_alloc_bulk", NULL,
> + &fail_mempool_alloc_bulk)))
> + return -ENOMEM;
Pedantically speaking, the error (from debugfs_create_dir()) might be
something other than -ENOMEM; probably doesn't matter in practice.
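FWIW, propagating the actual error would be simple enough, e.g. (untested):

        static int __init mempool_faul_inject_init(void)
        {
                struct dentry *dir;

                dir = fault_create_debugfs_attr("fail_mempool_alloc", NULL,
                                                &fail_mempool_alloc);
                if (IS_ERR(dir))
                        return PTR_ERR(dir);

                dir = fault_create_debugfs_attr("fail_mempool_alloc_bulk", NULL,
                                                &fail_mempool_alloc_bulk);
                if (IS_ERR(dir))
                        return PTR_ERR(dir);

                return 0;
        }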
> + return 0;
> }
> late_initcall(mempool_faul_inject_init);
>
> @@ -380,16 +385,21 @@ int mempool_resize(mempool_t *pool, int new_min_nr)
> }
> EXPORT_SYMBOL(mempool_resize);
>
> -static void *mempool_alloc_from_pool(struct mempool *pool, gfp_t gfp_mask)
> +static bool mempool_alloc_from_pool(struct mempool *pool, void **elems,
> + unsigned int count, unsigned int allocated,
> + gfp_t gfp_mask)
> {
> unsigned long flags;
> - void *element;
> + unsigned int i;
>
> spin_lock_irqsave(&pool->lock, flags);
> - if (unlikely(!pool->curr_nr))
> + if (unlikely(pool->curr_nr < count - allocated))
So we might be pessimistic here when some of the elements in the array are
already non-NULL, so we in fact need fewer than count - allocated. Might be an
issue if callers were relying on this for forward progress? It would be
simpler to just tell them not to...
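If we did want the check to be exact, something like this (untested) would
only count the slots that actually need filling; the wait check further down
would need the same treatment:

        unsigned int needed = 0;

        /* only the NULL slots still need an element from the reserve */
        for (i = allocated; i < count; i++)
                if (!elems[i])
                        needed++;

        spin_lock_irqsave(&pool->lock, flags);
        if (unlikely(pool->curr_nr < needed))
                goto fail;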
> goto fail;
> alloc:
> - element = remove_element(pool);
> + for (; allocated < count; allocated++) {
> + if (!elems[allocated])
> + elems[allocated] = remove_element(pool);
> + }
> spin_unlock_irqrestore(&pool->lock, flags);
>
> /* Paired with rmb in mempool_free(), read comment there. */
> @@ -399,15 +409,16 @@ static void *mempool_alloc_from_pool(struct mempool *pool, gfp_t gfp_mask)
> * Update the allocation stack trace as this is more useful for
> * debugging.
> */
> - kmemleak_update_trace(element);
> - return element;
> + for (i = 0; i < count; i++)
> + kmemleak_update_trace(elems[i]);
> + return true;
>
> fail:
> if (gfp_mask & __GFP_DIRECT_RECLAIM) {
> DEFINE_WAIT(wait);
>
> prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
> - if (pool->curr_nr) {
> + if (pool->curr_nr >= count - allocated) {
> finish_wait(&pool->wait, &wait);
> goto alloc;
> }
> @@ -426,7 +437,7 @@ static void *mempool_alloc_from_pool(struct mempool *pool, gfp_t gfp_mask)
> spin_unlock_irqrestore(&pool->lock, flags);
> }
>
> - return NULL;
> + return false;
> }
>
> /*
> @@ -442,6 +453,72 @@ static inline gfp_t mempool_adjust_gfp(gfp_t *gfp_mask)
> return *gfp_mask & ~(__GFP_DIRECT_RECLAIM | __GFP_IO);
> }
>
> +/**
> + * mempool_alloc_bulk - allocate multiple elements from a memory pool
> + * @pool: pointer to the memory pool
> + * @elems: partially or fully populated elements array
> + * @count: size (in entries) of @elem
> + * @gfp_mask: GFP_* flags. %__GFP_ZERO is not supported.
We should say __GFP_DIRECT_RECLAIM is mandatory...
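Maybe something like:

 * @gfp_mask: GFP_* flags. %__GFP_DIRECT_RECLAIM is required, %__GFP_ZERO is
 *	not supported.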
> + *
> + * Allocate elements for each slot in @elem that is non-%NULL. This is done by
> + * first calling into the alloc_fn supplied at pool initialization time, and
> + * dipping into the reserved pool when alloc_fn fails to allocate an element.
> + *
> + * This function only sleeps if the alloc_fn callback sleeps, or when waiting
> + * for elements to become available in the pool.
> + *
> + * Return: Always 0. If it wasn't for %$#^$ alloc tags, it would return void.
My sympathies :) But see below.
> + */
> +int mempool_alloc_bulk_noprof(struct mempool *pool, void **elems,
> + unsigned int count, gfp_t gfp_mask, unsigned long caller_ip)
> +{
> + gfp_t gfp_temp = mempool_adjust_gfp(&gfp_mask);
> + unsigned int i = 0;
> +
> + VM_WARN_ON_ONCE(count > pool->min_nr);
> + VM_WARN_ON_ONCE(!(gfp_mask & __GFP_DIRECT_RECLAIM));
... per here.
> + VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
> + might_alloc(gfp_mask);
> +
> + /*
> + * If an error is injected, fail all elements in a bulk allocation so
> + * that we stress the multiple elements missing path.
> + */
> + if (should_fail_ex(&fail_mempool_alloc_bulk, 1, FAULT_NOWARN)) {
> + pr_info("forcing mempool usage for pool %pS\n",
> + (void *)_RET_IP_);
> + goto use_pool;
> + }
> +
> +repeat_alloc:
> + /*
> + * Try to allocate the elements using the allocation callback. If that
> + * succeeds or we were not allowed to sleep, return now. Don't dip into
> + * the reserved pools for !__GFP_DIRECT_RECLAIM allocations as they
> + * aren't guaranteed to succeed and chances of getting an allocation
> + * from the allocators using GFP_ATOMIC is higher than stealing one of
> + * the few items from our usually small pool.
> + */
Hm, but the code doesn't do what the comment says, AFAICS? It will try dipping
into the pool and might succeed if there are elements; it just won't wait for
them there?
> + for (; i < count; i++) {
> + if (!elems[i]) {
> + elems[i] = pool->alloc(gfp_temp, pool->pool_data);
> + if (unlikely(!elems[i]))
> + goto use_pool;
> + }
> + }
> +
> + return 0;
> +
> +use_pool:
So should we bail out here with -ENOMEM when !(gfp_mask & __GFP_DIRECT_RECLAIM)?
> + if (!mempool_alloc_from_pool(pool, elems, count, i, gfp_temp)) {
> + gfp_temp = gfp_mask;
> + goto repeat_alloc;
Because this seems to be an infinite loop otherwise?
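I.e. inside this branch, something like (untested), which would also bring the
behavior in line with the comment above:

        /*
         * Without __GFP_DIRECT_RECLAIM we can neither retry the allocator
         * with a stronger mask nor wait for elements to be returned to the
         * pool, so give up instead of looping forever.
         */
        if (!(gfp_mask & __GFP_DIRECT_RECLAIM))
                return -ENOMEM;
        gfp_temp = gfp_mask;
        goto repeat_alloc;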
> + }
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(mempool_alloc_bulk_noprof);
> +