Message-ID: <aPh96jn2NcqXY4IC@infradead.org>
Date: Tue, 21 Oct 2025 23:47:06 -0700
From: Christoph Hellwig <hch@...radead.org>
To: Vlastimil Babka <vbabka@...e.cz>
Cc: Christoph Hellwig <hch@...radead.org>,
	Suren Baghdasaryan <surenb@...gle.com>,
	"Liam R. Howlett" <Liam.Howlett@...cle.com>,
	Christoph Lameter <cl@...two.org>,
	David Rientjes <rientjes@...gle.com>,
	Roman Gushchin <roman.gushchin@...ux.dev>,
	Harry Yoo <harry.yoo@...cle.com>,
	Uladzislau Rezki <urezki@...il.com>,
	Sidhartha Kumar <sidhartha.kumar@...cle.com>, linux-mm@...ck.org,
	linux-kernel@...r.kernel.org, rcu@...r.kernel.org,
	maple-tree@...ts.infradead.org, Alexei Starovoitov <ast@...nel.org>,
	Sebastian Andrzej Siewior <bigeasy@...utronix.de>,
	Venkat Rao Bagalkote <venkat88@...ux.ibm.com>,
	Qianfeng Rong <rongqianfeng@...o.com>,
	Wei Yang <richard.weiyang@...il.com>,
	"Matthew Wilcox (Oracle)" <willy@...radead.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
	WangYuli <wangyuli@...ontech.com>, Jann Horn <jannh@...gle.com>,
	Pedro Falcato <pfalcato@...e.de>
Subject: Re: [PATCH v8 00/23] SLUB percpu sheaves

On Wed, Oct 15, 2025 at 10:32:44AM +0200, Vlastimil Babka wrote:
> Yeah, not a replacement for mempools which have their special semantics.
> 
> > to implement a mempool_alloc_batch to allow grabbing multiple objects
> > out of a mempool safely for something I'm working on.
> 
> I can imagine allocating multiple objects can be difficult to achieve with
> the mempool's guaranteed progress semantics. Maybe the mempool could serve
> prefilled sheaves?

It doesn't look too bad, but I'd be happy with even better versions.
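
As a rough illustration of the calling convention this is aimed at
(sketch only - demo_bulk and the kmalloc-backed pool are made up for
the example and are not part of the patch):

static int demo_bulk(mempool_t *pool)
{
	void *elem[8] = { };	/* NULL slots get filled by the allocator */
	unsigned int kept, i;

	/*
	 * GFP_NOIO includes __GFP_DIRECT_RECLAIM, so this sleeps on the
	 * pool instead of failing; the error check only matters for
	 * NOWAIT-style masks.
	 */
	if (mempool_alloc_bulk(pool, elem, ARRAY_SIZE(elem), GFP_NOIO) < 0)
		return -ENOMEM;

	/* ... use elem[0..7] ... */

	/*
	 * mempool_free_bulk() refills the reserve first and returns how
	 * many elements it consumed; the caller disposes of the rest.
	 */
	kept = mempool_free_bulk(pool, elem, ARRAY_SIZE(elem));
	for (i = kept; i < ARRAY_SIZE(elem); i++)
		kfree(elem[i]);	/* assumes mempool_create_kmalloc_pool() */
	return 0;
}

The all-or-nothing check in the allocation slow path (curr_nr >= count - i)
is what keeps two batching threads from each pinning part of the reserve
and then sleeping on each other, which is the deadlock a loop around
plain mempool_alloc() runs into.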

This is what I have:

---
From 9d25a3ce6cff11b7853381921c53a51e51f27689 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@....de>
Date: Mon, 8 Sep 2025 18:22:12 +0200
Subject: mempool: add mempool_{alloc,free}_bulk

Add a version of the mempool allocator that works for batch allocations
of multiple objects.  Calling mempool_alloc in a loop is not safe
because it could deadlock if multiple threads are attempting such an
allocation at the same time.

As an extra benefit, the interface is built so that the same array
can be used for alloc_pages_bulk / release_pages, so that at least
for page-backed mempools the fast path can use a nice batch optimization.

Still WIP: this needs proper documentation, and mempool also seems to
lack error injection to make it easy to actually test the pool code.

Signed-off-by: Christoph Hellwig <hch@....de>
---
 include/linux/mempool.h |   6 ++
 mm/mempool.c            | 131 ++++++++++++++++++++++++----------------
 2 files changed, 86 insertions(+), 51 deletions(-)

diff --git a/include/linux/mempool.h b/include/linux/mempool.h
index 34941a4b9026..59f14e94596f 100644
--- a/include/linux/mempool.h
+++ b/include/linux/mempool.h
@@ -66,9 +66,15 @@ extern void mempool_destroy(mempool_t *pool);
 extern void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask) __malloc;
 #define mempool_alloc(...)						\
 	alloc_hooks(mempool_alloc_noprof(__VA_ARGS__))
+int mempool_alloc_bulk_noprof(mempool_t *pool, void **elem,
+		unsigned int count, gfp_t gfp_mask);
+#define mempool_alloc_bulk(...)						\
+	alloc_hooks(mempool_alloc_bulk_noprof(__VA_ARGS__))
 
 extern void *mempool_alloc_preallocated(mempool_t *pool) __malloc;
 extern void mempool_free(void *element, mempool_t *pool);
+unsigned int mempool_free_bulk(mempool_t *pool, void **elem,
+		unsigned int count);
 
 /*
  * A mempool_alloc_t and mempool_free_t that get the memory from
diff --git a/mm/mempool.c b/mm/mempool.c
index 1c38e873e546..d8884aef2666 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -371,26 +371,13 @@ int mempool_resize(mempool_t *pool, int new_min_nr)
 }
 EXPORT_SYMBOL(mempool_resize);
 
-/**
- * mempool_alloc - allocate an element from a specific memory pool
- * @pool:      pointer to the memory pool which was allocated via
- *             mempool_create().
- * @gfp_mask:  the usual allocation bitmask.
- *
- * this function only sleeps if the alloc_fn() function sleeps or
- * returns NULL. Note that due to preallocation, this function
- * *never* fails when called from process contexts. (it might
- * fail if called from an IRQ context.)
- * Note: using __GFP_ZERO is not supported.
- *
- * Return: pointer to the allocated element or %NULL on error.
- */
-void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask)
+int mempool_alloc_bulk_noprof(mempool_t *pool, void **elem,
+		unsigned int count, gfp_t gfp_mask)
 {
-	void *element;
 	unsigned long flags;
 	wait_queue_entry_t wait;
 	gfp_t gfp_temp;
+	unsigned int i;
 
 	VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
 	might_alloc(gfp_mask);
@@ -401,15 +388,24 @@ void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask)
 
 	gfp_temp = gfp_mask & ~(__GFP_DIRECT_RECLAIM|__GFP_IO);
 
+	i = 0;
 repeat_alloc:
+	for (; i < count; i++) {
+		if (!elem[i])
+			elem[i] = pool->alloc(gfp_temp, pool->pool_data);
+		if (unlikely(!elem[i]))
+			goto use_pool;
+	}
 
-	element = pool->alloc(gfp_temp, pool->pool_data);
-	if (likely(element != NULL))
-		return element;
+	return 0;
 
+use_pool:
 	spin_lock_irqsave(&pool->lock, flags);
-	if (likely(pool->curr_nr)) {
-		element = remove_element(pool);
+	if (likely(pool->curr_nr >= count - i)) {
+		for (; i < count; i++) {
+			if (!elem[i])
+				elem[i] = remove_element(pool);
+		}
 		spin_unlock_irqrestore(&pool->lock, flags);
 		/* paired with rmb in mempool_free(), read comment there */
 		smp_wmb();
@@ -417,8 +413,9 @@ void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask)
 		 * Update the allocation stack trace as this is more useful
 		 * for debugging.
 		 */
-		kmemleak_update_trace(element);
-		return element;
+		for (i = 0; i < count; i++)
+			kmemleak_update_trace(elem[i]);
+		return 0;
 	}
 
 	/*
@@ -434,10 +431,12 @@ void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask)
 	/* We must not sleep if !__GFP_DIRECT_RECLAIM */
 	if (!(gfp_mask & __GFP_DIRECT_RECLAIM)) {
 		spin_unlock_irqrestore(&pool->lock, flags);
-		return NULL;
+		if (i > 0)
+			mempool_free_bulk(pool, elem, i);
+		return -ENOMEM;
 	}
 
-	/* Let's wait for someone else to return an element to @pool */
+	/* Let's wait for someone else to return elements to @pool */
 	init_wait(&wait);
 	prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
 
@@ -452,6 +451,30 @@ void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask)
 	finish_wait(&pool->wait, &wait);
 	goto repeat_alloc;
 }
+EXPORT_SYMBOL_GPL(mempool_alloc_bulk_noprof);
+
+/**
+ * mempool_alloc - allocate an element from a specific memory pool
+ * @pool:      pointer to the memory pool which was allocated via
+ *             mempool_create().
+ * @gfp_mask:  the usual allocation bitmask.
+ *
+ * this function only sleeps if the alloc_fn() function sleeps or
+ * returns NULL. Note that due to preallocation, this function
+ * *never* fails when called from process contexts. (it might
+ * fail if called from an IRQ context.)
+ * Note: using __GFP_ZERO is not supported.
+ *
+ * Return: pointer to the allocated element or %NULL on error.
+ */
+void *mempool_alloc_noprof(mempool_t *pool, gfp_t gfp_mask)
+{
+	void *elem[1] = { };
+
+	if (mempool_alloc_bulk_noprof(pool, elem, 1, gfp_mask) < 0)
+		return NULL;
+	return elem[0];
+}
 EXPORT_SYMBOL(mempool_alloc_noprof);
 
 /**
@@ -491,20 +514,11 @@ void *mempool_alloc_preallocated(mempool_t *pool)
 }
 EXPORT_SYMBOL(mempool_alloc_preallocated);
 
-/**
- * mempool_free - return an element to the pool.
- * @element:   pool element pointer.
- * @pool:      pointer to the memory pool which was allocated via
- *             mempool_create().
- *
- * this function only sleeps if the free_fn() function sleeps.
- */
-void mempool_free(void *element, mempool_t *pool)
+unsigned int mempool_free_bulk(mempool_t *pool, void **elem, unsigned int count)
 {
 	unsigned long flags;
-
-	if (unlikely(element == NULL))
-		return;
+	bool added = false;
+	unsigned int freed = 0;
 
 	/*
 	 * Paired with the wmb in mempool_alloc().  The preceding read is
@@ -541,15 +555,11 @@ void mempool_free(void *element, mempool_t *pool)
 	 */
 	if (unlikely(READ_ONCE(pool->curr_nr) < pool->min_nr)) {
 		spin_lock_irqsave(&pool->lock, flags);
-		if (likely(pool->curr_nr < pool->min_nr)) {
-			add_element(pool, element);
-			spin_unlock_irqrestore(&pool->lock, flags);
-			if (wq_has_sleeper(&pool->wait))
-				wake_up(&pool->wait);
-			return;
+		while (pool->curr_nr < pool->min_nr && freed < count) {
+			add_element(pool, elem[freed++]);
+			added = true;
 		}
 		spin_unlock_irqrestore(&pool->lock, flags);
-	}
 
 	/*
 	 * Handle the min_nr = 0 edge case:
@@ -560,20 +570,39 @@ void mempool_free(void *element, mempool_t *pool)
 	 * allocation of element when both min_nr and curr_nr are 0, and
 	 * any active waiters are properly awakened.
 	 */
-	if (unlikely(pool->min_nr == 0 &&
+	} else if (unlikely(pool->min_nr == 0 &&
 		     READ_ONCE(pool->curr_nr) == 0)) {
 		spin_lock_irqsave(&pool->lock, flags);
 		if (likely(pool->curr_nr == 0)) {
-			add_element(pool, element);
-			spin_unlock_irqrestore(&pool->lock, flags);
-			if (wq_has_sleeper(&pool->wait))
-				wake_up(&pool->wait);
-			return;
+			add_element(pool, elem[freed++]);
+			added = true;
 		}
 		spin_unlock_irqrestore(&pool->lock, flags);
 	}
 
-	pool->free(element, pool->pool_data);
+	if (unlikely(added) && wq_has_sleeper(&pool->wait))
+		wake_up(&pool->wait);
+
+	return freed;
+}
+EXPORT_SYMBOL_GPL(mempool_free_bulk);
+
+/**
+ * mempool_free - return an element to the pool.
+ * @element:   pool element pointer.
+ * @pool:      pointer to the memory pool which was allocated via
+ *             mempool_create().
+ *
+ * this function only sleeps if the free_fn() function sleeps.
+ */
+void mempool_free(void *element, mempool_t *pool)
+{
+	if (likely(element)) {
+		void *elem[1] = { element };
+
+		if (!mempool_free_bulk(pool, elem, 1))
+			pool->free(element, pool->pool_data);
+	}
 }
 EXPORT_SYMBOL(mempool_free);
 
-- 
2.47.3

