Message-Id: <20260115021108.1913695-1-guzebing1612@gmail.com>
Date: Thu, 15 Jan 2026 10:11:08 +0800
From: guzebing <guzebing1612@...il.com>
To: brauner@...nel.org,
djwong@...nel.org
Cc: hch@...radead.org,
linux-xfs@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org,
guzebing@...edance.com,
guzebing <guzebing1612@...il.com>,
syzbot@...kaller.appspotmail.com,
Fengnan Chang <changfengnan@...edance.com>
Subject: [PATCH v3] iomap: add allocation cache for iomap_dio

Just as the block layer keeps a per-cpu allocation cache for the bio
structure, do the same for the iomap_dio structure: add a per-cpu cache
for iomap_dio allocations, so completed structures can be recycled
quickly instead of going back through the slab allocator.

This reduces memory allocations on the direct I/O path, making direct
I/O less likely to block when system memory is tight. It also improves
io_uring direct I/O read performance by about 2.6%.
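
To illustrate the intended call pattern of the new helpers, here is a
minimal usage sketch. Only the pcpu_cache_list_* functions come from
this patch; struct foo and the foo_* wrappers are hypothetical
stand-ins for a payload type such as struct iomap_dio, and since the
helpers are static to direct-io.c an external user like this is purely
illustrative:

/* Hypothetical payload type; stands in for struct iomap_dio. */
struct foo {
	int data;
};

static struct pcpu_cache_list *foo_cache;

static int __init foo_init(void)
{
	/* Keep at most 256 recycled objects cached per CPU. */
	foo_cache = pcpu_cache_list_create(256, sizeof(struct foo));
	return foo_cache ? 0 : -ENOMEM;
}

static void __exit foo_exit(void)
{
	/* Release the per-cpu bookkeeping set up by _create(). */
	pcpu_cache_list_destroy(foo_cache);
}

static struct foo *foo_alloc(void)
{
	/*
	 * Pops an element from this CPU's free list when one is
	 * available; otherwise falls back to kmalloc(GFP_KERNEL).
	 */
	return pcpu_cache_list_alloc(foo_cache);
}

static void foo_free(struct foo *p)
{
	/*
	 * Recycles the element onto the per-cpu list in task or hardirq
	 * context; kfree()s it when the cache is full or when called
	 * from any other context.
	 */
	pcpu_cache_list_free(p, foo_cache);
}

Note that max_nr bounds the number of cached elements per CPU, not
system-wide, and that elements freed from hardirq context go onto a
separate free_list_irq which the allocation path splices back once it
reaches PCPU_CACHE_IRQ_THRESHOLD.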

v3:
  Call kmalloc() outside the get_cpu()/put_cpu() section.
v2:
  Factor the percpu cache into common code and have the iomap module
  use it.
v1:
  https://lore.kernel.org/all/20251121090052.384823-1-guzebing1612@gmail.com/

Tested-by: syzbot@...kaller.appspotmail.com
Suggested-by: Fengnan Chang <changfengnan@...edance.com>
Signed-off-by: guzebing <guzebing1612@...il.com>
---
fs/iomap/direct-io.c | 133 ++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 130 insertions(+), 3 deletions(-)
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 5d5d63efbd57..4421e4ad3a8f 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -56,6 +56,130 @@ struct iomap_dio {
 	};
 };
 
+#define PCPU_CACHE_IRQ_THRESHOLD 16
+#define PCPU_CACHE_ELEMENT_SIZE(pcpu_cache_list) \
+	(sizeof(struct pcpu_cache_element) + pcpu_cache_list->element_size)
+#define PCPU_CACHE_ELEMENT_GET_HEAD_FROM_PAYLOAD(payload) \
+	((struct pcpu_cache_element *)((unsigned long)(payload) - \
+	 sizeof(struct pcpu_cache_element)))
+#define PCPU_CACHE_ELEMENT_GET_PAYLOAD_FROM_HEAD(head) \
+	((void *)((unsigned long)(head) + sizeof(struct pcpu_cache_element)))
+
+struct pcpu_cache_element {
+	struct pcpu_cache_element *next;
+	char payload[];
+};
+struct pcpu_cache {
+	struct pcpu_cache_element *free_list;
+	struct pcpu_cache_element *free_list_irq;
+	int nr;
+	int nr_irq;
+};
+struct pcpu_cache_list {
+	struct pcpu_cache __percpu *cache;
+	size_t element_size;
+	int max_nr;
+};
+
+static struct pcpu_cache_list *pcpu_cache_list_create(int max_nr, size_t size)
+{
+	struct pcpu_cache_list *pcpu_cache_list;
+
+	pcpu_cache_list = kmalloc(sizeof(struct pcpu_cache_list), GFP_KERNEL);
+	if (!pcpu_cache_list)
+		return NULL;
+
+	pcpu_cache_list->element_size = size;
+	pcpu_cache_list->max_nr = max_nr;
+	pcpu_cache_list->cache = alloc_percpu(struct pcpu_cache);
+	if (!pcpu_cache_list->cache) {
+		kfree(pcpu_cache_list);
+		return NULL;
+	}
+	return pcpu_cache_list;
+}
+
+static void pcpu_cache_list_destroy(struct pcpu_cache_list *pcpu_cache_list)
+{
+	free_percpu(pcpu_cache_list->cache);
+	kfree(pcpu_cache_list);
+}
+
+static void irq_cache_splice(struct pcpu_cache *cache)
+{
+	unsigned long flags;
+
+	/* cache->free_list must be empty */
+	if (WARN_ON_ONCE(cache->free_list))
+		return;
+
+	local_irq_save(flags);
+	cache->free_list = cache->free_list_irq;
+	cache->free_list_irq = NULL;
+	cache->nr += cache->nr_irq;
+	cache->nr_irq = 0;
+	local_irq_restore(flags);
+}
+
+static void *pcpu_cache_list_alloc(struct pcpu_cache_list *pcpu_cache_list)
+{
+	struct pcpu_cache *cache;
+	struct pcpu_cache_element *cache_element;
+
+	cache = per_cpu_ptr(pcpu_cache_list->cache, get_cpu());
+	if (!cache->free_list) {
+		if (READ_ONCE(cache->nr_irq) >= PCPU_CACHE_IRQ_THRESHOLD)
+			irq_cache_splice(cache);
+		if (!cache->free_list) {
+			put_cpu();
+			cache_element = kmalloc(PCPU_CACHE_ELEMENT_SIZE(pcpu_cache_list),
+						GFP_KERNEL);
+			if (!cache_element)
+				return NULL;
+			return PCPU_CACHE_ELEMENT_GET_PAYLOAD_FROM_HEAD(cache_element);
+		}
+	}
+
+	cache_element = cache->free_list;
+	cache->free_list = cache_element->next;
+	cache->nr--;
+	put_cpu();
+	return PCPU_CACHE_ELEMENT_GET_PAYLOAD_FROM_HEAD(cache_element);
+}
+
+static void pcpu_cache_list_free(void *payload, struct pcpu_cache_list *pcpu_cache_list)
+{
+	struct pcpu_cache *cache;
+	struct pcpu_cache_element *cache_element;
+
+	cache_element = PCPU_CACHE_ELEMENT_GET_HEAD_FROM_PAYLOAD(payload);
+
+	cache = per_cpu_ptr(pcpu_cache_list->cache, get_cpu());
+	if (READ_ONCE(cache->nr_irq) + cache->nr >= pcpu_cache_list->max_nr)
+		goto out_free;
+
+	if (in_task()) {
+		cache_element->next = cache->free_list;
+		cache->free_list = cache_element;
+		cache->nr++;
+	} else if (in_hardirq()) {
+		lockdep_assert_irqs_disabled();
+		cache_element->next = cache->free_list_irq;
+		cache->free_list_irq = cache_element;
+		cache->nr_irq++;
+	} else {
+		goto out_free;
+	}
+	put_cpu();
+	return;
+out_free:
+	put_cpu();
+	kfree(cache_element);
+}
+
+#define DIO_ALLOC_CACHE_MAX 256
+static struct pcpu_cache_list *dio_pcpu_cache_list;
+
 static struct bio *iomap_dio_alloc_bio(const struct iomap_iter *iter,
 		struct iomap_dio *dio, unsigned short nr_vecs, blk_opf_t opf)
 {
@@ -135,7 +259,7 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
 			ret += dio->done_before;
 	}
 	trace_iomap_dio_complete(iocb, dio->error, ret);
-	kfree(dio);
+	pcpu_cache_list_free(dio, dio_pcpu_cache_list);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(iomap_dio_complete);
@@ -620,7 +744,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	if (!iomi.len)
 		return NULL;
 
-	dio = kmalloc(sizeof(*dio), GFP_KERNEL);
+	dio = pcpu_cache_list_alloc(dio_pcpu_cache_list);
 	if (!dio)
 		return ERR_PTR(-ENOMEM);
 
@@ -804,7 +928,7 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	return dio;
 
 out_free_dio:
-	kfree(dio);
+	pcpu_cache_list_free(dio, dio_pcpu_cache_list);
 	if (ret)
 		return ERR_PTR(ret);
 	return NULL;
@@ -834,6 +958,9 @@ static int __init iomap_dio_init(void)
 	if (!zero_page)
 		return -ENOMEM;
 
+	dio_pcpu_cache_list = pcpu_cache_list_create(DIO_ALLOC_CACHE_MAX, sizeof(struct iomap_dio));
+	if (!dio_pcpu_cache_list)
+		return -ENOMEM;
 	return 0;
 }
 fs_initcall(iomap_dio_init);
--
2.20.1