[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250529144256.4517-1-00107082@163.com>
Date: Thu, 29 May 2025 22:42:56 +0800
From: David Wang <00107082@....com>
To: tytso@....edu,
adilger.kernel@...ger.ca
Cc: linux-ext4@...r.kernel.org,
linux-kernel@...r.kernel.org,
David Wang <00107082@....com>
Subject: [RFC] ext4: use kmem_cache for short fname allocation in readdir
When searching files, ext4_readdir would kzalloc() a fname
object for each entry. It would be faster if a dedicated
kmem_cache is used for fname.
But fnames are of variable length.
This patch suggests using kmem_cache for fname with short
length, and resorting to kzalloc when fname needs larger buffer.
Assuming long file names are not very common.
Profiling when searching files in kernel code base, with following
command:
# perf record -g -e cpu-clock --freq=max bash -c \
"for i in {1..100}; do find ./linux -name notfoundatall > /dev/null; done"
And using sample counts as indicator of performance improvement.
(The faster, the less samples collected. And the tests are carried out
when system is under no memory pressure.)
Before without the change:
1232868--ext4_readdir
|
|--839085--ext4_htree_fill_tree
| |
| --829223--htree_dirblock_to_tree
| |
| |--365869--ext4_htree_store_dirent
| | |
| | |--43169--0xffffffffa7f8d094
| | |
| | --21947--0xffffffffa7f8d0f7
| |
| |--213124--ext4fs_dirhash
| | |
| | --86339--str2hashbuf_signed
| |
| |--145839--__ext4_read_dirblock
and with the change, ~3% less samples:
1202922--ext4_readdir
|
|--805105--ext4_htree_fill_tree
| |
| --795055--htree_dirblock_to_tree
| |
| |--328876--ext4_htree_store_dirent
| | |
| | |--123207--kmem_cache_alloc_noprof
| | | |
| | | |--26453--__alloc_tagging_slab_alloc_hook
| | | |
| | | --20413--__slab_alloc.isra.0
| | |
| | --31566--rb_insert_color
| |
| |--212915--ext4fs_dirhash
| | |
| | --86004--str2hashbuf_signed
| |
| |--149146--__ext4_read_dirblock
readdir() would have sigfinicant improvement, but the overall
improvements for searching files is only ~0.5%, might be more
sigfinicant if the system is under some memory pressures.
The slab stats after the test:
ext4_dir_fname 1242 1242 176 23 1 : tunables 0 0 0 : slabdata 54 54 0
Signed-off-by: David Wang <00107082@....com>
---
fs/ext4/dir.c | 47 ++++++++++++++++++++++++++++++++++++++++++++---
fs/ext4/ext4.h | 4 ++++
fs/ext4/super.c | 3 +++
3 files changed, 51 insertions(+), 3 deletions(-)
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index d4164c507a90..3adfa0d038cd 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -424,6 +424,48 @@ struct fname {
char name[] __counted_by(name_len);
};
+#define EXT4_DIR_FNAME_SHORT_LENGTH 127
+static struct kmem_cache *ext4_dir_fname_cachep;
+
+void __init ext4_init_dir(void)
+{
+ ext4_dir_fname_cachep = kmem_cache_create_usercopy("ext4_dir_fname",
+ struct_size_t(struct fname, name, EXT4_DIR_FNAME_SHORT_LENGTH + 1),
+ 0, SLAB_RECLAIM_ACCOUNT,
+ offsetof(struct fname, name), EXT4_DIR_FNAME_SHORT_LENGTH + 1,
+ NULL);
+}
+
+void ext4_exit_dir(void)
+{
+ if (ext4_dir_fname_cachep)
+ kmem_cache_destroy(ext4_dir_fname_cachep);
+}
+
+static struct fname *rb_node_fname_zalloc(__u8 name_len)
+{
+ struct fname *p;
+ if (ext4_dir_fname_cachep && name_len <= EXT4_DIR_FNAME_SHORT_LENGTH)
+ p = kmem_cache_alloc(ext4_dir_fname_cachep, GFP_KERNEL);
+ else
+ p = kmalloc(struct_size(p, name, name_len + 1), GFP_KERNEL);
+ if (p) {
+ /* no need to fill name with zeroes*/
+ memset(p, 0, offsetof(struct fname, name));
+ p->name[name_len] = 0;
+ }
+ return p;
+}
+
+static void rb_node_fname_free(struct fname *p) {
+ if (!p)
+ return;
+ if (ext4_dir_fname_cachep && p->name_len <= EXT4_DIR_FNAME_SHORT_LENGTH)
+ kmem_cache_free(ext4_dir_fname_cachep, p);
+ else
+ kfree(p);
+}
+
/*
* This function implements a non-recursive way of freeing all of the
* nodes in the red-black tree.
@@ -436,7 +478,7 @@ static void free_rb_tree_fname(struct rb_root *root)
while (fname) {
struct fname *old = fname;
fname = fname->next;
- kfree(old);
+ rb_node_fname_free(old);
}
*root = RB_ROOT;
@@ -479,8 +521,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
p = &info->root.rb_node;
/* Create and allocate the fname structure */
- new_fn = kzalloc(struct_size(new_fn, name, ent_name->len + 1),
- GFP_KERNEL);
+ new_fn = rb_node_fname_zalloc(ent_name->len);
if (!new_fn)
return -ENOMEM;
new_fn->hash = hash;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5a20e9cd7184..33ab97143000 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3795,6 +3795,10 @@ extern void ext4_orphan_file_block_trigger(
struct buffer_head *bh,
void *data, size_t size);
+/* dir.c */
+extern void __init ext4_init_dir(void);
+extern void ext4_exit_dir(void);
+
/*
* Add new method to test whether block and inode bitmaps are properly
* initialized. With uninit_bg reading the block from disk is not enough
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 181934499624..21ce3d78912a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -7457,6 +7457,8 @@ static int __init ext4_init_fs(void)
if (err)
goto out;
+ ext4_init_dir();
+
return 0;
out:
unregister_as_ext2();
@@ -7497,6 +7499,7 @@ static void __exit ext4_exit_fs(void)
ext4_exit_post_read_processing();
ext4_exit_es();
ext4_exit_pending();
+ ext4_exit_dir();
}
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
--
2.39.2
Powered by blists - more mailing lists