lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250529144256.4517-1-00107082@163.com>
Date: Thu, 29 May 2025 22:42:56 +0800
From: David Wang <00107082@....com>
To: tytso@....edu,
	adilger.kernel@...ger.ca
Cc: linux-ext4@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	David Wang <00107082@....com>
Subject: [RFC] ext4: use kmem_cache for short fname allocation in readdir

When searching files, ext4_readdir would kzalloc() a fname
object for each entry. It would be faster if a dedicated
kmem_cache is used for fname.

But fnames are of variable length.

This patch suggests using kmem_cache for fname with short
length, and resorting to kzalloc when fname needs larger buffer.
Assuming long file names are not very common.

Profiling when searching files in kernel code base, with following
command:
	# perf record -g -e cpu-clock --freq=max bash -c \
	"for i in {1..100}; do find ./linux -name notfoundatall > /dev/null; done"
And using sample counts as indicator of performance improvement.
(The faster, the less samples collected. And the tests are carried out
when system is under no memory pressure.)

Before without the change:
	1232868--ext4_readdir
		 |
		 |--839085--ext4_htree_fill_tree
		 |          |
		 |           --829223--htree_dirblock_to_tree
		 |                     |
		 |                     |--365869--ext4_htree_store_dirent
		 |                     |          |
		 |                     |          |--43169--0xffffffffa7f8d094
		 |                     |          |
		 |                     |           --21947--0xffffffffa7f8d0f7
		 |                     |
		 |                     |--213124--ext4fs_dirhash
		 |                     |          |
		 |                     |           --86339--str2hashbuf_signed
		 |                     |
		 |                     |--145839--__ext4_read_dirblock

and with the change, ~3% less samples:
	1202922--ext4_readdir
		 |
		 |--805105--ext4_htree_fill_tree
		 |          |
		 |           --795055--htree_dirblock_to_tree
		 |                     |
		 |                     |--328876--ext4_htree_store_dirent
		 |                     |          |
		 |                     |          |--123207--kmem_cache_alloc_noprof
		 |                     |          |          |
		 |                     |          |          |--26453--__alloc_tagging_slab_alloc_hook
		 |                     |          |          |
		 |                     |          |           --20413--__slab_alloc.isra.0
		 |                     |          |
		 |                     |           --31566--rb_insert_color
		 |                     |
		 |                     |--212915--ext4fs_dirhash
		 |                     |          |
		 |                     |           --86004--str2hashbuf_signed
		 |                     |
		 |                     |--149146--__ext4_read_dirblock

readdir() would have sigfinicant improvement, but the overall
improvements for searching files is only ~0.5%, might be more
sigfinicant if the system is under some memory pressures.

The slab stats after the test:
ext4_dir_fname      1242   1242    176   23    1 : tunables    0    0    0 : slabdata     54     54      0

Signed-off-by: David Wang <00107082@....com>
---
 fs/ext4/dir.c   | 47 ++++++++++++++++++++++++++++++++++++++++++++---
 fs/ext4/ext4.h  |  4 ++++
 fs/ext4/super.c |  3 +++
 3 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index d4164c507a90..3adfa0d038cd 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -424,6 +424,48 @@ struct fname {
 	char		name[] __counted_by(name_len);
 };
 
+#define EXT4_DIR_FNAME_SHORT_LENGTH 127
+static struct kmem_cache *ext4_dir_fname_cachep;
+
+void __init ext4_init_dir(void)
+{
+	ext4_dir_fname_cachep = kmem_cache_create_usercopy("ext4_dir_fname",
+			struct_size_t(struct fname, name,  EXT4_DIR_FNAME_SHORT_LENGTH + 1),
+			0, SLAB_RECLAIM_ACCOUNT,
+			offsetof(struct fname, name), EXT4_DIR_FNAME_SHORT_LENGTH + 1,
+			NULL);
+}
+
+void ext4_exit_dir(void)
+{
+	if (ext4_dir_fname_cachep)
+		kmem_cache_destroy(ext4_dir_fname_cachep);
+}
+
+static struct fname *rb_node_fname_zalloc(__u8 name_len)
+{
+	struct fname *p;
+	if (ext4_dir_fname_cachep && name_len <= EXT4_DIR_FNAME_SHORT_LENGTH)
+		p = kmem_cache_alloc(ext4_dir_fname_cachep, GFP_KERNEL);
+	else
+		p = kmalloc(struct_size(p, name, name_len + 1), GFP_KERNEL);
+	if (p) {
+		/* no need to fill name with zeroes*/
+		memset(p, 0, offsetof(struct fname, name));
+		p->name[name_len] = 0;
+	}
+	return p;
+}
+
+static void rb_node_fname_free(struct fname *p) {
+	if (!p)
+		return;
+	if (ext4_dir_fname_cachep && p->name_len <= EXT4_DIR_FNAME_SHORT_LENGTH)
+		kmem_cache_free(ext4_dir_fname_cachep, p);
+	else
+		kfree(p);
+}
+
 /*
  * This function implements a non-recursive way of freeing all of the
  * nodes in the red-black tree.
@@ -436,7 +478,7 @@ static void free_rb_tree_fname(struct rb_root *root)
 		while (fname) {
 			struct fname *old = fname;
 			fname = fname->next;
-			kfree(old);
+			rb_node_fname_free(old);
 		}
 
 	*root = RB_ROOT;
@@ -479,8 +521,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
 	p = &info->root.rb_node;
 
 	/* Create and allocate the fname structure */
-	new_fn = kzalloc(struct_size(new_fn, name, ent_name->len + 1),
-			 GFP_KERNEL);
+	new_fn = rb_node_fname_zalloc(ent_name->len);
 	if (!new_fn)
 		return -ENOMEM;
 	new_fn->hash = hash;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5a20e9cd7184..33ab97143000 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3795,6 +3795,10 @@ extern void ext4_orphan_file_block_trigger(
 				struct buffer_head *bh,
 				void *data, size_t size);
 
+/* dir.c */
+extern void __init ext4_init_dir(void);
+extern void ext4_exit_dir(void);
+
 /*
  * Add new method to test whether block and inode bitmaps are properly
  * initialized. With uninit_bg reading the block from disk is not enough
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 181934499624..21ce3d78912a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -7457,6 +7457,8 @@ static int __init ext4_init_fs(void)
 	if (err)
 		goto out;
 
+	ext4_init_dir();
+
 	return 0;
 out:
 	unregister_as_ext2();
@@ -7497,6 +7499,7 @@ static void __exit ext4_exit_fs(void)
 	ext4_exit_post_read_processing();
 	ext4_exit_es();
 	ext4_exit_pending();
+	ext4_exit_dir();
 }
 
 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
-- 
2.39.2


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ