[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <gvy6graln2rfpkaa3ms7lx7wlzzpjruzvefagsnnf3vcdfoxq7@dlsvj2gw6huk>
Date: Fri, 27 Jun 2025 21:11:32 +0200
From: Jan Kara <jack@...e.cz>
To: Baokun Li <libaokun1@...wei.com>
Cc: linux-ext4@...r.kernel.org, tytso@....edu, jack@...e.cz,
adilger.kernel@...ger.ca, ojaswin@...ux.ibm.com, linux-kernel@...r.kernel.org,
yi.zhang@...wei.com, yangerkun@...wei.com
Subject: Re: [PATCH v2 08/16] ext4: merge freed extent with existing extents
before insertion
On Mon 23-06-25 15:32:56, Baokun Li wrote:
> Attempt to merge ext4_free_data with already inserted free extents prior
> to adding new ones. This strategy drastically cuts down the number of
> times locks are held.
>
> For example, if prev, new, and next extents are all mergeable, the existing
> code (before this patch) requires acquiring the s_md_lock three times:
>
> prev merge into new and free prev // hold lock
> next merge into new and free next // hold lock
> insert new // hold lock
>
> After the patch, it only needs to be acquired once:
>
> new merge next and free new // no lock
> next merge into prev and free prev // hold lock
>
> Performance test data follows:
>
> Test: Running will-it-scale/fallocate2 on CPU-bound containers.
> Observation: Average fallocate operations per container per second.
>
> | Kunpeng 920 / 512GB -P80| AMD 9654 / 1536GB -P96 |
> Disk: 960GB SSD |-------------------------|-------------------------|
> | base | patched | base | patched |
> -------------------|-------|-----------------|-------|-----------------|
> mb_optimize_scan=0 | 20982 | 21157 (+0.8%) | 50629 | 50420 (-0.4%) |
> mb_optimize_scan=1 | 10703 | 12896 (+20.4%) | 14856 | 17273 (+16.2%) |
>
> Signed-off-by: Baokun Li <libaokun1@...wei.com>
Looks good. Feel free to add:
Reviewed-by: Jan Kara <jack@...e.cz>
Honza
> ---
> fs/ext4/mballoc.c | 113 +++++++++++++++++++++++++++++++---------------
> 1 file changed, 76 insertions(+), 37 deletions(-)
>
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 5410fb3688ee..94950b07a577 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -6298,28 +6298,63 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
> * are contiguous, AND the extents were freed by the same transaction,
> * AND the blocks are associated with the same group.
> */
> -static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
> - struct ext4_free_data *entry,
> - struct ext4_free_data *new_entry,
> - struct rb_root *entry_rb_root)
> +static inline bool
> +ext4_freed_extents_can_be_merged(struct ext4_free_data *entry1,
> + struct ext4_free_data *entry2)
> {
> - if ((entry->efd_tid != new_entry->efd_tid) ||
> - (entry->efd_group != new_entry->efd_group))
> - return;
> - if (entry->efd_start_cluster + entry->efd_count ==
> - new_entry->efd_start_cluster) {
> - new_entry->efd_start_cluster = entry->efd_start_cluster;
> - new_entry->efd_count += entry->efd_count;
> - } else if (new_entry->efd_start_cluster + new_entry->efd_count ==
> - entry->efd_start_cluster) {
> - new_entry->efd_count += entry->efd_count;
> - } else
> - return;
> + if (entry1->efd_tid != entry2->efd_tid)
> + return false;
> + if (entry1->efd_start_cluster + entry1->efd_count !=
> + entry2->efd_start_cluster)
> + return false;
> + if (WARN_ON_ONCE(entry1->efd_group != entry2->efd_group))
> + return false;
> + return true;
> +}
> +
> +static inline void
> +ext4_merge_freed_extents(struct ext4_sb_info *sbi, struct rb_root *root,
> + struct ext4_free_data *entry1,
> + struct ext4_free_data *entry2)
> +{
> + entry1->efd_count += entry2->efd_count;
> spin_lock(&sbi->s_md_lock);
> - list_del(&entry->efd_list);
> + list_del(&entry2->efd_list);
> spin_unlock(&sbi->s_md_lock);
> - rb_erase(&entry->efd_node, entry_rb_root);
> - kmem_cache_free(ext4_free_data_cachep, entry);
> + rb_erase(&entry2->efd_node, root);
> + kmem_cache_free(ext4_free_data_cachep, entry2);
> +}
> +
> +static inline void
> +ext4_try_merge_freed_extent_prev(struct ext4_sb_info *sbi, struct rb_root *root,
> + struct ext4_free_data *entry)
> +{
> + struct ext4_free_data *prev;
> + struct rb_node *node;
> +
> + node = rb_prev(&entry->efd_node);
> + if (!node)
> + return;
> +
> + prev = rb_entry(node, struct ext4_free_data, efd_node);
> + if (ext4_freed_extents_can_be_merged(prev, entry))
> + ext4_merge_freed_extents(sbi, root, prev, entry);
> +}
> +
> +static inline void
> +ext4_try_merge_freed_extent_next(struct ext4_sb_info *sbi, struct rb_root *root,
> + struct ext4_free_data *entry)
> +{
> + struct ext4_free_data *next;
> + struct rb_node *node;
> +
> + node = rb_next(&entry->efd_node);
> + if (!node)
> + return;
> +
> + next = rb_entry(node, struct ext4_free_data, efd_node);
> + if (ext4_freed_extents_can_be_merged(entry, next))
> + ext4_merge_freed_extents(sbi, root, entry, next);
> }
>
> static noinline_for_stack void
> @@ -6329,11 +6364,12 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
> ext4_group_t group = e4b->bd_group;
> ext4_grpblk_t cluster;
> ext4_grpblk_t clusters = new_entry->efd_count;
> - struct ext4_free_data *entry;
> + struct ext4_free_data *entry = NULL;
> struct ext4_group_info *db = e4b->bd_info;
> struct super_block *sb = e4b->bd_sb;
> struct ext4_sb_info *sbi = EXT4_SB(sb);
> - struct rb_node **n = &db->bb_free_root.rb_node, *node;
> + struct rb_root *root = &db->bb_free_root;
> + struct rb_node **n = &root->rb_node;
> struct rb_node *parent = NULL, *new_node;
>
> BUG_ON(!ext4_handle_valid(handle));
> @@ -6369,27 +6405,30 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
> }
> }
>
> - rb_link_node(new_node, parent, n);
> - rb_insert_color(new_node, &db->bb_free_root);
> -
> - /* Now try to see the extent can be merged to left and right */
> - node = rb_prev(new_node);
> - if (node) {
> - entry = rb_entry(node, struct ext4_free_data, efd_node);
> - ext4_try_merge_freed_extent(sbi, entry, new_entry,
> - &(db->bb_free_root));
> + atomic_add(clusters, &sbi->s_mb_free_pending);
> + if (!entry)
> + goto insert;
> +
> + /* Now try to see the extent can be merged to prev and next */
> + if (ext4_freed_extents_can_be_merged(new_entry, entry)) {
> + entry->efd_start_cluster = cluster;
> + entry->efd_count += new_entry->efd_count;
> + kmem_cache_free(ext4_free_data_cachep, new_entry);
> + ext4_try_merge_freed_extent_prev(sbi, root, entry);
> + return;
> }
> -
> - node = rb_next(new_node);
> - if (node) {
> - entry = rb_entry(node, struct ext4_free_data, efd_node);
> - ext4_try_merge_freed_extent(sbi, entry, new_entry,
> - &(db->bb_free_root));
> + if (ext4_freed_extents_can_be_merged(entry, new_entry)) {
> + entry->efd_count += new_entry->efd_count;
> + kmem_cache_free(ext4_free_data_cachep, new_entry);
> + ext4_try_merge_freed_extent_next(sbi, root, entry);
> + return;
> }
> +insert:
> + rb_link_node(new_node, parent, n);
> + rb_insert_color(new_node, root);
>
> spin_lock(&sbi->s_md_lock);
> list_add_tail(&new_entry->efd_list, &sbi->s_freed_data_list[new_entry->efd_tid & 1]);
> - atomic_add(clusters, &sbi->s_mb_free_pending);
> spin_unlock(&sbi->s_md_lock);
> }
>
> --
> 2.46.1
>
--
Jan Kara <jack@...e.com>
SUSE Labs, CR
Powered by blists - more mailing lists