[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <E85D2BFF-0B55-443B-A529-BA02F07DAE8B@dilger.ca>
Date: Fri, 16 Aug 2019 01:01:31 -0600
From: Andreas Dilger <adilger@...ger.ca>
To: Dongyang Li <dongyangli@....com>
Cc: "linux-ext4@...r.kernel.org" <linux-ext4@...r.kernel.org>
Subject: Re: [PATCH 2/2] mke2fs: set overhead in super block for bigalloc
On Aug 15, 2019, at 9:49 PM, Dongyang Li <dongyangli@....com> wrote:
>
> If overhead is not recorded in the super block, it is calculated
> during mount in the kernel; for bigalloc file systems this takes
> O(groups**2) time.
> For a 1PB device with 32K cluster size it takes ~12 mins to
> mount, with most of the time spent on figuring out overhead.
>
> While we can not improve the overhead algorithm in kernel
> due to the nature of bigalloc, we can work out the overhead
> during mke2fs and set it in the super block, avoiding calculating
> it every time during mounting.
>
> Overhead is s_first_data_block plus internal journal blocks plus
> the block and inode bitmaps, inode table, super block backups and
> group descriptor blocks for every group. With the patch we calculate
> the overhead when converting the block bitmap to cluster bitmap.
>
> When bad blocks are involved, it gets tricky because the blocks
> counted as overhead and the bad blocks can end up in the same
> allocation cluster. In this case we will unmark the bad blocks from
> the block bitmap, convert to cluster bitmap and get the overhead,
> then mark the bad blocks back in the cluster bitmap.
>
> Fix a bug in handle_bad_blocks(): don't convert the bad block to
> cluster when marking it as used; the bitmap is still a block bitmap
> and will be converted to a cluster bitmap later.
>
> Note: in kernel the overhead is the s_overhead_clusters field from
> struct ext4_super_block, it's named s_overhead_blocks in e2fsprogs.
>
> Signed-off-by: Li Dongyang <dongyangli@....com>
Reviewed-by: Andreas Dilger <adilger@...ger.ca>
> ---
> lib/ext2fs/ext2fs.h | 4 +++
> lib/ext2fs/gen_bitmap64.c | 61 ++++++++++++++++++++++++++++++++++-----
> misc/mke2fs.c | 15 ++++++++--
> 3 files changed, 69 insertions(+), 11 deletions(-)
>
> diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
> index 59fd9742..a70924b3 100644
> --- a/lib/ext2fs/ext2fs.h
> +++ b/lib/ext2fs/ext2fs.h
> @@ -1437,6 +1437,10 @@ errcode_t ext2fs_set_generic_bmap_range(ext2fs_generic_bitmap bmap,
> void *in);
> errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs,
> ext2fs_block_bitmap *bitmap);
> +errcode_t ext2fs_convert_subcluster_bitmap_overhead(ext2_filsys fs,
> + ext2fs_block_bitmap *bitmap,
> + badblocks_list bb_list,
> + unsigned int *count);
>
> /* get_num_dirs.c */
> extern errcode_t ext2fs_get_num_dirs(ext2_filsys fs, ext2_ino_t *ret_num_dirs);
> diff --git a/lib/ext2fs/gen_bitmap64.c b/lib/ext2fs/gen_bitmap64.c
> index 97601232..0f67f9c4 100644
> --- a/lib/ext2fs/gen_bitmap64.c
> +++ b/lib/ext2fs/gen_bitmap64.c
> @@ -794,18 +794,46 @@ void ext2fs_warn_bitmap32(ext2fs_generic_bitmap gen_bitmap, const char *func)
> #endif
> }
>
> -errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs,
> - ext2fs_block_bitmap *bitmap)
> +errcode_t ext2fs_convert_subcluster_bitmap_overhead(ext2_filsys fs,
> + ext2fs_block_bitmap *bitmap,
> + badblocks_list bb_list,
> + unsigned int *count)
> {
> ext2fs_generic_bitmap_64 bmap, cmap;
> ext2fs_block_bitmap gen_bmap = *bitmap, gen_cmap;
> errcode_t retval;
> - blk64_t i, next, b_end, c_end;
> + blk64_t blk, next, b_end, c_end;
> + unsigned int clusters = 0;
> + blk_t super_and_bgd, bblk;
> + badblocks_iterate bb_iter;
> + dgrp_t i;
> int ratio;
>
> bmap = (ext2fs_generic_bitmap_64) gen_bmap;
> - if (fs->cluster_ratio_bits == ext2fs_get_bitmap_granularity(gen_bmap))
> + if (fs->cluster_ratio_bits ==
> + ext2fs_get_bitmap_granularity(gen_bmap)) {
> + if (count) {
> + for (i = 0; i < fs->group_desc_count; i++) {
> + ext2fs_super_and_bgd_loc2(fs, i, NULL, NULL,
> + NULL,
> + &super_and_bgd);
> + clusters += super_and_bgd +
> + fs->inode_blocks_per_group + 2;
> + }
> + *count = clusters;
> + }
> return 0; /* Nothing to do */
> + }
> +
> + if (bb_list) {
> + retval = ext2fs_badblocks_list_iterate_begin(bb_list,
> + &bb_iter);
> + if (retval)
> + return retval;
> + while (ext2fs_badblocks_list_iterate(bb_iter, &bblk))
> + ext2fs_unmark_block_bitmap2(gen_bmap, bblk);
> + bb_iter->ptr = 0;
> + }
>
> retval = ext2fs_allocate_block_bitmap(fs, "converted cluster bitmap",
> &gen_cmap);
> @@ -813,27 +841,44 @@ errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs,
> return retval;
>
> cmap = (ext2fs_generic_bitmap_64) gen_cmap;
> - i = bmap->start;
> + blk = bmap->start;
> b_end = bmap->end;
> bmap->end = bmap->real_end;
> c_end = cmap->end;
> cmap->end = cmap->real_end;
> ratio = 1 << fs->cluster_ratio_bits;
> - while (i < bmap->real_end) {
> + while (blk < bmap->real_end) {
> retval = ext2fs_find_first_set_block_bitmap2(gen_bmap,
> - i, bmap->real_end, &next);
> + blk, bmap->real_end, &next);
> if (retval)
> break;
> ext2fs_mark_block_bitmap2(gen_cmap, next);
> - i = bmap->start + roundup(next - bmap->start + 1, ratio);
> + blk = bmap->start + roundup(next - bmap->start + 1, ratio);
> + clusters++;
> }
> bmap->end = b_end;
> cmap->end = c_end;
> ext2fs_free_block_bitmap(gen_bmap);
> +
> + if (bb_list) {
> + while (ext2fs_badblocks_list_iterate(bb_iter, &bblk))
> + ext2fs_mark_block_bitmap2(gen_cmap, bblk);
> + ext2fs_badblocks_list_iterate_end(bb_iter);
> + }
> +
> *bitmap = (ext2fs_block_bitmap) cmap;
> + if (count)
> + *count = clusters;
> return 0;
> }
>
> +errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs,
> + ext2fs_block_bitmap *bitmap)
> +{
> + return ext2fs_convert_subcluster_bitmap_overhead(fs, bitmap,
> + NULL, NULL);
> +}
> +
> errcode_t ext2fs_find_first_zero_generic_bmap(ext2fs_generic_bitmap bitmap,
> __u64 start, __u64 end, __u64 *out)
> {
> diff --git a/misc/mke2fs.c b/misc/mke2fs.c
> index d7cf257e..baa87b36 100644
> --- a/misc/mke2fs.c
> +++ b/misc/mke2fs.c
> @@ -344,7 +344,7 @@ _("Warning: the backup superblock/group descriptors at block %u contain\n"
> exit(1);
> }
> while (ext2fs_badblocks_list_iterate(bb_iter, &blk))
> - ext2fs_mark_block_bitmap2(fs->block_map, EXT2FS_B2C(fs, blk));
> + ext2fs_mark_block_bitmap2(fs->block_map, blk);
> ext2fs_badblocks_list_iterate_end(bb_iter);
> }
>
> @@ -2913,6 +2913,7 @@ int main (int argc, char *argv[])
> ext2_filsys fs;
> badblocks_list bb_list = 0;
> unsigned int journal_blocks = 0;
> + unsigned int overhead;
> unsigned int i, checkinterval;
> int max_mnt_count;
> int val, hash_alg;
> @@ -3213,7 +3214,9 @@ int main (int argc, char *argv[])
> if (!quiet)
> printf("%s", _("done \n"));
>
> - retval = ext2fs_convert_subcluster_bitmap(fs, &fs->block_map);
> + retval = ext2fs_convert_subcluster_bitmap_overhead(fs, &fs->block_map,
> + bb_list,
> + &overhead);
> if (retval) {
> com_err(program_name, retval, "%s",
> _("\n\twhile converting subcluster bitmap"));
> @@ -3317,6 +3320,7 @@ int main (int argc, char *argv[])
> free(journal_device);
> } else if ((journal_size) ||
> ext2fs_has_feature_journal(&fs_param)) {
> + overhead += EXT2FS_B2C(fs, journal_blocks);
> if (super_only) {
> printf("%s", _("Skipping journal creation in super-only mode\n"));
> fs->super->s_journal_inum = EXT2_JOURNAL_INO;
> @@ -3359,8 +3363,13 @@ no_journal:
> fs->super->s_mmp_update_interval);
> }
>
> - if (ext2fs_has_feature_bigalloc(&fs_param))
> + overhead += fs->super->s_first_data_block;
> +
> + if (ext2fs_has_feature_bigalloc(&fs_param)) {
> fix_cluster_bg_counts(fs);
> + if (!super_only)
> + fs->super->s_overhead_blocks = overhead;
> + }
> if (ext2fs_has_feature_quota(&fs_param))
> create_quota_inodes(fs);
>
> --
> 2.22.1
>
Cheers, Andreas
Download attachment "signature.asc" of type "application/pgp-signature" (874 bytes)
Powered by blists - more mailing lists