lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 16 Aug 2019 01:01:31 -0600
From:   Andreas Dilger <adilger@...ger.ca>
To:     Dongyang Li <dongyangli@....com>
Cc:     "linux-ext4@...r.kernel.org" <linux-ext4@...r.kernel.org>
Subject: Re: [PATCH 2/2] mke2fs: set overhead in super block for bigalloc

On Aug 15, 2019, at 9:49 PM, Dongyang Li <dongyangli@....com> wrote:
> 
> If overhead is not recorded in the super block, it is calculated
> during mount in the kernel; for bigalloc file systems this takes
> O(groups**2) time.
> For a 1PB device with 32K cluster size it takes ~12 mins to
> mount, with most of the time spent on figuring out overhead.
> 
> While we can not improve the overhead algorithm in kernel
> due to the nature of bigalloc, we can work out the overhead
> during mke2fs and set it in the super block, avoiding calculating
> it every time during mounting.
> 
> Overhead is s_first_data_block plus internal journal blocks plus
> the block and inode bitmaps, inode table, super block backups and
> group descriptor blocks for every group. With the patch we calculate
> the overhead when converting the block bitmap to cluster bitmap.
> 
> When bad blocks are involved, it gets tricky because the blocks
> counted as overhead and the bad blocks can end up in the same
> allocation cluster. In this case we will unmark the bad blocks from
> the block bitmap, convert to a cluster bitmap and get the overhead,
> then mark the bad blocks back in the cluster bitmap.
> 
> Fix a bug in handle_bad_blocks(): don't convert the bad block to a
> cluster when marking it as used. The bitmap is still a block bitmap
> at that point and will be converted to a cluster bitmap later.
> 
> Note: in kernel the overhead is the s_overhead_clusters field from
> struct ext4_super_block, it's named s_overhead_blocks in e2fsprogs.
> 
> Signed-off-by: Li Dongyang <dongyangli@....com>

Reviewed-by: Andreas Dilger <adilger@...ger.ca>

> ---
> lib/ext2fs/ext2fs.h       |  4 +++
> lib/ext2fs/gen_bitmap64.c | 61 ++++++++++++++++++++++++++++++++++-----
> misc/mke2fs.c             | 15 ++++++++--
> 3 files changed, 69 insertions(+), 11 deletions(-)
> 
> diff --git a/lib/ext2fs/ext2fs.h b/lib/ext2fs/ext2fs.h
> index 59fd9742..a70924b3 100644
> --- a/lib/ext2fs/ext2fs.h
> +++ b/lib/ext2fs/ext2fs.h
> @@ -1437,6 +1437,10 @@ errcode_t ext2fs_set_generic_bmap_range(ext2fs_generic_bitmap bmap,
> 					void *in);
> errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs,
> 					   ext2fs_block_bitmap *bitmap);
> +errcode_t ext2fs_convert_subcluster_bitmap_overhead(ext2_filsys fs,
> +						    ext2fs_block_bitmap *bitmap,
> +						    badblocks_list bb_list,
> +						    unsigned int *count);
> 
> /* get_num_dirs.c */
> extern errcode_t ext2fs_get_num_dirs(ext2_filsys fs, ext2_ino_t *ret_num_dirs);
> diff --git a/lib/ext2fs/gen_bitmap64.c b/lib/ext2fs/gen_bitmap64.c
> index 97601232..0f67f9c4 100644
> --- a/lib/ext2fs/gen_bitmap64.c
> +++ b/lib/ext2fs/gen_bitmap64.c
> @@ -794,18 +794,46 @@ void ext2fs_warn_bitmap32(ext2fs_generic_bitmap gen_bitmap, const char *func)
> #endif
> }
> 
> -errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs,
> -					   ext2fs_block_bitmap *bitmap)
> +errcode_t ext2fs_convert_subcluster_bitmap_overhead(ext2_filsys fs,
> +						    ext2fs_block_bitmap *bitmap,
> +						    badblocks_list bb_list,
> +						    unsigned int *count)
> {
> 	ext2fs_generic_bitmap_64 bmap, cmap;
> 	ext2fs_block_bitmap	gen_bmap = *bitmap, gen_cmap;
> 	errcode_t		retval;
> -	blk64_t			i, next, b_end, c_end;
> +	blk64_t			blk, next, b_end, c_end;
> +	unsigned int		clusters = 0;
> +	blk_t			super_and_bgd, bblk;
> +	badblocks_iterate	bb_iter;
> +	dgrp_t			i;
> 	int			ratio;
> 
> 	bmap = (ext2fs_generic_bitmap_64) gen_bmap;
> -	if (fs->cluster_ratio_bits == ext2fs_get_bitmap_granularity(gen_bmap))
> +	if (fs->cluster_ratio_bits ==
> +				ext2fs_get_bitmap_granularity(gen_bmap)) {
> +		if (count) {
> +			for (i = 0; i < fs->group_desc_count; i++) {
> +				ext2fs_super_and_bgd_loc2(fs, i, NULL, NULL,
> +							  NULL,
> +							  &super_and_bgd);
> +				clusters += super_and_bgd +
> +					    fs->inode_blocks_per_group + 2;
> +			}
> +			*count = clusters;
> +		}
> 		return 0;	/* Nothing to do */
> +	}
> +
> +	if (bb_list) {
> +		retval = ext2fs_badblocks_list_iterate_begin(bb_list,
> +							     &bb_iter);
> +		if (retval)
> +			return retval;
> +		while (ext2fs_badblocks_list_iterate(bb_iter, &bblk))
> +			ext2fs_unmark_block_bitmap2(gen_bmap, bblk);
> +		bb_iter->ptr = 0;
> +	}
> 
> 	retval = ext2fs_allocate_block_bitmap(fs, "converted cluster bitmap",
> 					      &gen_cmap);
> @@ -813,27 +841,44 @@ errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs,
> 		return retval;
> 
> 	cmap = (ext2fs_generic_bitmap_64) gen_cmap;
> -	i = bmap->start;
> +	blk = bmap->start;
> 	b_end = bmap->end;
> 	bmap->end = bmap->real_end;
> 	c_end = cmap->end;
> 	cmap->end = cmap->real_end;
> 	ratio = 1 << fs->cluster_ratio_bits;
> -	while (i < bmap->real_end) {
> +	while (blk < bmap->real_end) {
> 		retval = ext2fs_find_first_set_block_bitmap2(gen_bmap,
> -						i, bmap->real_end, &next);
> +						blk, bmap->real_end, &next);
> 		if (retval)
> 			break;
> 		ext2fs_mark_block_bitmap2(gen_cmap, next);
> -		i = bmap->start + roundup(next - bmap->start + 1, ratio);
> +		blk = bmap->start + roundup(next - bmap->start + 1, ratio);
> +		clusters++;
> 	}
> 	bmap->end = b_end;
> 	cmap->end = c_end;
> 	ext2fs_free_block_bitmap(gen_bmap);
> +
> +	if (bb_list) {
> +		while (ext2fs_badblocks_list_iterate(bb_iter, &bblk))
> +			ext2fs_mark_block_bitmap2(gen_cmap, bblk);
> +		ext2fs_badblocks_list_iterate_end(bb_iter);
> +	}
> +
> 	*bitmap = (ext2fs_block_bitmap) cmap;
> +	if (count)
> +		*count = clusters;
> 	return 0;
> }
> 
> +errcode_t ext2fs_convert_subcluster_bitmap(ext2_filsys fs,
> +					   ext2fs_block_bitmap *bitmap)
> +{
> +	return ext2fs_convert_subcluster_bitmap_overhead(fs, bitmap,
> +							 NULL, NULL);
> +}
> +
> errcode_t ext2fs_find_first_zero_generic_bmap(ext2fs_generic_bitmap bitmap,
> 					      __u64 start, __u64 end, __u64 *out)
> {
> diff --git a/misc/mke2fs.c b/misc/mke2fs.c
> index d7cf257e..baa87b36 100644
> --- a/misc/mke2fs.c
> +++ b/misc/mke2fs.c
> @@ -344,7 +344,7 @@ _("Warning: the backup superblock/group descriptors at block %u contain\n"
> 		exit(1);
> 	}
> 	while (ext2fs_badblocks_list_iterate(bb_iter, &blk))
> -		ext2fs_mark_block_bitmap2(fs->block_map, EXT2FS_B2C(fs, blk));
> +		ext2fs_mark_block_bitmap2(fs->block_map, blk);
> 	ext2fs_badblocks_list_iterate_end(bb_iter);
> }
> 
> @@ -2913,6 +2913,7 @@ int main (int argc, char *argv[])
> 	ext2_filsys	fs;
> 	badblocks_list	bb_list = 0;
> 	unsigned int	journal_blocks = 0;
> +	unsigned int	overhead;
> 	unsigned int	i, checkinterval;
> 	int		max_mnt_count;
> 	int		val, hash_alg;
> @@ -3213,7 +3214,9 @@ int main (int argc, char *argv[])
> 	if (!quiet)
> 		printf("%s", _("done                            \n"));
> 
> -	retval = ext2fs_convert_subcluster_bitmap(fs, &fs->block_map);
> +	retval = ext2fs_convert_subcluster_bitmap_overhead(fs, &fs->block_map,
> +							   bb_list,
> +							   &overhead);
> 	if (retval) {
> 		com_err(program_name, retval, "%s",
> 			_("\n\twhile converting subcluster bitmap"));
> @@ -3317,6 +3320,7 @@ int main (int argc, char *argv[])
> 		free(journal_device);
> 	} else if ((journal_size) ||
> 		   ext2fs_has_feature_journal(&fs_param)) {
> +		overhead += EXT2FS_B2C(fs, journal_blocks);
> 		if (super_only) {
> 			printf("%s", _("Skipping journal creation in super-only mode\n"));
> 			fs->super->s_journal_inum = EXT2_JOURNAL_INO;
> @@ -3359,8 +3363,13 @@ no_journal:
> 			       fs->super->s_mmp_update_interval);
> 	}
> 
> -	if (ext2fs_has_feature_bigalloc(&fs_param))
> +	overhead += fs->super->s_first_data_block;
> +
> +	if (ext2fs_has_feature_bigalloc(&fs_param)) {
> 		fix_cluster_bg_counts(fs);
> +		if (!super_only)
> +			fs->super->s_overhead_blocks = overhead;
> +	}
> 	if (ext2fs_has_feature_quota(&fs_param))
> 		create_quota_inodes(fs);
> 
> --
> 2.22.1
> 


Cheers, Andreas






Download attachment "signature.asc" of type "application/pgp-signature" (874 bytes)

Powered by blists - more mailing lists