lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8739yx3wd8.fsf@openvz.org>
Date:	Thu, 15 Apr 2010 13:20:35 +0400
From:	Dmitry Monakhov <dmonakhov@...nvz.org>
To:	tytso@....edu
Cc:	linux-ext4@...r.kernel.org, aneesh.kumar@...ux.vnet.ibm.com
Subject: Re: [PATCH] ext4: Do not zeroout uninitialized extents beyond i_size

tytso@....edu writes:

> This is a slightly revised version of the patch where I've made to
> keep ext4_ext_convert_to_initialized() and
> ext4_split_unwritten_extents() more in sync with each other.  (If you
> use "meld" on the these two functions, it's really clear they are very
Yepp. Indeed function are almost identical, and sadly i've missed 
may_zeroout condition update in second one :(. see later in the text
> similar to each other, and really need to be combined --- my plan is
> to smash the two functions together and call the result
> ext4_prepare_uninit_extent().)
>
> I think it should be functionality equivalent to Dmitriy's original
> patch; aside from mostly syntatic/cosmetic changes and spelling fixes,
> the only substantive change I made was simplifying the may_zeroout
> calculation by changing how eof_block is calculated.  I've run it
> through xfstests, and it's passing.
Other useful test is to run fsstress with boosted write/fallocate
probability on small(1Gb)  filesystem
xfstests/ltp/fsstress -p10 -z -f mkdir=1 -f creat=1 -f write=10 \
                      -f resrvsp=10 -n999999999 &
sleep 100
killall -9 fsstress
#umount/e2fsck
>
> 					- Ted
>
> ext4: Do not zeroout uninitialized extents beyond i_size
>
> From: Dmitry Monakhov <dmonakhov@...nvz.org>
>
> The extents code will sometimes zero out blocks and mark them as
> initialized instead of splitting an extent into several smaller ones.
> This optimization however, causes problems if the extent is beyond
> i_size because fsck will complain if there are uninitialized blocks
> after i_size as this can not be distinguished from an inode that has
> an incorrect i_size field.
>
> https://bugzilla.kernel.org/show_bug.cgi?id=15742
>
> Signed-off-by: Dmitry Monakhov <dmonakhov@...nvz.org>
> Signed-off-by: "Theodore Ts'o" <tytso@....edu>
> ---
>  fs/ext4/extents.c |   66 ++++++++++++++++++++++++++++++++++++++++-------------
>  1 files changed, 50 insertions(+), 16 deletions(-)
>
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 228eeaf..7c0438e 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2631,11 +2631,21 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  	struct ext4_extent *ex2 = NULL;
>  	struct ext4_extent *ex3 = NULL;
>  	struct ext4_extent_header *eh;
> -	ext4_lblk_t ee_block;
> +	ext4_lblk_t ee_block, eof_block;
>  	unsigned int allocated, ee_len, depth;
>  	ext4_fsblk_t newblock;
>  	int err = 0;
>  	int ret = 0;
> +	int may_zeroout;
> +
> +	ext_debug("ext4_ext_convert_to_initialized: inode %lu, logical"
> +		"block %llu, max_blocks %u\n", inode->i_ino,
> +		(unsigned long long)iblock, max_blocks);
> +
> +	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
> +		inode->i_sb->s_blocksize_bits;
> +	if (eof_block < iblock + max_blocks)
> +		eof_block = iblock + max_blocks;
>  
>  	depth = ext_depth(inode);
>  	eh = path[depth].p_hdr;
> @@ -2644,16 +2654,23 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  	ee_len = ext4_ext_get_actual_len(ex);
>  	allocated = ee_len - (iblock - ee_block);
>  	newblock = iblock - ee_block + ext_pblock(ex);
> +
>  	ex2 = ex;
>  	orig_ex.ee_block = ex->ee_block;
>  	orig_ex.ee_len   = cpu_to_le16(ee_len);
>  	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
>  
> +	/*
> +	 * It is safe to convert extent to initialized via explicit
> +	 * zeroout only if extent is fully insde i_size or new_size.
> +	 */
> +	may_zeroout = ee_block + ee_len <= eof_block;
> +
>  	err = ext4_ext_get_access(handle, inode, path + depth);
>  	if (err)
>  		goto out;
>  	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
> -	if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
> +	if (ee_len <= 2*EXT4_EXT_ZERO_LEN && may_zeroout) {
>  		err =  ext4_ext_zeroout(inode, &orig_ex);
>  		if (err)
>  			goto fix_extent_len;
> @@ -2684,7 +2701,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  	if (allocated > max_blocks) {
>  		unsigned int newdepth;
>  		/* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
> -		if (allocated <= EXT4_EXT_ZERO_LEN) {
> +		if (allocated <= EXT4_EXT_ZERO_LEN && may_zeroout) {
>  			/*
>  			 * iblock == ee_block is handled by the zerouout
>  			 * at the beginning.
> @@ -2760,7 +2777,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
>  		ext4_ext_mark_uninitialized(ex3);
>  		err = ext4_ext_insert_extent(handle, inode, path, ex3, 0);
> -		if (err == -ENOSPC) {
> +		if (err == -ENOSPC && may_zeroout) {
>  			err =  ext4_ext_zeroout(inode, &orig_ex);
>  			if (err)
>  				goto fix_extent_len;
> @@ -2784,8 +2801,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  		 * update the extent length after successful insert of the
>  		 * split extent
>  		 */
> -		orig_ex.ee_len = cpu_to_le16(ee_len -
> -						ext4_ext_get_actual_len(ex3));
> +		ee_len -= ext4_ext_get_actual_len(ex3);
> +		orig_ex.ee_len = cpu_to_le16(ee_len);
> +		may_zeroout = ee_block + ee_len <= eof_block;
> +
>  		depth = newdepth;
>  		ext4_ext_drop_refs(path);
>  		path = ext4_ext_find_extent(inode, iblock, path);
> @@ -2809,7 +2828,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  		 * otherwise give the extent a chance to merge to left
>  		 */
>  		if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
> -							iblock != ee_block) {
> +			iblock != ee_block && may_zeroout) {
>  			err =  ext4_ext_zeroout(inode, &orig_ex);
>  			if (err)
>  				goto fix_extent_len;
> @@ -2878,7 +2897,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  	goto out;
>  insert:
>  	err = ext4_ext_insert_extent(handle, inode, path, &newex, 0);
> -	if (err == -ENOSPC) {
> +	if (err == -ENOSPC && may_zeroout) {
>  		err =  ext4_ext_zeroout(inode, &orig_ex);
>  		if (err)
>  			goto fix_extent_len;
> @@ -2938,14 +2957,21 @@ static int ext4_split_unwritten_extents(handle_t *handle,
>  	struct ext4_extent *ex2 = NULL;
>  	struct ext4_extent *ex3 = NULL;
>  	struct ext4_extent_header *eh;
> -	ext4_lblk_t ee_block;
> +	ext4_lblk_t ee_block, eof_block;
>  	unsigned int allocated, ee_len, depth;
>  	ext4_fsblk_t newblock;
>  	int err = 0;
> +	int may_zeroout;
> +
> +	ext_debug("ext4_split_unwritten_extents: inode %lu, logical"
> +		"block %llu, max_blocks %u\n", inode->i_ino,
> +		(unsigned long long)iblock, max_blocks);
> +
> +	eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
> +		inode->i_sb->s_blocksize_bits;
> +	if (eof_block < iblock + max_blocks)
> +		eof_block = iblock + max_blocks;
>  
> -	ext_debug("ext4_split_unwritten_extents: inode %lu,"
> -		  "iblock %llu, max_blocks %u\n", inode->i_ino,
> -		  (unsigned long long)iblock, max_blocks);
>  	depth = ext_depth(inode);
>  	eh = path[depth].p_hdr;
>  	ex = path[depth].p_ext;
> @@ -2953,12 +2979,19 @@ static int ext4_split_unwritten_extents(handle_t *handle,
>  	ee_len = ext4_ext_get_actual_len(ex);
>  	allocated = ee_len - (iblock - ee_block);
>  	newblock = iblock - ee_block + ext_pblock(ex);
> +
>  	ex2 = ex;
>  	orig_ex.ee_block = ex->ee_block;
>  	orig_ex.ee_len   = cpu_to_le16(ee_len);
>  	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
>  
>  	/*
> +	 * It is safe to convert extent to initialized via explicit
> +	 * zeroout only if extent is fully insde i_size or new_size.
> +	 */
> +	may_zeroout = ee_block + ee_len <= eof_block;
> +
> +	/*
>   	 * If the uninitialized extent begins at the same logical
>   	 * block where the write begins, and the write completely
>   	 * covers the extent, then we don't need to split it.
> @@ -2992,7 +3025,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
>  		ex3->ee_len = cpu_to_le16(allocated - max_blocks);
>  		ext4_ext_mark_uninitialized(ex3);
>  		err = ext4_ext_insert_extent(handle, inode, path, ex3, flags);
> -		if (err == -ENOSPC) {
> +		if (err == -ENOSPC && may_zeroout) {
>  			err =  ext4_ext_zeroout(inode, &orig_ex);
>  			if (err)
>  				goto fix_extent_len;
> @@ -3016,8 +3049,9 @@ static int ext4_split_unwritten_extents(handle_t *handle,
>  		 * update the extent length after successful insert of the
>  		 * split extent
>  		 */
> -		orig_ex.ee_len = cpu_to_le16(ee_len -
> -						ext4_ext_get_actual_len(ex3));
> +		ee_len -= ext4_ext_get_actual_len(ex3);
> +		orig_ex.ee_len = cpu_to_le16(ee_len);
> +
We have to update may_zeroout since orig extent was changed.
+               may_zeroout = ee_block + ee_len <= eof_block;  
I've attached incremental patch for you.
In fact, this not result in any serious bugs but increase our changes to
use zeroout optimization and helps us to overcome ENOSPC if possible.
>  		depth = newdepth;
>  		ext4_ext_drop_refs(path);
>  		path = ext4_ext_find_extent(inode, iblock, path);
> @@ -3063,7 +3097,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
>  	goto out;
>  insert:
>  	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
> -	if (err == -ENOSPC) {
> +	if (err == -ENOSPC && may_zeroout) {
>  		err =  ext4_ext_zeroout(inode, &orig_ex);
>  		if (err)
>  			goto fix_extent_len;


View attachment "ext4-Do-not-zeroout-uninitialized-extents-beyond-i_s-2" of type "text/plain" (402 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ