lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87sj47gq71.fsf@openvz.org>
Date:	Thu, 07 Mar 2013 19:55:14 +0400
From:	Dmitry Monakhov <dmonakhov@...nvz.org>
To:	Zheng Liu <gnehzuil.liu@...il.com>, linux-ext4@...r.kernel.org
Cc:	Zheng Liu <wenqing.lz@...bao.com>, Theodore Ts'o <tytso@....edu>
Subject: Re: [PATCH v2 4/5] ext4: update extent status tree after an extent is zeroed out

On Wed,  6 Mar 2013 22:17:14 +0800, Zheng Liu <gnehzuil.liu@...il.com> wrote:
> From: Zheng Liu <wenqing.lz@...bao.com>
> 
> When we try to split an extent, this extent could be zeroed out and mark
> as initialized.  But we don't know this in ext4_map_blocks because it
> only returns a length of allocated extent.  Meanwhile we will mark this
> extent as uninitialized because we only check m_flags.
> 
> This commit update extent status tree when we try to split an unwritten
> extent.  We don't need to worry about the status of this extent because
> we always mark it as initialized.
> 
> Signed-off-by: Zheng Liu <wenqing.lz@...bao.com>
> Cc: "Theodore Ts'o" <tytso@....edu>
> Cc: Dmitry Monakhov <dmonakhov@...nvz.org>
> ---
>  fs/ext4/extents.c        | 35 +++++++++++++++++++++++++++++++----
>  fs/ext4/extents_status.c | 17 +++++++++++++++++
>  fs/ext4/extents_status.h |  3 +++
>  fs/ext4/inode.c          | 10 ++++++++++
>  4 files changed, 61 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 110e85a..7e37018 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2925,7 +2925,7 @@ static int ext4_split_extent_at(handle_t *handle,
>  {
>  	ext4_fsblk_t newblock;
>  	ext4_lblk_t ee_block;
> -	struct ext4_extent *ex, newex, orig_ex;
> +	struct ext4_extent *ex, newex, orig_ex, zero_ex;
>  	struct ext4_extent *ex2 = NULL;
>  	unsigned int ee_len, depth;
>  	int err = 0;
> @@ -2996,12 +2996,26 @@ static int ext4_split_extent_at(handle_t *handle,
>  	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
>  	if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
>  		if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
> -			if (split_flag & EXT4_EXT_DATA_VALID1)
> +			if (split_flag & EXT4_EXT_DATA_VALID1) {
>  				err = ext4_ext_zeroout(inode, ex2);
> -			else
> +				zero_ex.ee_block = ex2->ee_block;
> +				zero_ex.ee_len = ext4_ext_get_actual_len(ex2);
> +				ext4_ext_store_pblock(&zero_ex,
> +						      ext4_ext_pblock(ex2));
> +			} else {
>  				err = ext4_ext_zeroout(inode, ex);
> -		} else
> +				zero_ex.ee_block = ex->ee_block;
> +				zero_ex.ee_len = ext4_ext_get_actual_len(ex);
> +				ext4_ext_store_pblock(&zero_ex,
> +						      ext4_ext_pblock(ex));
> +			}
> +		} else {
>  			err = ext4_ext_zeroout(inode, &orig_ex);
> +			zero_ex.ee_block = orig_ex.ee_block;
> +			zero_ex.ee_len = ext4_ext_get_actual_len(&orig_ex);
> +			ext4_ext_store_pblock(&zero_ex,
> +					      ext4_ext_pblock(&orig_ex));              
> +		}
>  		if (err)
>  			goto fix_extent_len;
> @@ -3009,6 +3023,12 @@ static int ext4_split_extent_at(handle_t *handle,
>  		ex->ee_len = cpu_to_le16(ee_len);
>  		ext4_ext_try_to_merge(handle, inode, path, ex);
>  		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
> +		if (err)
> +			goto fix_extent_len;
> +
> +		/* update extent status tree */
> +		err = ext4_es_zeroout(inode, &zero_ex);
> +
Previous blocks are correct but too complex.
At this point we know that extent "ex" becomes initialized so just
manually update it like follows:
        err = ext4_es_insert_extent(inode, ee_block, ee_len,
                                                   ext4_ext_pblock(ex),
                                                   EXTENT_STATUS_WRITTEN);
BTW I'm wonder what happen if one of ext4_es_xxx functions failed with
error. ASAIU this possible only incase of ENOMEM so it is very unlikely
but allowed. If this happens then es_tree will be out of sinc with
extent_tree which later result in corruption.
>  		goto out;
>  	} else if (err)
>  		goto fix_extent_len;
> @@ -3150,6 +3170,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  	ee_block = le32_to_cpu(ex->ee_block);
>  	ee_len = ext4_ext_get_actual_len(ex);
>  	allocated = ee_len - (map->m_lblk - ee_block);
> +	zero_ex.ee_len = 0;
>  
>  	trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
>  
> @@ -3247,6 +3268,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  		err = ext4_ext_zeroout(inode, ex);
>  		if (err)
>  			goto out;
> +		zero_ex.ee_block = ex->ee_block;
> +		zero_ex.ee_len = ext4_ext_get_actual_len(ex);
> +		ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));
>  
>  		err = ext4_ext_get_access(handle, inode, path + depth);
>  		if (err)
> @@ -3305,6 +3329,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
>  		err = allocated;
>  
>  out:
> +	/* If we have gotten a failure, don't zero out status tree */
> +	if (!err)
> +		err = ext4_es_zeroout(inode, &zero_ex);
>  	return err ? err : allocated;
>  }
>  
> diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
> index a434f81..e7bebbc 100644
> --- a/fs/ext4/extents_status.c
> +++ b/fs/ext4/extents_status.c
> @@ -854,6 +854,23 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
>  	return err;
>  }
>  
> +int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
> +{
> +	ext4_lblk_t  ee_block;
> +	ext4_fsblk_t ee_pblock;
> +	unsigned int ee_len;
> +
> +	ee_block  = le32_to_cpu(ex->ee_block);
> +	ee_len    = ext4_ext_get_actual_len(ex);
> +	ee_pblock = ext4_ext_pblock(ex);
Andreas Dilger do not like local variables which used only once.  Let's avoid that.
> +
> +	if (ee_len == 0)
> +		return 0;
> +
> +	return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
> +				     EXTENT_STATUS_WRITTEN);
> +}
> +
>  static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
>  {
>  	struct ext4_sb_info *sbi = container_of(shrink,
> diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
> index 56140ad..d8e2d4d 100644
> --- a/fs/ext4/extents_status.h
> +++ b/fs/ext4/extents_status.h
> @@ -39,6 +39,8 @@
>  				 EXTENT_STATUS_DELAYED | \
>  				 EXTENT_STATUS_HOLE)
>  
> +struct ext4_extent;
> +
>  struct extent_status {
>  	struct rb_node rb_node;
>  	ext4_lblk_t es_lblk;	/* first logical block extent covers */
> @@ -64,6 +66,7 @@ extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
>  					struct extent_status *es);
>  extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
>  				 struct extent_status *es);
> +extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex);
>  
>  static inline int ext4_es_is_written(struct extent_status *es)
>  {
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 3186a43..4f1d54a 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -722,6 +722,15 @@ found:
>  		}
>  #endif
>  
> +		/*
> +		 * If the extent has been zeroed out, we don't need to update
> +		 * extent status tree.
> +		 */
> +		if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
> +		    ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
> +			if (ext4_es_is_written(&es))
> +				goto has_zeroout;
> +		}
>  		status = map->m_flags & EXT4_MAP_UNWRITTEN ?
>  				EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
>  		if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
> @@ -734,6 +743,7 @@ found:
>  			retval = ret;
>  	}
>  
> +has_zeroout:
>  	up_write((&EXT4_I(inode)->i_data_sem));
>  	if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
>  		int ret = check_block_validity(inode, map);
> -- 
> 1.7.12.rc2.18.g61b472e
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ