[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87sj47gq71.fsf@openvz.org>
Date: Thu, 07 Mar 2013 19:55:14 +0400
From: Dmitry Monakhov <dmonakhov@...nvz.org>
To: Zheng Liu <gnehzuil.liu@...il.com>, linux-ext4@...r.kernel.org
Cc: Zheng Liu <wenqing.lz@...bao.com>, Theodore Ts'o <tytso@....edu>
Subject: Re: [PATCH v2 4/5] ext4: update extent status tree after an extent is zeroed out
On Wed, 6 Mar 2013 22:17:14 +0800, Zheng Liu <gnehzuil.liu@...il.com> wrote:
> From: Zheng Liu <wenqing.lz@...bao.com>
>
> When we try to split an extent, this extent could be zeroed out and mark
> as initialized. But we don't know this in ext4_map_blocks because it
> only returns a length of allocated extent. Meanwhile we will mark this
> extent as uninitialized because we only check m_flags.
>
> This commit update extent status tree when we try to split an unwritten
> extent. We don't need to worry about the status of this extent because
> we always mark it as initialized.
>
> Signed-off-by: Zheng Liu <wenqing.lz@...bao.com>
> Cc: "Theodore Ts'o" <tytso@....edu>
> Cc: Dmitry Monakhov <dmonakhov@...nvz.org>
> ---
> fs/ext4/extents.c | 35 +++++++++++++++++++++++++++++++----
> fs/ext4/extents_status.c | 17 +++++++++++++++++
> fs/ext4/extents_status.h | 3 +++
> fs/ext4/inode.c | 10 ++++++++++
> 4 files changed, 61 insertions(+), 4 deletions(-)
>
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 110e85a..7e37018 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -2925,7 +2925,7 @@ static int ext4_split_extent_at(handle_t *handle,
> {
> ext4_fsblk_t newblock;
> ext4_lblk_t ee_block;
> - struct ext4_extent *ex, newex, orig_ex;
> + struct ext4_extent *ex, newex, orig_ex, zero_ex;
> struct ext4_extent *ex2 = NULL;
> unsigned int ee_len, depth;
> int err = 0;
> @@ -2996,12 +2996,26 @@ static int ext4_split_extent_at(handle_t *handle,
> err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
> if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
> if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
> - if (split_flag & EXT4_EXT_DATA_VALID1)
> + if (split_flag & EXT4_EXT_DATA_VALID1) {
> err = ext4_ext_zeroout(inode, ex2);
> - else
> + zero_ex.ee_block = ex2->ee_block;
> + zero_ex.ee_len = ext4_ext_get_actual_len(ex2);
> + ext4_ext_store_pblock(&zero_ex,
> + ext4_ext_pblock(ex2));
> + } else {
> err = ext4_ext_zeroout(inode, ex);
> - } else
> + zero_ex.ee_block = ex->ee_block;
> + zero_ex.ee_len = ext4_ext_get_actual_len(ex);
> + ext4_ext_store_pblock(&zero_ex,
> + ext4_ext_pblock(ex));
> + }
> + } else {
> err = ext4_ext_zeroout(inode, &orig_ex);
> + zero_ex.ee_block = orig_ex.ee_block;
> + zero_ex.ee_len = ext4_ext_get_actual_len(&orig_ex);
> + ext4_ext_store_pblock(&zero_ex,
> + ext4_ext_pblock(&orig_ex));
> + }
> if (err)
> goto fix_extent_len;
> @@ -3009,6 +3023,12 @@ static int ext4_split_extent_at(handle_t *handle,
> ex->ee_len = cpu_to_le16(ee_len);
> ext4_ext_try_to_merge(handle, inode, path, ex);
> err = ext4_ext_dirty(handle, inode, path + path->p_depth);
> + if (err)
> + goto fix_extent_len;
> +
> + /* update extent status tree */
> + err = ext4_es_zeroout(inode, &zero_ex);
> +
Previous blocks are correct but too complex.
At this point we know that extent "ex" becomes initialized so just
manually update it like follows:
err = ext4_es_insert_extent(inode, ee_block, ee_len,
ext4_ext_pblock(ex),
EXTENT_STATUS_WRITTEN);
BTW I'm wonder what happen if one of ext4_es_xxx functions failed with
error. ASAIU this possible only incase of ENOMEM so it is very unlikely
but allowed. If this happens then es_tree will be out of sinc with
extent_tree which later result in corruption.
> goto out;
> } else if (err)
> goto fix_extent_len;
> @@ -3150,6 +3170,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
> ee_block = le32_to_cpu(ex->ee_block);
> ee_len = ext4_ext_get_actual_len(ex);
> allocated = ee_len - (map->m_lblk - ee_block);
> + zero_ex.ee_len = 0;
>
> trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
>
> @@ -3247,6 +3268,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
> err = ext4_ext_zeroout(inode, ex);
> if (err)
> goto out;
> + zero_ex.ee_block = ex->ee_block;
> + zero_ex.ee_len = ext4_ext_get_actual_len(ex);
> + ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));
>
> err = ext4_ext_get_access(handle, inode, path + depth);
> if (err)
> @@ -3305,6 +3329,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
> err = allocated;
>
> out:
> + /* If we have gotten a failure, don't zero out status tree */
> + if (!err)
> + err = ext4_es_zeroout(inode, &zero_ex);
> return err ? err : allocated;
> }
>
> diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
> index a434f81..e7bebbc 100644
> --- a/fs/ext4/extents_status.c
> +++ b/fs/ext4/extents_status.c
> @@ -854,6 +854,23 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
> return err;
> }
>
> +int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
> +{
> + ext4_lblk_t ee_block;
> + ext4_fsblk_t ee_pblock;
> + unsigned int ee_len;
> +
> + ee_block = le32_to_cpu(ex->ee_block);
> + ee_len = ext4_ext_get_actual_len(ex);
> + ee_pblock = ext4_ext_pblock(ex);
Andreas Dilger do not like local variables which used only once. Let's avoid that.
> +
> + if (ee_len == 0)
> + return 0;
> +
> + return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
> + EXTENT_STATUS_WRITTEN);
> +}
> +
> static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
> {
> struct ext4_sb_info *sbi = container_of(shrink,
> diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
> index 56140ad..d8e2d4d 100644
> --- a/fs/ext4/extents_status.h
> +++ b/fs/ext4/extents_status.h
> @@ -39,6 +39,8 @@
> EXTENT_STATUS_DELAYED | \
> EXTENT_STATUS_HOLE)
>
> +struct ext4_extent;
> +
> struct extent_status {
> struct rb_node rb_node;
> ext4_lblk_t es_lblk; /* first logical block extent covers */
> @@ -64,6 +66,7 @@ extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
> struct extent_status *es);
> extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
> struct extent_status *es);
> +extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex);
>
> static inline int ext4_es_is_written(struct extent_status *es)
> {
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 3186a43..4f1d54a 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -722,6 +722,15 @@ found:
> }
> #endif
>
> + /*
> + * If the extent has been zeroed out, we don't need to update
> + * extent status tree.
> + */
> + if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
> + ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
> + if (ext4_es_is_written(&es))
> + goto has_zeroout;
> + }
> status = map->m_flags & EXT4_MAP_UNWRITTEN ?
> EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
> if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
> @@ -734,6 +743,7 @@ found:
> retval = ret;
> }
>
> +has_zeroout:
> up_write((&EXT4_I(inode)->i_data_sem));
> if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
> int ret = check_block_validity(inode, map);
> --
> 1.7.12.rc2.18.g61b472e
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists