[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <62d5naxy5tq2gvi4vv4hhxhjfabkcr7w2qsvz7y73ihei6o7ue@oieo2mwvx344>
Date: Thu, 15 Jan 2026 13:01:14 +0100
From: Jan Kara <jack@...e.cz>
To: Ojaswin Mujoo <ojaswin@...ux.ibm.com>
Cc: linux-ext4@...r.kernel.org, Theodore Ts'o <tytso@....edu>,
Ritesh Harjani <ritesh.list@...il.com>, Zhang Yi <yi.zhang@...wei.com>, Jan Kara <jack@...e.cz>,
libaokun1@...wei.com, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 6/8] ext4: Refactor zeroout path and handle all cases
On Wed 14-01-26 20:27:50, Ojaswin Mujoo wrote:
> Currently, zeroout is used as a fallback in case we fail to
> split/convert extents in the "traditional" modify-the-extent-tree way.
> This is essential to mitigate failures in critical paths like extent
> splitting during endio. However, the logic is very messy and not easy to
> follow. Further, the fragile use of various flags has made it prone to
> errors.
>
> Refactor zeroout logic by moving it up to ext4_split_extent().
> Further, zeroout correctly based on the type of conversion we want, i.e.:
> - unwritten to written: Zeroout everything around the mapped range.
> - written to unwritten: Zeroout only the mapped range.
>
> Also, ext4_ext_convert_to_initialized() now passes
> EXT4_GET_BLOCKS_CONVERT to make the intention clear.
>
> Signed-off-by: Ojaswin Mujoo <ojaswin@...ux.ibm.com>
Overall looks nice. Feel free to add:
Reviewed-by: Jan Kara <jack@...e.cz>
A few nits below:
> +static int ext4_split_extent_zeroout(handle_t *handle, struct inode *inode,
> + struct ext4_ext_path *path,
> + struct ext4_map_blocks *map, int flags)
> +{
> + struct ext4_extent *ex;
> + unsigned int ee_len, depth;
> + ext4_lblk_t ee_block;
> + uint64_t lblk, pblk, len;
> + int is_unwrit;
> + int err = 0;
> +
> + depth = ext_depth(inode);
> + ex = path[depth].p_ext;
> + ee_block = le32_to_cpu(ex->ee_block);
> + ee_len = ext4_ext_get_actual_len(ex);
> + is_unwrit = ext4_ext_is_unwritten(ex);
>
> + if (flags & EXT4_GET_BLOCKS_CONVERT) {
> /*
> - * The first half contains partially valid data, the splitting
> - * of this extent has not been completed, fix extent length
> - * and ext4_split_extent() split will the first half again.
> + * EXT4_GET_BLOCKS_CONVERT: Caller wants the range specified by
> + * map to be initialized. Zeroout everything except the map
> + * range.
> */
> - if (split_flag & EXT4_EXT_DATA_PARTIAL_VALID1) {
> - /*
> - * Drop extent cache to prevent stale unwritten
> - * extents remaining after zeroing out.
> - */
> - ext4_es_remove_extent(inode,
> - le32_to_cpu(zero_ex.ee_block),
> - ext4_ext_get_actual_len(&zero_ex));
> - goto fix_extent_len;
> +
> + loff_t map_end = (loff_t) map->m_lblk + map->m_len;
> + loff_t ex_end = (loff_t) ee_block + ee_len;
> +
> + if (!is_unwrit)
> + /* Shouldn't happen. Just exit */
> + return -EINVAL;
> +
> + /* zeroout left */
> + if (map->m_lblk > ee_block) {
> + lblk = ee_block;
> + len = map->m_lblk - ee_block;
> + pblk = ext4_ext_pblock(ex);
> + err = ext4_issue_zeroout(inode, lblk, pblk, len);
> + if (err)
> + /* ZEROOUT failed, just return original error */
> + return err;
> }
>
> - /* update the extent length and mark as initialized */
> - ex->ee_len = cpu_to_le16(ee_len);
> - ext4_ext_try_to_merge(handle, inode, path, ex);
> - err = ext4_ext_dirty(handle, inode, path + path->p_depth);
> - if (!err)
> - /* update extent status tree */
> - ext4_zeroout_es(inode, &zero_ex);
> + /* zeroout right */
> + if (map->m_lblk + map->m_len < ee_block + ee_len) {
Use map_end and ex_end in the above condition, since we already have them?
...
> @@ -3382,11 +3428,13 @@ static struct ext4_ext_path *ext4_split_extent(handle_t *handle,
> int split_flag, int flags,
> unsigned int *allocated)
> {
> - ext4_lblk_t ee_block;
> + ext4_lblk_t ee_block, orig_ee_block;
> struct ext4_extent *ex;
> - unsigned int ee_len, depth;
> - int unwritten;
> - int split_flag1, flags1;
> + unsigned int ee_len, orig_ee_len, depth;
> + int unwritten, orig_unwritten;
> + int split_flag1 = 0, flags1 = 0;
> + int orig_err = 0;
^^ extra space
> + int orig_flags = flags;
>
> depth = ext_depth(inode);
> ex = path[depth].p_ext;
> @@ -3394,30 +3442,31 @@ static struct ext4_ext_path *ext4_split_extent(handle_t *handle,
> ee_len = ext4_ext_get_actual_len(ex);
> unwritten = ext4_ext_is_unwritten(ex);
>
> + orig_ee_block = ee_block;
> + orig_ee_len = ee_len;
> + orig_unwritten = unwritten;
> +
> /* Do not cache extents that are in the process of being modified. */
> flags |= EXT4_EX_NOCACHE;
>
> if (map->m_lblk + map->m_len < ee_block + ee_len) {
> - split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
> flags1 = flags | EXT4_GET_BLOCKS_SPLIT_NOMERGE;
> if (unwritten)
> split_flag1 |= EXT4_EXT_MARK_UNWRIT1 |
> EXT4_EXT_MARK_UNWRIT2;
> - if (split_flag & EXT4_EXT_DATA_VALID2)
> - split_flag1 |= map->m_lblk > ee_block ?
> - EXT4_EXT_DATA_PARTIAL_VALID1 :
> - EXT4_EXT_DATA_ENTIRE_VALID1;
> path = ext4_split_extent_at(handle, inode, path,
> map->m_lblk + map->m_len, split_flag1, flags1);
> if (IS_ERR(path))
> - return path;
> + goto try_zeroout;
> +
> /*
> * Update path is required because previous ext4_split_extent_at
> * may result in split of original leaf or extent zeroout.
> */
> path = ext4_find_extent(inode, map->m_lblk, path, flags);
> if (IS_ERR(path))
> - return path;
> + goto try_zeroout;
> +
> depth = ext_depth(inode);
> ex = path[depth].p_ext;
> if (!ex) {
> @@ -3426,22 +3475,64 @@ static struct ext4_ext_path *ext4_split_extent(handle_t *handle,
> ext4_free_ext_path(path);
> return ERR_PTR(-EFSCORRUPTED);
> }
> - unwritten = ext4_ext_is_unwritten(ex);
> }
>
> if (map->m_lblk >= ee_block) {
> - split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
> + split_flag1 = 0;
> if (unwritten) {
> split_flag1 |= EXT4_EXT_MARK_UNWRIT1;
> - split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
> - EXT4_EXT_MARK_UNWRIT2);
> + split_flag1 |= split_flag & EXT4_EXT_MARK_UNWRIT2;
> }
> - path = ext4_split_extent_at(handle, inode, path,
> - map->m_lblk, split_flag1, flags);
> + path = ext4_split_extent_at(handle, inode, path, map->m_lblk,
> + split_flag1, flags);
> if (IS_ERR(path))
> - return path;
> + goto try_zeroout;
> }
>
> + goto success;
> +
> +try_zeroout:
> + /*
> + * There was an error in splitting the extent. So instead, just zeroout
> + * unwritten portions and convert it to initialize as a last resort. If
> + * there is any failure here we just return the original error
> + */
> +
> + orig_err = PTR_ERR(path);
> + if (orig_err != -ENOSPC && orig_err != -EDQUOT && orig_err != -ENOMEM)
> + goto out_orig_err;
> +
> + if (!(split_flag & EXT4_EXT_MAY_ZEROOUT))
> + /* There's an error and we can't zeroout, just return the
> + * original err
> + */
I'd put this comment before the if and just write:
/* We can't zeroout? Just return the original error */
so that the comment fits on a single line :)
> + goto out_orig_err;
> +
> + path = ext4_find_extent(inode, map->m_lblk, NULL, flags);
> + if (IS_ERR(path))
> + goto out_orig_err;
> +
> + depth = ext_depth(inode);
> + ex = path[depth].p_ext;
> + ee_block = le32_to_cpu(ex->ee_block);
> + ee_len = ext4_ext_get_actual_len(ex);
> + unwritten = ext4_ext_is_unwritten(ex);
> +
> + if (WARN_ON(ee_block != orig_ee_block || ee_len != orig_ee_len ||
> + unwritten != orig_unwritten))
> + /*
> + * The extent to zeroout should have been unchanged
> + * but its not.
> + */
> + goto out_free_path;
> +
> + if (ext4_split_extent_zeroout(handle, inode, path, map, orig_flags))
> + /*
> + * Something went wrong in zeroout
> + */
I think this comment isn't really useful...
Honza
--
Jan Kara <jack@...e.com>
SUSE Labs, CR
Powered by blists - more mailing lists