Message-ID: <20240311154829.GU1927156@frogsfrogsfrogs>
Date: Mon, 11 Mar 2024 08:48:29 -0700
From: "Darrick J. Wong" <djwong@...nel.org>
To: Zhang Yi <yi.zhang@...weicloud.com>
Cc: linux-xfs@...r.kernel.org, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org, hch@...radead.org, brauner@...nel.org,
david@...morbit.com, tytso@....edu, jack@...e.cz,
yi.zhang@...wei.com, chengzhihao1@...wei.com, yukuai3@...wei.com
Subject: Re: [PATCH 3/4] iomap: don't increase i_size if it's not a write operation

On Mon, Mar 11, 2024 at 08:22:54PM +0800, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@...wei.com>
>
> Increasing i_size in iomap_zero_range() and iomap_unshare_iter() is not
> needed; the caller should handle it. In particular, when truncating a
> partial block, we must not increase i_size beyond the new EOF here. This
> doesn't affect xfs and gfs2 today because they set the new file size
> after zeroing out, so a transient increase of i_size doesn't matter; but
> it does affect ext4, which sets the file size before truncating. At the
> same time, iomap_write_failed() is also not needed for these two cases,
> so factor both calls out and move them to iomap_write_iter() and
> iomap_zero_iter().
This change should be a separate patch with its own justification.
Which is, AFAICT, something along the lines of:
"Unsharing and zeroing can only happen within EOF, so there is never a
need to perform posteof pagecache truncation if write begin fails."
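(For reference, iomap_write_failed looks roughly like this -- quoting from
memory, so the exact comment wording may differ:

static void iomap_write_failed(struct inode *inode, loff_t pos, size_t len)
{
	loff_t i_size = i_size_read(inode);

	/*
	 * Only truncate newly allocated pages beyond EOF, even if the
	 * write started inside the existing inode size.
	 */
	if (pos + len > i_size)
		truncate_pagecache_range(inode, max(pos, i_size),
					 pos + len - 1);
}

If the range being zeroed or unshared never extends past i_size, the
truncate_pagecache_range call never runs, so dropping the call for those
two cases is purely a cleanup.)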
> Signed-off-by: Zhang Yi <yi.zhang@...wei.com>
Doesn't this patch fix a bug in ext4?
> ---
> fs/iomap/buffered-io.c | 59 +++++++++++++++++++++---------------------
> 1 file changed, 30 insertions(+), 29 deletions(-)
>
> diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
> index 093c4515b22a..19f91324c690 100644
> --- a/fs/iomap/buffered-io.c
> +++ b/fs/iomap/buffered-io.c
> @@ -786,7 +786,6 @@ static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
>
> out_unlock:
> __iomap_put_folio(iter, pos, 0, folio);
> - iomap_write_failed(iter->inode, pos, len);
>
> return status;
> }
> @@ -838,34 +837,13 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
> size_t copied, struct folio *folio)
> {
> const struct iomap *srcmap = iomap_iter_srcmap(iter);
> - loff_t old_size = iter->inode->i_size;
> - size_t ret;
> -
> - if (srcmap->type == IOMAP_INLINE) {
> - ret = iomap_write_end_inline(iter, folio, pos, copied);
> - } else if (srcmap->flags & IOMAP_F_BUFFER_HEAD) {
> - ret = block_write_end(NULL, iter->inode->i_mapping, pos, len,
> - copied, &folio->page, NULL);
> - } else {
> - ret = __iomap_write_end(iter->inode, pos, len, copied, folio);
> - }
>
> - /*
> - * Update the in-memory inode size after copying the data into the page
> - * cache. It's up to the file system to write the updated size to disk,
> - * preferably after I/O completion so that no stale data is exposed.
> - */
> - if (pos + ret > old_size) {
> - i_size_write(iter->inode, pos + ret);
> - iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
> - }
> - __iomap_put_folio(iter, pos, ret, folio);
> -
> - if (old_size < pos)
> - pagecache_isize_extended(iter->inode, old_size, pos);
> - if (ret < len)
> - iomap_write_failed(iter->inode, pos + ret, len - ret);
> - return ret;
> + if (srcmap->type == IOMAP_INLINE)
> + return iomap_write_end_inline(iter, folio, pos, copied);
> + if (srcmap->flags & IOMAP_F_BUFFER_HEAD)
> + return block_write_end(NULL, iter->inode->i_mapping, pos, len,
> + copied, &folio->page, NULL);
> + return __iomap_write_end(iter->inode, pos, len, copied, folio);
> }
>
> static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
> @@ -880,6 +858,7 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
>
> do {
> struct folio *folio;
> + loff_t old_size;
> size_t offset; /* Offset into folio */
> size_t bytes; /* Bytes to write to folio */
> size_t copied; /* Bytes copied from user */
> @@ -912,8 +891,10 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
> }
>
> status = iomap_write_begin(iter, pos, bytes, &folio);
> - if (unlikely(status))
> + if (unlikely(status)) {
> + iomap_write_failed(iter->inode, pos, bytes);
> break;
> + }
> if (iter->iomap.flags & IOMAP_F_STALE)
> break;
>
> @@ -927,6 +908,24 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
> copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
> status = iomap_write_end(iter, pos, bytes, copied, folio);
>
> + /*
> + * Update the in-memory inode size after copying the data into
> + * the page cache. It's up to the file system to write the
> + * updated size to disk, preferably after I/O completion so that
> + * no stale data is exposed.
> + */
> + old_size = iter->inode->i_size;
> + if (pos + status > old_size) {
> + i_size_write(iter->inode, pos + status);
> + iter->iomap.flags |= IOMAP_F_SIZE_CHANGED;
> + }
> + __iomap_put_folio(iter, pos, status, folio);
Why is it necessary to hoist the __iomap_put_folio calls out of
iomap_write_end and into iomap_write_iter, iomap_unshare_iter, and
iomap_zero_iter? None of those callers use the folio afterwards, and it
makes more sense to me for iomap_write_end to release the folio that
iomap_write_begin returned.
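IOWs, something along these lines (a rough sketch of what I mean, not
tested), keeping the folio release inside iomap_write_end so the callers
never have to remember to drop it themselves:

static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
		size_t copied, struct folio *folio)
{
	const struct iomap *srcmap = iomap_iter_srcmap(iter);
	size_t ret;

	if (srcmap->type == IOMAP_INLINE)
		ret = iomap_write_end_inline(iter, folio, pos, copied);
	else if (srcmap->flags & IOMAP_F_BUFFER_HEAD)
		ret = block_write_end(NULL, iter->inode->i_mapping, pos, len,
				copied, &folio->page, NULL);
	else
		ret = __iomap_write_end(iter->inode, pos, len, copied, folio);

	/* release the folio that iomap_write_begin returned */
	__iomap_put_folio(iter, pos, ret, folio);
	return ret;
}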
--D
> +
> + if (old_size < pos)
> + pagecache_isize_extended(iter->inode, old_size, pos);
> + if (status < bytes)
> + iomap_write_failed(iter->inode, pos + status,
> + bytes - status);
> if (unlikely(copied != status))
> iov_iter_revert(i, copied - status);
>
> @@ -1296,6 +1295,7 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
> bytes = folio_size(folio) - offset;
>
> bytes = iomap_write_end(iter, pos, bytes, bytes, folio);
> + __iomap_put_folio(iter, pos, bytes, folio);
> if (WARN_ON_ONCE(bytes == 0))
> return -EIO;
>
> @@ -1360,6 +1360,7 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
> folio_mark_accessed(folio);
>
> bytes = iomap_write_end(iter, pos, bytes, bytes, folio);
> + __iomap_put_folio(iter, pos, bytes, folio);
> if (WARN_ON_ONCE(bytes == 0))
> return -EIO;
>
> --
> 2.39.2
>
>