lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e9cad692-148b-2d4b-2017-fd0a2ebd2833@kernel.org>
Date:   Wed, 17 Nov 2021 21:31:50 +0800
From:   Chao Yu <chao@...nel.org>
To:     Jaegeuk Kim <jaegeuk@...nel.org>, linux-kernel@...r.kernel.org,
        linux-f2fs-devel@...ts.sourceforge.net
Cc:     Eric Biggers <ebiggers@...gle.com>
Subject: Re: [f2fs-dev] [PATCH 1/6] f2fs: rework write preallocations

On 2021/11/17 5:45, Jaegeuk Kim wrote:
> From: Eric Biggers <ebiggers@...gle.com>
> 
> f2fs_write_begin() assumes that all blocks were preallocated by
> default unless FI_NO_PREALLOC is explicitly set.  This invites data
> corruption, as there are cases in which not all blocks are preallocated.
> Commit 47501f87c61a ("f2fs: preallocate DIO blocks when forcing
> buffered_io") fixed one case, but there are others remaining.
> 
> Fix up this logic by replacing this flag with FI_PREALLOCATED_ALL, which
> only gets set if all blocks for the current write were preallocated.
> 
> Also clean up f2fs_preallocate_blocks(), move it to file.c, and make it
> handle some of the logic that was previously in write_iter() directly.
> 
> Jaegeuk:
> DIO to holes are turning into buffered IO in f2fs_direct_IO, so we should
> not preallocate blocks.
> 
> Signed-off-by: Eric Biggers <ebiggers@...gle.com>
> Signed-off-by: Jaegeuk Kim <jaegeuk@...nel.org>
> ---
>   fs/f2fs/data.c |  55 ++-------------------
>   fs/f2fs/f2fs.h |   3 +-
>   fs/f2fs/file.c | 130 +++++++++++++++++++++++++++++++------------------
>   3 files changed, 87 insertions(+), 101 deletions(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 9f754aaef558..3b27fb7daa8b 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1384,53 +1384,6 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
>   	return 0;
>   }
>   
> -int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
> -{
> -	struct inode *inode = file_inode(iocb->ki_filp);
> -	struct f2fs_map_blocks map;
> -	int flag;
> -	int err = 0;
> -	bool direct_io = iocb->ki_flags & IOCB_DIRECT;
> -
> -	map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
> -	map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
> -	if (map.m_len > map.m_lblk)
> -		map.m_len -= map.m_lblk;
> -	else
> -		map.m_len = 0;
> -
> -	map.m_next_pgofs = NULL;
> -	map.m_next_extent = NULL;
> -	map.m_seg_type = NO_CHECK_TYPE;
> -	map.m_may_create = true;
> -
> -	if (direct_io) {
> -		map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
> -		flag = f2fs_force_buffered_io(inode, iocb, from) ?
> -					F2FS_GET_BLOCK_PRE_AIO :
> -					F2FS_GET_BLOCK_PRE_DIO;
> -		goto map_blocks;
> -	}
> -	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
> -		err = f2fs_convert_inline_inode(inode);
> -		if (err)
> -			return err;
> -	}
> -	if (f2fs_has_inline_data(inode))
> -		return err;
> -
> -	flag = F2FS_GET_BLOCK_PRE_AIO;
> -
> -map_blocks:
> -	err = f2fs_map_blocks(inode, &map, 1, flag);
> -	if (map.m_len > 0 && err == -ENOSPC) {
> -		if (!direct_io)
> -			set_inode_flag(inode, FI_NO_PREALLOC);
> -		err = 0;
> -	}
> -	return err;
> -}
> -
>   void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
>   {
>   	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
> @@ -3339,12 +3292,10 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
>   	int flag;
>   
>   	/*
> -	 * we already allocated all the blocks, so we don't need to get
> -	 * the block addresses when there is no need to fill the page.
> +	 * If a whole page is being written and we already preallocated all the
> +	 * blocks, then there is no need to get a block address now.
>   	 */
> -	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
> -	    !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
> -	    !f2fs_verity_in_progress(inode))
> +	if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
>   		return 0;
>   
>   	/* f2fs_lock_op avoids race between write CP and convert_inline_page */
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ce9fc9f13000..be871a79c634 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -715,7 +715,7 @@ enum {
>   	FI_INLINE_DOTS,		/* indicate inline dot dentries */
>   	FI_DO_DEFRAG,		/* indicate defragment is running */
>   	FI_DIRTY_FILE,		/* indicate regular/symlink has dirty pages */
> -	FI_NO_PREALLOC,		/* indicate skipped preallocated blocks */
> +	FI_PREALLOCATED_ALL,	/* all blocks for write were preallocated */
>   	FI_HOT_DATA,		/* indicate file is hot */
>   	FI_EXTRA_ATTR,		/* indicate file has extra attribute */
>   	FI_PROJ_INHERIT,	/* indicate file inherits projectid */
> @@ -3614,7 +3614,6 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr);
>   int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count);
>   int f2fs_reserve_new_block(struct dnode_of_data *dn);
>   int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
> -int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from);
>   int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
>   struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
>   			int op_flags, bool for_write);
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 92ec2699bc85..4bf77a5bf998 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -4235,10 +4235,76 @@ static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
>   	return ret;
>   }
>   
> +/*
> + * Preallocate blocks for a write request, if it is possible and helpful to do
> + * so.  Returns a positive number if blocks may have been preallocated, 0 if no
> + * blocks were preallocated, or a negative errno value if something went
> + * seriously wrong.  Also sets FI_PREALLOCATED_ALL on the inode if *all* the
> + * requested blocks (not just some of them) have been allocated.
> + */
> +static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter)
> +{
> +	struct inode *inode = file_inode(iocb->ki_filp);
> +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> +	const loff_t pos = iocb->ki_pos;
> +	const size_t count = iov_iter_count(iter);
> +	struct f2fs_map_blocks map = {};
> +	bool dio = (iocb->ki_flags & IOCB_DIRECT) &&
> +		   !f2fs_force_buffered_io(inode, iocb, iter);
> +	int flag;
> +	int ret;
> +
> +	/* If it will be an out-of-place direct write, don't bother. */
> +	if (dio && f2fs_lfs_mode(sbi))
> +		return 0;
> +
> +	/* No-wait I/O can't allocate blocks. */
> +	if (iocb->ki_flags & IOCB_NOWAIT)
> +		return 0;
> +
> +	/* If it will be a short write, don't bother. */
> +	if (fault_in_iov_iter_readable(iter, count))
> +		return 0;
> +
> +	if (f2fs_has_inline_data(inode)) {
> +		/* If the data will fit inline, don't bother. */
> +		if (pos + count <= MAX_INLINE_DATA(inode))
> +			return 0;
> +		ret = f2fs_convert_inline_inode(inode);
> +		if (ret)
> +			return ret;
> +	}
> +

/* do not preallocate block which is partially written */

Otherwise, it looks good to me.

Reviewed-by: Chao Yu <chao@...nel.org>

Thanks,

> +	map.m_lblk = F2FS_BLK_ALIGN(pos);
> +	map.m_len = F2FS_BYTES_TO_BLK(pos + count);
> +	if (map.m_len > map.m_lblk)
> +		map.m_len -= map.m_lblk;
> +	else
> +		map.m_len = 0;
> +	map.m_may_create = true;
> +	if (dio) {
> +		map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
> +		flag = F2FS_GET_BLOCK_PRE_DIO;
> +	} else {
> +		map.m_seg_type = NO_CHECK_TYPE;
> +		flag = F2FS_GET_BLOCK_PRE_AIO;
> +	}
> +
> +	ret = f2fs_map_blocks(inode, &map, 1, flag);
> +	/* -ENOSPC is only a fatal error if no blocks could be allocated. */
> +	if (ret < 0 && !(ret == -ENOSPC && map.m_len > 0))
> +		return ret;
> +	if (ret == 0)
> +		set_inode_flag(inode, FI_PREALLOCATED_ALL);
> +	return map.m_len;
> +}
> +
>   static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
>   {
>   	struct file *file = iocb->ki_filp;
>   	struct inode *inode = file_inode(file);
> +	loff_t target_size;
> +	int preallocated;
>   	ssize_t ret;
>   
>   	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
> @@ -4262,84 +4328,54 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
>   
>   	if (unlikely(IS_IMMUTABLE(inode))) {
>   		ret = -EPERM;
> -		goto unlock;
> +		goto out_unlock;
>   	}
>   
>   	if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
>   		ret = -EPERM;
> -		goto unlock;
> +		goto out_unlock;
>   	}
>   
>   	ret = generic_write_checks(iocb, from);
>   	if (ret > 0) {
> -		bool preallocated = false;
> -		size_t target_size = 0;
> -		int err;
> -
> -		if (fault_in_iov_iter_readable(from, iov_iter_count(from)))
> -			set_inode_flag(inode, FI_NO_PREALLOC);
> -
> -		if ((iocb->ki_flags & IOCB_NOWAIT)) {
> +		if (iocb->ki_flags & IOCB_NOWAIT) {
>   			if (!f2fs_overwrite_io(inode, iocb->ki_pos,
>   						iov_iter_count(from)) ||
>   				f2fs_has_inline_data(inode) ||
>   				f2fs_force_buffered_io(inode, iocb, from)) {
> -				clear_inode_flag(inode, FI_NO_PREALLOC);
> -				inode_unlock(inode);
>   				ret = -EAGAIN;
> -				goto out;
> +				goto out_unlock;
>   			}
> -			goto write;
>   		}
> -
> -		if (is_inode_flag_set(inode, FI_NO_PREALLOC))
> -			goto write;
> -
>   		if (iocb->ki_flags & IOCB_DIRECT) {
> -			/*
> -			 * Convert inline data for Direct I/O before entering
> -			 * f2fs_direct_IO().
> -			 */
> -			err = f2fs_convert_inline_inode(inode);
> -			if (err)
> -				goto out_err;
> -			/*
> -			 * If force_buffere_io() is true, we have to allocate
> -			 * blocks all the time, since f2fs_direct_IO will fall
> -			 * back to buffered IO.
> -			 */
> -			if (!f2fs_force_buffered_io(inode, iocb, from) &&
> -					f2fs_lfs_mode(F2FS_I_SB(inode)))
> -				goto write;
> +			ret = f2fs_convert_inline_inode(inode);
> +			if (ret)
> +				goto out_unlock;
>   		}
> -		preallocated = true;
> +		/* Possibly preallocate the blocks for the write. */
>   		target_size = iocb->ki_pos + iov_iter_count(from);
> -
> -		err = f2fs_preallocate_blocks(iocb, from);
> -		if (err) {
> -out_err:
> -			clear_inode_flag(inode, FI_NO_PREALLOC);
> -			inode_unlock(inode);
> -			ret = err;
> -			goto out;
> +		preallocated = f2fs_preallocate_blocks(iocb, from);
> +		if (preallocated < 0) {
> +			ret = preallocated;
> +			goto out_unlock;
>   		}
> -write:
> +
>   		ret = __generic_file_write_iter(iocb, from);
> -		clear_inode_flag(inode, FI_NO_PREALLOC);
>   
> -		/* if we couldn't write data, we should deallocate blocks. */
> -		if (preallocated && i_size_read(inode) < target_size) {
> +		/* Don't leave any preallocated blocks around past i_size. */
> +		if (preallocated > 0 && i_size_read(inode) < target_size) {
>   			down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
>   			filemap_invalidate_lock(inode->i_mapping);
>   			f2fs_truncate(inode);
>   			filemap_invalidate_unlock(inode->i_mapping);
>   			up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
>   		}
> +		clear_inode_flag(inode, FI_PREALLOCATED_ALL);
>   
>   		if (ret > 0)
>   			f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
>   	}
> -unlock:
> +out_unlock:
>   	inode_unlock(inode);
>   out:
>   	trace_f2fs_file_write_iter(inode, iocb->ki_pos,
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ