lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 9 Sep 2019 13:47:28 +0530
From:   Ritesh Harjani <riteshh@...ux.ibm.com>
To:     Matthew Bobrowski <mbobrowski@...browski.org>, tytso@....edu,
        jack@...e.cz, adilger.kernel@...ger.ca
Cc:     linux-ext4@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        david@...morbit.com, hch@...radead.org, darrick.wong@...cle.com
Subject: Re: [PATCH v2 2/6] ext4: move inode extension/truncate code out from
 ext4_iomap_end()



On 9/9/19 4:49 AM, Matthew Bobrowski wrote:
> In preparation for implementing the iomap direct IO write path
> modifications, the inode extension/truncate code needs to be moved out
> from ext4_iomap_end(). For direct IO, if the current code remained
> within ext4_iomap_end() it would behave incorrectly. If we update the
> inode size prior to converting unwritten extents we run the risk of
> allowing a racing direct IO read operation to find unwritten extents
> before they are converted.
> 
> The inode extension/truncate code has been moved out into a new helper
> ext4_handle_inode_extension(). This helper has been designed so that
> it can be used by both DAX and direct IO paths in the instance that
> the result of the write is extending the inode.
> 
> Signed-off-by: Matthew Bobrowski <mbobrowski@...browski.org>
> ---
>   fs/ext4/file.c  | 93 ++++++++++++++++++++++++++++++++++++++++++++++++-
>   fs/ext4/inode.c | 48 +------------------------
>   2 files changed, 93 insertions(+), 48 deletions(-)
> 
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index e52e3928dc25..8e586198f6e6 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -33,6 +33,7 @@
>   #include "ext4_jbd2.h"
>   #include "xattr.h"
>   #include "acl.h"
> +#include "truncate.h"
> 
>   static bool ext4_dio_checks(struct inode *inode)
>   {
> @@ -233,12 +234,91 @@ static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
>   	return iov_iter_count(from);
>   }
> 
> +static int ext4_handle_inode_extension(struct inode *inode, loff_t offset,
> +				       ssize_t len, size_t count)
> +{
> +	handle_t *handle;
> +	bool truncate = false;
> +	ext4_lblk_t written_blk, end_blk;
> +	int ret = 0, blkbits = inode->i_blkbits;
> +
> +	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
> +	if (IS_ERR(handle)) {
> +		ret = PTR_ERR(handle);
> +		goto orphan_del;
> +	}
> +
> +	if (ext4_update_inode_size(inode, offset + len))
> +		ext4_mark_inode_dirty(handle, inode);
> +
> +	/*
> +	 * We may need truncate allocated but not written blocks
> +	 * beyond EOF.
> +	 */
> +	written_blk = ALIGN(offset + len, 1 << blkbits);
> +	end_blk = ALIGN(offset + len + count, 1 << blkbits);

Why is len added in the end_blk calculation?
Shouldn't it be as shown below?
	end_blk = ALIGN(offset + count, 1 << blkbits);

> +	if (written_blk < end_blk && ext4_can_truncate(inode))
> +		truncate = true;
> +
> +	/*
> +	 * Remove the inode from the orphan list if it has been
> +	 * extended and everything went OK.
> +	 */
> +	if (!truncate && inode->i_nlink)
> +		ext4_orphan_del(handle, inode);
> +	ext4_journal_stop(handle);
> +
> +	if (truncate) {
> +		ext4_truncate_failed_write(inode);
> +orphan_del:
> +		/*
> +		 * If the truncate operation failed early the inode
> +		 * may still be on the orphan list. In that case, we
> +		 * need try remove the inode from the linked list in
> +		 * memory.
> +		 */
> +		if (inode->i_nlink)
> +			ext4_orphan_del(NULL, inode);
> +	}
> +	return ret;
> +}
> +
> +/*
> + * The inode may have been placed onto the orphan list or has had
> + * blocks allocated beyond EOF as a result of an extension. We need to
> + * ensure that any necessary cleanup routines are performed if the
> + * error path has been taken for a write.
> + */
> +static int ext4_handle_failed_inode_extension(struct inode *inode, loff_t size)
> +{
> +	int ret = 0;

There is no need for ret here anyway.


> +	handle_t *handle;
> +
> +	if (size > i_size_read(inode))
> +		ext4_truncate_failed_write(inode);
> +
> +	if (!list_empty(&EXT4_I(inode)->i_orphan)) {
> +		handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
> +		if (IS_ERR(handle)) {
> +			if (inode->i_nlink)
> +				ext4_orphan_del(NULL, inode);
> +			return PTR_ERR(handle);
> +		}
> +		if (inode->i_nlink)
> +			ext4_orphan_del(handle, inode);
> +		ext4_journal_stop(handle);
> +	}
> +	return ret;

This can simply be `return 0;`.

> +}
> +
>   #ifdef CONFIG_FS_DAX
>   static ssize_t
>   ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
>   {
> -	struct inode *inode = file_inode(iocb->ki_filp);
>   	ssize_t ret;
> +	int error = 0;
> +	loff_t offset;
> +	struct inode *inode = file_inode(iocb->ki_filp);
> 
>   	if (!inode_trylock(inode)) {
>   		if (iocb->ki_flags & IOCB_NOWAIT)
> @@ -255,7 +335,18 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
>   	if (ret)
>   		goto out;
> 
> +	offset = iocb->ki_pos;
>   	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
> +	if (ret > 0 && iocb->ki_pos > i_size_read(inode))
> +		error = ext4_handle_inode_extension(inode, offset, ret,
> +						    iov_iter_count(from));
> +
> +	if (ret < 0)
> +		error = ext4_handle_failed_inode_extension(inode,
> +							   iocb->ki_pos);
> +
> +	if (error)
> +		ret = error;
>   out:
>   	inode_unlock(inode);
>   	if (ret > 0)
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 420fe3deed39..761ce6286b05 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -3601,53 +3601,7 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
>   static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
>   			  ssize_t written, unsigned flags, struct iomap *iomap)
>   {
> -	int ret = 0;
> -	handle_t *handle;
> -	int blkbits = inode->i_blkbits;
> -	bool truncate = false;
> -
> -	if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
> -		return 0;
> -
> -	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
> -	if (IS_ERR(handle)) {
> -		ret = PTR_ERR(handle);
> -		goto orphan_del;
> -	}
> -	if (ext4_update_inode_size(inode, offset + written))
> -		ext4_mark_inode_dirty(handle, inode);
> -	/*
> -	 * We may need to truncate allocated but not written blocks beyond EOF.
> -	 */
> -	if (iomap->offset + iomap->length >
> -	    ALIGN(inode->i_size, 1 << blkbits)) {
> -		ext4_lblk_t written_blk, end_blk;
> -
> -		written_blk = (offset + written) >> blkbits;
> -		end_blk = (offset + length) >> blkbits;
> -		if (written_blk < end_blk && ext4_can_truncate(inode))
> -			truncate = true;
> -	}
> -	/*
> -	 * Remove inode from orphan list if we were extending a inode and
> -	 * everything went fine.
> -	 */
> -	if (!truncate && inode->i_nlink &&
> -	    !list_empty(&EXT4_I(inode)->i_orphan))
> -		ext4_orphan_del(handle, inode);
> -	ext4_journal_stop(handle);
> -	if (truncate) {
> -		ext4_truncate_failed_write(inode);
> -orphan_del:
> -		/*
> -		 * If truncate failed early the inode might still be on the
> -		 * orphan list; we need to make sure the inode is removed from
> -		 * the orphan list in that case.
> -		 */
> -		if (inode->i_nlink)
> -			ext4_orphan_del(NULL, inode);
> -	}
> -	return ret;
> +	return 0;
>   }
> 
>   const struct iomap_ops ext4_iomap_ops = {
> 

Powered by blists - more mailing lists