lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241204115208.g4lswqfbwrwmwtqw@quack3>
Date: Wed, 4 Dec 2024 12:52:08 +0100
From: Jan Kara <jack@...e.cz>
To: Zhang Yi <yi.zhang@...weicloud.com>
Cc: linux-ext4@...r.kernel.org, linux-fsdevel@...r.kernel.org,
	linux-kernel@...r.kernel.org, tytso@....edu,
	adilger.kernel@...ger.ca, jack@...e.cz, ritesh.list@...il.com,
	hch@...radead.org, djwong@...nel.org, david@...morbit.com,
	zokeefe@...gle.com, yi.zhang@...wei.com, chengzhihao1@...wei.com,
	yukuai3@...wei.com, yangerkun@...wei.com
Subject: Re: [PATCH 05/27] ext4: refactor ext4_zero_range()

On Tue 22-10-24 19:10:36, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@...wei.com>
> 
> The current implementation of ext4_zero_range() contains complex
> position calculations and stale error tags. To improve the code's
> clarity and maintainability, it is essential to clean up the code and
> improve its readability. This can be achieved by: a) simplifying and
> renaming variables, making the style the same as ext4_punch_hole(); b)
> eliminating unnecessary position calculations, writing back all data in
> data=journal mode, and dropping page cache from the original offset to
> the end, rather than using aligned blocks; c) renaming the stale
> out_mutex tags.
> 
> Signed-off-by: Zhang Yi <yi.zhang@...wei.com>

...

> -		goto out_mutex;
> -
> -	/* Preallocate the range including the unaligned edges */
> -	if (partial_begin || partial_end) {
> -		ret = ext4_alloc_file_blocks(file,
> -				round_down(offset, 1 << blkbits) >> blkbits,
> -				(round_up((offset + len), 1 << blkbits) -
> -				 round_down(offset, 1 << blkbits)) >> blkbits,
> -				new_size, flags);
> -		if (ret)
> -			goto out_mutex;
> -
> -	}

So I think we should keep this first ext4_alloc_file_blocks() call before
we truncate the page cache. Otherwise, if ext4_alloc_file_blocks() fails due
to ENOSPC, we have already lost the dirty data originally in the zeroed
range. All the other failure modes are kind of catastrophic anyway, so they
are fine after dropping the page cache. But ENOSPC can be quite common and
should be handled more gracefully.

								Honza

> -
> -	/* Zero range excluding the unaligned edges */
> -	if (max_blocks > 0) {
> -		flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
> -			  EXT4_EX_NOCACHE);
> +		goto out;
>  
> -		/*
> -		 * Prevent page faults from reinstantiating pages we have
> -		 * released from page cache.
> -		 */
> -		filemap_invalidate_lock(mapping);
> +	/*
> +	 * Prevent page faults from reinstantiating pages we have released
> +	 * from page cache.
> +	 */
> +	filemap_invalidate_lock(mapping);
>  
> -		ret = ext4_break_layouts(inode);
> -		if (ret) {
> -			filemap_invalidate_unlock(mapping);
> -			goto out_mutex;
> -		}
> +	ret = ext4_break_layouts(inode);
> +	if (ret)
> +		goto out_invalidate_lock;
>  
> +	/*
> +	 * For journalled data we need to write (and checkpoint) pages before
> +	 * discarding page cache to avoid inconsistent data on disk in case of
> +	 * crash before zeroing trans is committed.
> +	 */
> +	if (ext4_should_journal_data(inode)) {
> +		ret = filemap_write_and_wait_range(mapping, offset, end - 1);
> +	} else {
>  		ret = ext4_update_disksize_before_punch(inode, offset, len);
> -		if (ret) {
> -			filemap_invalidate_unlock(mapping);
> -			goto out_mutex;
> -		}
> +		ext4_truncate_folios_range(inode, offset, end);
> +	}
> +	if (ret)
> +		goto out_invalidate_lock;
>  
> -		/*
> -		 * For journalled data we need to write (and checkpoint) pages
> -		 * before discarding page cache to avoid inconsitent data on
> -		 * disk in case of crash before zeroing trans is committed.
> -		 */
> -		if (ext4_should_journal_data(inode)) {
> -			ret = filemap_write_and_wait_range(mapping, start,
> -							   end - 1);
> -			if (ret) {
> -				filemap_invalidate_unlock(mapping);
> -				goto out_mutex;
> -			}
> -		}
> +	/* Now release the pages and zero block aligned part of pages */
> +	truncate_pagecache_range(inode, offset, end - 1);
>  
> -		/* Now release the pages and zero block aligned part of pages */
> -		ext4_truncate_folios_range(inode, start, end);
> -		truncate_pagecache_range(inode, start, end - 1);
> +	flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
> +	/* Preallocate the range including the unaligned edges */
> +	if (offset & (blocksize - 1) || end & (blocksize - 1)) {
> +		ext4_lblk_t alloc_lblk = offset >> blkbits;
> +		ext4_lblk_t len_lblk = EXT4_MAX_BLOCKS(len, offset, blkbits);
>  
> -		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
> -					     flags);
> -		filemap_invalidate_unlock(mapping);
> +		ret = ext4_alloc_file_blocks(file, alloc_lblk, len_lblk,
> +					     new_size, flags);
>  		if (ret)
> -			goto out_mutex;
> +			goto out_invalidate_lock;
>  	}
> -	if (!partial_begin && !partial_end)
> -		goto out_mutex;
> +
> +	/* Zero range excluding the unaligned edges */
> +	start_lblk = round_up(offset, blocksize) >> blkbits;
> +	end_lblk = end >> blkbits;
> +	if (end_lblk > start_lblk) {
> +		ext4_lblk_t zero_blks = end_lblk - start_lblk;
> +
> +		flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | EXT4_EX_NOCACHE);
> +		ret = ext4_alloc_file_blocks(file, start_lblk, zero_blks,
> +					     new_size, flags);
> +		if (ret)
> +			goto out_invalidate_lock;
> +	}
> +	/* Finish zeroing out if it doesn't contain partial block */
> +	if (!(offset & (blocksize - 1)) && !(end & (blocksize - 1)))
> +		goto out_invalidate_lock;
>  
>  	/*
>  	 * In worst case we have to writeout two nonadjacent unwritten
> @@ -4700,25 +4665,29 @@ static long ext4_zero_range(struct file *file, loff_t offset,
>  	if (IS_ERR(handle)) {
>  		ret = PTR_ERR(handle);
>  		ext4_std_error(inode->i_sb, ret);
> -		goto out_mutex;
> +		goto out_invalidate_lock;
>  	}
>  
> +	/* Zero out partial block at the edges of the range */
> +	ret = ext4_zero_partial_blocks(handle, inode, offset, len);
> +	if (ret)
> +		goto out_handle;
> +
>  	if (new_size)
>  		ext4_update_inode_size(inode, new_size);
>  	ret = ext4_mark_inode_dirty(handle, inode);
>  	if (unlikely(ret))
>  		goto out_handle;
> -	/* Zero out partial block at the edges of the range */
> -	ret = ext4_zero_partial_blocks(handle, inode, offset, len);
> -	if (ret >= 0)
> -		ext4_update_inode_fsync_trans(handle, inode, 1);
>  
> +	ext4_update_inode_fsync_trans(handle, inode, 1);
>  	if (file->f_flags & O_SYNC)
>  		ext4_handle_sync(handle);
>  
>  out_handle:
>  	ext4_journal_stop(handle);
> -out_mutex:
> +out_invalidate_lock:
> +	filemap_invalidate_unlock(mapping);
> +out:
>  	inode_unlock(inode);
>  	return ret;
>  }
> -- 
> 2.46.1
> 
-- 
Jan Kara <jack@...e.com>
SUSE Labs, CR

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ