[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200220091548.GB13232@quack2.suse.cz>
Date: Thu, 20 Feb 2020 10:15:48 +0100
From: Jan Kara <jack@...e.cz>
To: Eric Biggers <ebiggers@...nel.org>
Cc: linux-ext4@...r.kernel.org, Theodore Ts'o <tytso@....edu>,
Jan Kara <jack@...e.cz>
Subject: Re: [PATCH v3 2/2] ext4: fix race between writepages and enabling
EXT4_EXTENTS_FL
On Wed 19-02-20 10:30:47, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@...gle.com>
>
> If EXT4_EXTENTS_FL is set on an inode while ext4_writepages() is running
> on it, the following warning in ext4_add_complete_io() can be hit:
>
> WARNING: CPU: 1 PID: 0 at fs/ext4/page-io.c:234 ext4_put_io_end_defer+0xf0/0x120
>
> Here's a minimal reproducer (not 100% reliable; root isn't required):
>
> while true; do
> sync
> done &
> while true; do
> rm -f file
> touch file
> chattr -e file
> echo X >> file
> chattr +e file
> done
>
> The problem is that in ext4_writepages(), ext4_should_dioread_nolock()
> (which only returns true on extent-based files) is checked once to set
> the number of reserved journal credits, and also again later to select
> the flags for ext4_map_blocks() and copy the reserved journal handle to
> ext4_io_end::handle. But if EXT4_EXTENTS_FL is being concurrently set,
> the first check can see dioread_nolock disabled while the later one can
> see it enabled, causing the reserved handle to unexpectedly be NULL.
>
> Since changing EXT4_EXTENTS_FL is uncommon, and there may be other races
> related to doing so as well, fix this by synchronizing changes to
> EXT4_EXTENTS_FL with ext4_writepages() via the existing
> s_writepages_rwsem (previously called s_journal_flag_rwsem).
>
> This was originally reported by syzbot without a reproducer at
> https://syzkaller.appspot.com/bug?extid=2202a584a00fffd19fbf,
> but now that dioread_nolock is the default I also started seeing this
> when running syzkaller locally.
>
> Reported-by: syzbot+2202a584a00fffd19fbf@...kaller.appspotmail.com
> Fixes: 6b523df4fb5a ("ext4: use transaction reservation for extent conversion in ext4_end_io")
> Cc: stable@...nel.org
> Signed-off-by: Eric Biggers <ebiggers@...gle.com>
The patch looks good to me. You can add:
Reviewed-by: Jan Kara <jack@...e.cz>
Honza
> ---
> fs/ext4/ext4.h | 5 ++++-
> fs/ext4/migrate.c | 27 +++++++++++++++++++--------
> 2 files changed, 23 insertions(+), 9 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 487a7b430b9dd..0a59006c621a0 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1552,7 +1552,10 @@ struct ext4_sb_info {
> struct ratelimit_state s_warning_ratelimit_state;
> struct ratelimit_state s_msg_ratelimit_state;
>
> - /* Barrier between changing inodes' journal flags and writepages ops. */
> + /*
> + * Barrier between writepages ops and changing any inode's JOURNAL_DATA
> + * or EXTENTS flag.
> + */
> struct percpu_rw_semaphore s_writepages_rwsem;
> struct dax_device *s_daxdev;
> #ifdef CONFIG_EXT4_DEBUG
> diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
> index 89725fa425732..fb6520f371355 100644
> --- a/fs/ext4/migrate.c
> +++ b/fs/ext4/migrate.c
> @@ -407,6 +407,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
>
> int ext4_ext_migrate(struct inode *inode)
> {
> + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> handle_t *handle;
> int retval = 0, i;
> __le32 *i_data;
> @@ -431,6 +432,8 @@ int ext4_ext_migrate(struct inode *inode)
> */
> return retval;
>
> + percpu_down_write(&sbi->s_writepages_rwsem);
> +
> /*
> * Worst case we can touch the allocation bitmaps, a bgd
> * block, and a block to link in the orphan list. We do need
> @@ -441,7 +444,7 @@ int ext4_ext_migrate(struct inode *inode)
>
> if (IS_ERR(handle)) {
> retval = PTR_ERR(handle);
> - return retval;
> + goto out_unlock;
> }
> goal = (((inode->i_ino - 1) / EXT4_INODES_PER_GROUP(inode->i_sb)) *
> EXT4_INODES_PER_GROUP(inode->i_sb)) + 1;
> @@ -452,7 +455,7 @@ int ext4_ext_migrate(struct inode *inode)
> if (IS_ERR(tmp_inode)) {
> retval = PTR_ERR(tmp_inode);
> ext4_journal_stop(handle);
> - return retval;
> + goto out_unlock;
> }
> i_size_write(tmp_inode, i_size_read(inode));
> /*
> @@ -494,7 +497,7 @@ int ext4_ext_migrate(struct inode *inode)
> */
> ext4_orphan_del(NULL, tmp_inode);
> retval = PTR_ERR(handle);
> - goto out;
> + goto out_tmp_inode;
> }
>
> ei = EXT4_I(inode);
> @@ -576,10 +579,11 @@ int ext4_ext_migrate(struct inode *inode)
> ext4_ext_tree_init(handle, tmp_inode);
> out_stop:
> ext4_journal_stop(handle);
> -out:
> +out_tmp_inode:
> unlock_new_inode(tmp_inode);
> iput(tmp_inode);
> -
> +out_unlock:
> + percpu_up_write(&sbi->s_writepages_rwsem);
> return retval;
> }
>
> @@ -589,7 +593,8 @@ int ext4_ext_migrate(struct inode *inode)
> int ext4_ind_migrate(struct inode *inode)
> {
> struct ext4_extent_header *eh;
> - struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
> + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
> + struct ext4_super_block *es = sbi->s_es;
> struct ext4_inode_info *ei = EXT4_I(inode);
> struct ext4_extent *ex;
> unsigned int i, len;
> @@ -613,9 +618,13 @@ int ext4_ind_migrate(struct inode *inode)
> if (test_opt(inode->i_sb, DELALLOC))
> ext4_alloc_da_blocks(inode);
>
> + percpu_down_write(&sbi->s_writepages_rwsem);
> +
> handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
> - if (IS_ERR(handle))
> - return PTR_ERR(handle);
> + if (IS_ERR(handle)) {
> + ret = PTR_ERR(handle);
> + goto out_unlock;
> + }
>
> down_write(&EXT4_I(inode)->i_data_sem);
> ret = ext4_ext_check_inode(inode);
> @@ -650,5 +659,7 @@ int ext4_ind_migrate(struct inode *inode)
> errout:
> ext4_journal_stop(handle);
> up_write(&EXT4_I(inode)->i_data_sem);
> +out_unlock:
> + percpu_up_write(&sbi->s_writepages_rwsem);
> return ret;
> }
> --
> 2.25.0
>
--
Jan Kara <jack@...e.com>
SUSE Labs, CR
Powered by blists - more mailing lists