lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <BBCE411D-E6D0-4826-8F4E-89A09E30D515@dilger.ca>
Date:   Sat, 2 Apr 2022 13:24:47 -0600
From:   Andreas Dilger <adilger@...ger.ca>
To:     anserper@...ru
Cc:     linux-ext4@...r.kernel.org,
        Andrew Perepechko <andrew.perepechko@....com>
Subject: Re: [PATCH v3] ext4: truncate during setxattr leads to kernel panic

On Apr 2, 2022, at 2:40 AM, anserper@...ru wrote:
> 
> From: Andrew Perepechko <andrew.perepechko@....com>
> 
> When changing a large xattr value to a different large xattr value,
> the old xattr inode is freed. Truncate during the final iput causes
> current transaction restart. Eventually, parent inode bh is marked
> dirty and kernel panic happens when jbd2 figures out that this bh
> belongs to the committed transaction.
> 
> A possible fix is to call this final iput in a separate thread.
> This way, setxattr transactions will never be split into two.
> Since the setxattr code adds xattr inodes with nlink=0 into the
> orphan list, old xattr inodes will be properly cleaned up in
> any case.
> 
> Signed-off-by: Andrew Perepechko <andrew.perepechko@....com>

Reviewed-by: Andreas Dilger <adilger@...ger.ca>

> HPE-bug-id: LUS-10534
> 
> Changes since v1:
> - fixed a bug added during the porting
> - fixed a workqueue related deadlock reported by Tetsuo Handa
> ---
> fs/ext4/ext4.h    |  7 +++++--
> fs/ext4/page-io.c |  2 +-
> fs/ext4/super.c   | 15 ++++++++-------
> fs/ext4/xattr.c   | 39 +++++++++++++++++++++++++++++++++++++--
> 4 files changed, 51 insertions(+), 12 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 3f87cca49f0c..52db5d6bae7f 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1650,8 +1650,11 @@ struct ext4_sb_info {
> 	struct flex_groups * __rcu *s_flex_groups;
> 	ext4_group_t s_flex_groups_allocated;
> 
> -	/* workqueue for reserved extent conversions (buffered io) */
> -	struct workqueue_struct *rsv_conversion_wq;
> +	/*
> +	 * workqueue for reserved extent conversions (buffered io)
> +	 * and large ea inodes reclaim
> +	 */
> +	struct workqueue_struct *s_misc_wq;
> 
> 	/* timer for periodic error stats printing */
> 	struct timer_list s_err_report;
> diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
> index 495ce59fb4ad..0142b88471ff 100644
> --- a/fs/ext4/page-io.c
> +++ b/fs/ext4/page-io.c
> @@ -228,7 +228,7 @@ static void ext4_add_complete_io(ext4_io_end_t *io_end)
> 	WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
> 	WARN_ON(!io_end->handle && sbi->s_journal);
> 	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
> -	wq = sbi->rsv_conversion_wq;
> +	wq = sbi->s_misc_wq;
> 	if (list_empty(&ei->i_rsv_conversion_list))
> 		queue_work(wq, &ei->i_rsv_conversion_work);
> 	list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 81749eaddf4c..ee03f593b264 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1200,10 +1200,11 @@ static void ext4_put_super(struct super_block *sb)
> 	int i, err;
> 
> 	ext4_unregister_li_request(sb);
> +	flush_workqueue(sbi->s_misc_wq);
> 	ext4_quota_off_umount(sb);
> 
> 	flush_work(&sbi->s_error_work);
> -	destroy_workqueue(sbi->rsv_conversion_wq);
> +	destroy_workqueue(sbi->s_misc_wq);
> 	ext4_release_orphan_info(sb);
> 
> 	/*
> @@ -5294,9 +5295,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
> 	 * The maximum number of concurrent works can be high and
> 	 * concurrency isn't really necessary.  Limit it to 1.
> 	 */
> -	EXT4_SB(sb)->rsv_conversion_wq =
> -		alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
> -	if (!EXT4_SB(sb)->rsv_conversion_wq) {
> +	EXT4_SB(sb)->s_misc_wq =
> +		alloc_workqueue("ext4-misc", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
> +	if (!EXT4_SB(sb)->s_misc_wq) {
> 		printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
> 		ret = -ENOMEM;
> 		goto failed_mount4;
> @@ -5514,8 +5515,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
> 	sb->s_root = NULL;
> failed_mount4:
> 	ext4_msg(sb, KERN_ERR, "mount failed");
> -	if (EXT4_SB(sb)->rsv_conversion_wq)
> -		destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
> +	if (EXT4_SB(sb)->s_misc_wq)
> +		destroy_workqueue(EXT4_SB(sb)->s_misc_wq);
> failed_mount_wq:
> 	ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
> 	sbi->s_ea_inode_cache = NULL;
> @@ -6129,7 +6130,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
> 		return 0;
> 
> 	trace_ext4_sync_fs(sb, wait);
> -	flush_workqueue(sbi->rsv_conversion_wq);
> +	flush_workqueue(sbi->s_misc_wq);
> 	/*
> 	 * Writeback quota in non-journalled quota case - journalled quota has
> 	 * no dirty dquots
> diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
> index 042325349098..ee13675fbead 100644
> --- a/fs/ext4/xattr.c
> +++ b/fs/ext4/xattr.c
> @@ -1544,6 +1544,36 @@ static int ext4_xattr_inode_lookup_create(handle_t *handle, struct inode *inode,
> 	return 0;
> }
> 
> +struct delayed_iput_work {
> +	struct work_struct work;
> +	struct inode *inode;
> +};
> +
> +static void delayed_iput_fn(struct work_struct *work)
> +{
> +	struct delayed_iput_work *diwork;
> +
> +	diwork = container_of(work, struct delayed_iput_work, work);
> +	iput(diwork->inode);
> +	kfree(diwork);
> +}
> +
> +static void delayed_iput(struct inode *inode, struct delayed_iput_work *work)
> +{
> +	if (!inode) {
> +		kfree(work);
> +		return;
> +	}
> +
> +	if (!work) {
> +		iput(inode);
> +	} else {
> +		INIT_WORK(&work->work, delayed_iput_fn);
> +		work->inode = inode;
> +		queue_work(EXT4_SB(inode->i_sb)->s_misc_wq, &work->work);
> +	}
> +}
> +
> /*
>  * Reserve min(block_size/8, 1024) bytes for xattr entries/names if ea_inode
>  * feature is enabled.
> @@ -1561,6 +1591,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
> 	int in_inode = i->in_inode;
> 	struct inode *old_ea_inode = NULL;
> 	struct inode *new_ea_inode = NULL;
> +	struct delayed_iput_work *diwork = NULL;
> 	size_t old_size, new_size;
> 	int ret;
> 
> @@ -1637,7 +1668,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
> 	 * Finish that work before doing any modifications to the xattr data.
> 	 */
> 	if (!s->not_found && here->e_value_inum) {
> -		ret = ext4_xattr_inode_iget(inode,
> +		diwork = kmalloc(sizeof(*diwork), GFP_NOFS);
> +		if (!diwork)
> +			ret = -ENOMEM;
> +		else
> +			ret = ext4_xattr_inode_iget(inode,
> 					    le32_to_cpu(here->e_value_inum),
> 					    le32_to_cpu(here->e_hash),
> 					    &old_ea_inode);
> @@ -1790,7 +1825,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
> 
> 	ret = 0;
> out:
> -	iput(old_ea_inode);
> +	delayed_iput(old_ea_inode, diwork);
> 	iput(new_ea_inode);
> 	return ret;
> }
> --
> 2.25.1
> 


Cheers, Andreas






Download attachment "signature.asc" of type "application/pgp-signature" (874 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ