[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e5788348-b547-8e10-21af-90544f3aa75c@kernel.org>
Date: Sun, 25 Jun 2023 15:26:38 +0800
From: Chao Yu <chao@...nel.org>
To: Jaegeuk Kim <jaegeuk@...nel.org>, linux-kernel@...r.kernel.org,
linux-f2fs-devel@...ts.sourceforge.net
Cc: stable@...r.kernel.org
Subject: Re: [f2fs-dev] [PATCH] f2fs: remove i_xattr_sem to avoid deadlock and
fix the original issue
On 2023/6/14 7:39, Jaegeuk Kim wrote:
> This reverts commit 27161f13e3c3 "f2fs: avoid race in between read xattr & write xattr".
>
> That introduced a deadlock case:
>
> Thread #1:
>
> [122554.641906][ T92] f2fs_getxattr+0xd4/0x5fc
> -> waiting for f2fs_down_read(&F2FS_I(inode)->i_xattr_sem);
>
> [122554.641927][ T92] __f2fs_get_acl+0x50/0x284
> [122554.641948][ T92] f2fs_init_acl+0x84/0x54c
> [122554.641969][ T92] f2fs_init_inode_metadata+0x460/0x5f0
> [122554.641990][ T92] f2fs_add_inline_entry+0x11c/0x350
> -> Locked dir->inode_page by f2fs_get_node_page()
>
> [122554.642009][ T92] f2fs_do_add_link+0x100/0x1e4
> [122554.642025][ T92] f2fs_create+0xf4/0x22c
> [122554.642047][ T92] vfs_create+0x130/0x1f4
>
> Thread #2:
>
> [123996.386358][ T92] __get_node_page+0x8c/0x504
> -> waiting for dir->inode_page lock
>
> [123996.386383][ T92] read_all_xattrs+0x11c/0x1f4
> [123996.386405][ T92] __f2fs_setxattr+0xcc/0x528
> [123996.386424][ T92] f2fs_setxattr+0x158/0x1f4
> -> f2fs_down_write(&F2FS_I(inode)->i_xattr_sem);
>
> [123996.386443][ T92] __f2fs_set_acl+0x328/0x430
> [123996.386618][ T92] f2fs_set_acl+0x38/0x50
> [123996.386642][ T92] posix_acl_chmod+0xc8/0x1c8
> [123996.386669][ T92] f2fs_setattr+0x5e0/0x6bc
> [123996.386689][ T92] notify_change+0x4d8/0x580
> [123996.386717][ T92] chmod_common+0xd8/0x184
> [123996.386748][ T92] do_fchmodat+0x60/0x124
> [123996.386766][ T92] __arm64_sys_fchmodat+0x28/0x3c
>
> Let's take a look at the original issue back.
>
> Thread A: Thread B:
> -f2fs_getxattr
> -lookup_all_xattrs
> -xnid = F2FS_I(inode)->i_xattr_nid;
> -f2fs_setxattr
> -__f2fs_setxattr
> -write_all_xattrs
> -truncate_xattr_node
> ... ...
> -write_checkpoint
> ... ...
> -alloc_nid <- nid reuse
> -get_node_page
> -f2fs_bug_on <- nid != node_footer->nid
One concern below:
Thread A: Thread B:
- f2fs_getxattr
- lookup_all_xattrs
- read_inline_xattr
- f2fs_get_node_page(ino)
- memcpy inline xattr
- f2fs_put_page
- f2fs_setxattr
- __f2fs_setxattr
- __f2fs_setxattr
- write_all_xattrs
- write xnode and inode
---> inline xattr may out of update here.
- read_xattr_block
- f2fs_get_node_page(xnid)
- memcpy xnode xattr
- f2fs_put_page
Do we need to keep xattr_{get,set} being atomical operation?
Thanks,
>
> I think we don't need to truncate xattr pages eagerly which introduces lots of
> data races without big benefits.
>
> Cc: <stable@...r.kernel.org>
> Signed-off-by: Jaegeuk Kim <jaegeuk@...nel.org>
> ---
> fs/f2fs/f2fs.h | 1 -
> fs/f2fs/super.c | 1 -
> fs/f2fs/xattr.c | 31 ++++++++-----------------------
> 3 files changed, 8 insertions(+), 25 deletions(-)
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 3f5b161dd743..7b9af2d51656 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -838,7 +838,6 @@ struct f2fs_inode_info {
>
> /* avoid racing between foreground op and gc */
> struct f2fs_rwsem i_gc_rwsem[2];
> - struct f2fs_rwsem i_xattr_sem; /* avoid racing between reading and changing EAs */
>
> int i_extra_isize; /* size of extra space located in i_addr */
> kprojid_t i_projid; /* id for project quota */
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 1b2c788ed80d..c917fa771f0e 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1418,7 +1418,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
> INIT_LIST_HEAD(&fi->gdirty_list);
> init_f2fs_rwsem(&fi->i_gc_rwsem[READ]);
> init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]);
> - init_f2fs_rwsem(&fi->i_xattr_sem);
>
> /* Will be used by directory only */
> fi->i_dir_level = F2FS_SB(sb)->dir_level;
> diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
> index 213805d3592c..bdc8a55085a2 100644
> --- a/fs/f2fs/xattr.c
> +++ b/fs/f2fs/xattr.c
> @@ -433,7 +433,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
> {
> struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> size_t inline_size = inline_xattr_size(inode);
> - struct page *in_page = NULL;
> + struct page *in_page = ipage;
> void *xattr_addr;
> void *inline_addr = NULL;
> struct page *xpage;
> @@ -446,29 +446,19 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
>
> /* write to inline xattr */
> if (inline_size) {
> - if (ipage) {
> - inline_addr = inline_xattr_addr(inode, ipage);
> - } else {
> + if (!in_page) {
> in_page = f2fs_get_node_page(sbi, inode->i_ino);
> if (IS_ERR(in_page)) {
> f2fs_alloc_nid_failed(sbi, new_nid);
> return PTR_ERR(in_page);
> }
> - inline_addr = inline_xattr_addr(inode, in_page);
> }
> + inline_addr = inline_xattr_addr(inode, in_page);
>
> - f2fs_wait_on_page_writeback(ipage ? ipage : in_page,
> - NODE, true, true);
> - /* no need to use xattr node block */
> + f2fs_wait_on_page_writeback(in_page, NODE, true, true);
> if (hsize <= inline_size) {
> - err = f2fs_truncate_xattr_node(inode);
> - f2fs_alloc_nid_failed(sbi, new_nid);
> - if (err) {
> - f2fs_put_page(in_page, 1);
> - return err;
> - }
> memcpy(inline_addr, txattr_addr, inline_size);
> - set_page_dirty(ipage ? ipage : in_page);
> + set_page_dirty(in_page);
> goto in_page_out;
> }
> }
> @@ -502,12 +492,13 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
> memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE);
>
> if (inline_size)
> - set_page_dirty(ipage ? ipage : in_page);
> + set_page_dirty(in_page);
> set_page_dirty(xpage);
>
> f2fs_put_page(xpage, 1);
> in_page_out:
> - f2fs_put_page(in_page, 1);
> + if (in_page != ipage)
> + f2fs_put_page(in_page, 1);
> return err;
> }
>
> @@ -528,10 +519,8 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
> if (len > F2FS_NAME_LEN)
> return -ERANGE;
>
> - f2fs_down_read(&F2FS_I(inode)->i_xattr_sem);
> error = lookup_all_xattrs(inode, ipage, index, len, name,
> &entry, &base_addr, &base_size, &is_inline);
> - f2fs_up_read(&F2FS_I(inode)->i_xattr_sem);
> if (error)
> return error;
>
> @@ -565,9 +554,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
> int error;
> size_t rest = buffer_size;
>
> - f2fs_down_read(&F2FS_I(inode)->i_xattr_sem);
> error = read_all_xattrs(inode, NULL, &base_addr);
> - f2fs_up_read(&F2FS_I(inode)->i_xattr_sem);
> if (error)
> return error;
>
> @@ -794,9 +781,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name,
> f2fs_balance_fs(sbi, true);
>
> f2fs_lock_op(sbi);
> - f2fs_down_write(&F2FS_I(inode)->i_xattr_sem);
> err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags);
> - f2fs_up_write(&F2FS_I(inode)->i_xattr_sem);
> f2fs_unlock_op(sbi);
>
> f2fs_update_time(sbi, REQ_TIME);
Powered by blists - more mailing lists