[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3dfb3cf4-6b6d-31bb-236e-6234ded94b03@huawei.com>
Date: Fri, 11 Nov 2022 09:50:38 +0800
From: ChenXiaoSong <chenxiaosong2@...wei.com>
To: Qu Wenruo <quwenruo.btrfs@....com>, <clm@...com>,
<josef@...icpanda.com>, <dsterba@...e.com>
CC: <linux-btrfs@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
<yi.zhang@...wei.com>, <zhangxiaoxu5@...wei.com>
Subject: Re: [PATCH] btrfs: qgroup: fix sleep from invalid context bug in
update_qgroup_limit_item()
Yes, at least two places will sleep in btrfs_search_slot():
update_qgroup_limit_item
btrfs_search_slot
setup_nodes_for_search
reada_for_balance
btrfs_readahead_node_child
btrfs_readahead_tree_block
btrfs_find_create_tree_block
alloc_extent_buffer
kmem_cache_zalloc
/* will sleep */
kmem_cache_alloc(GFP_NOFS|__GFP_NOFAIL|__GFP_ZERO)
read_extent_buffer_pages
submit_extent_page
submit_one_bio /* disk IO, will sleep */
On 2022/11/11 6:58, Qu Wenruo wrote:
>
>
> On 2022/11/10 22:13, ChenXiaoSong wrote:
>> Syzkaller reported BUG as follows:
>>
>> BUG: sleeping function called from invalid context at
>> include/linux/sched/mm.h:274
>> Call Trace:
>> <TASK>
>> dump_stack_lvl+0xcd/0x134
>> __might_resched.cold+0x222/0x26b
>> kmem_cache_alloc+0x2e7/0x3c0
>> update_qgroup_limit_item+0xe1/0x390
>> btrfs_qgroup_inherit+0x147b/0x1ee0
>> create_subvol+0x4eb/0x1710
>> btrfs_mksubvol+0xfe5/0x13f0
>> __btrfs_ioctl_snap_create+0x2b0/0x430
>> btrfs_ioctl_snap_create_v2+0x25a/0x520
>> btrfs_ioctl+0x2a1c/0x5ce0
>> __x64_sys_ioctl+0x193/0x200
>> do_syscall_64+0x35/0x80
>>
>> Fix this by introducing the __update_qgroup_limit_item() helper and
>> allocating memory outside of the spin lock.
>>
>> Signed-off-by: ChenXiaoSong <chenxiaosong2@...wei.com>
>
> Unfortunately, __update_qgroup_limit_item() can still sleep.
>
> As it calls btrfs_search_slot(), which can lead to disk IO if the qgroup
> tree is not cached.
>
>
> I believe the proper way is to either unlock the spinlock inside
> btrfs_qgroup_inherit() (which needs extra scrutiny on the qgroup lock),
> or delay the limit item updates until we have unlocked the spinlock.
>
> To me, the latter one seems more reasonable, as it's just one qgroup
> (@dstgroup), and we're doing the same delayed work for sysfs interface
> creation.
>
> Thanks,
> Qu
>
>> ---
>> fs/btrfs/qgroup.c | 35 ++++++++++++++++++++++++++---------
>> 1 file changed, 26 insertions(+), 9 deletions(-)
>>
>> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
>> index 9334c3157c22..99a61cc04b68 100644
>> --- a/fs/btrfs/qgroup.c
>> +++ b/fs/btrfs/qgroup.c
>> @@ -768,11 +768,11 @@ static int del_qgroup_item(struct
>> btrfs_trans_handle *trans, u64 qgroupid)
>> return ret;
>> }
>> -static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
>> - struct btrfs_qgroup *qgroup)
>> +static int __update_qgroup_limit_item(struct btrfs_trans_handle *trans,
>> + struct btrfs_qgroup *qgroup,
>> + struct btrfs_path *path)
>> {
>> struct btrfs_root *quota_root = trans->fs_info->quota_root;
>> - struct btrfs_path *path;
>> struct btrfs_key key;
>> struct extent_buffer *l;
>> struct btrfs_qgroup_limit_item *qgroup_limit;
>> @@ -783,10 +783,6 @@ static int update_qgroup_limit_item(struct
>> btrfs_trans_handle *trans,
>> key.type = BTRFS_QGROUP_LIMIT_KEY;
>> key.offset = qgroup->qgroupid;
>> - path = btrfs_alloc_path();
>> - if (!path)
>> - return -ENOMEM;
>> -
>> ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
>> if (ret > 0)
>> ret = -ENOENT;
>> @@ -806,6 +802,21 @@ static int update_qgroup_limit_item(struct
>> btrfs_trans_handle *trans,
>> btrfs_mark_buffer_dirty(l);
>> out:
>> + return ret;
>> +}
>> +
>> +static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
>> + struct btrfs_qgroup *qgroup)
>> +{
>> + struct btrfs_path *path;
>> + int ret;
>> +
>> + path = btrfs_alloc_path();
>> + if (!path)
>> + return -ENOMEM;
>> +
>> + ret = __update_qgroup_limit_item(trans, qgroup, path);
>> +
>> btrfs_free_path(path);
>> return ret;
>> }
>> @@ -2860,6 +2871,7 @@ int btrfs_qgroup_inherit(struct
>> btrfs_trans_handle *trans, u64 srcid,
>> bool need_rescan = false;
>> u32 level_size = 0;
>> u64 nums;
>> + struct btrfs_path *path;
>> /*
>> * There are only two callers of this function.
>> @@ -2935,6 +2947,11 @@ int btrfs_qgroup_inherit(struct
>> btrfs_trans_handle *trans, u64 srcid,
>> ret = 0;
>> }
>> + path = btrfs_alloc_path();
>> + if (!path) {
>> + ret = -ENOMEM;
>> + goto out;
>> + }
>> spin_lock(&fs_info->qgroup_lock);
>> @@ -2950,8 +2967,7 @@ int btrfs_qgroup_inherit(struct
>> btrfs_trans_handle *trans, u64 srcid,
>> dstgroup->max_excl = inherit->lim.max_excl;
>> dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
>> dstgroup->rsv_excl = inherit->lim.rsv_excl;
>> -
>> - ret = update_qgroup_limit_item(trans, dstgroup);
>> + ret = __update_qgroup_limit_item(trans, dstgroup, path);
>> if (ret) {
>> qgroup_mark_inconsistent(fs_info);
>> btrfs_info(fs_info,
>> @@ -3053,6 +3069,7 @@ int btrfs_qgroup_inherit(struct
>> btrfs_trans_handle *trans, u64 srcid,
>> unlock:
>> spin_unlock(&fs_info->qgroup_lock);
>> + btrfs_free_path(path);
>> if (!ret)
>> ret = btrfs_sysfs_add_one_qgroup(fs_info, dstgroup);
>> out:
>
> .
Powered by blists - more mailing lists