lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 11 Nov 2022 09:50:38 +0800
From:   ChenXiaoSong <chenxiaosong2@...wei.com>
To:     Qu Wenruo <quwenruo.btrfs@....com>, <clm@...com>,
        <josef@...icpanda.com>, <dsterba@...e.com>
CC:     <linux-btrfs@...r.kernel.org>, <linux-kernel@...r.kernel.org>,
        <yi.zhang@...wei.com>, <zhangxiaoxu5@...wei.com>
Subject: Re: [PATCH] btrfs: qgroup: fix sleep from invalid context bug in
 update_qgroup_limit_item()

Yes, at least two places will sleep in btrfs_search_slot():

   update_qgroup_limit_item
     btrfs_search_slot
       setup_nodes_for_search
         reada_for_balance
           btrfs_readahead_node_child
             btrfs_readahead_tree_block
               btrfs_find_create_tree_block
                 alloc_extent_buffer
                   kmem_cache_zalloc
                     /* will sleep */
                     kmem_cache_alloc(GFP_NOFS|__GFP_NOFAIL|__GFP_ZERO)
               read_extent_buffer_pages
                 submit_extent_page
                   submit_one_bio /* disk IO, will sleep */

在 2022/11/11 6:58, Qu Wenruo 写道:
> 
> 
> On 2022/11/10 22:13, ChenXiaoSong wrote:
>> Syzkaller reported BUG as follows:
>>
>>    BUG: sleeping function called from invalid context at
>>         include/linux/sched/mm.h:274
>>    Call Trace:
>>     <TASK>
>>     dump_stack_lvl+0xcd/0x134
>>     __might_resched.cold+0x222/0x26b
>>     kmem_cache_alloc+0x2e7/0x3c0
>>     update_qgroup_limit_item+0xe1/0x390
>>     btrfs_qgroup_inherit+0x147b/0x1ee0
>>     create_subvol+0x4eb/0x1710
>>     btrfs_mksubvol+0xfe5/0x13f0
>>     __btrfs_ioctl_snap_create+0x2b0/0x430
>>     btrfs_ioctl_snap_create_v2+0x25a/0x520
>>     btrfs_ioctl+0x2a1c/0x5ce0
>>     __x64_sys_ioctl+0x193/0x200
>>     do_syscall_64+0x35/0x80
>>
>> Fix this by introducing __update_qgroup_limit_item() helper, allocate
>> memory outside of the spin lock.
>>
>> Signed-off-by: ChenXiaoSong <chenxiaosong2@...wei.com>
> 
> Unfortunately, __update_qgroup_limit_item() can still sleep.
> 
> As it calls btrfs_search_slot(), which can lead to disk IO if the qgroup 
> tree is not cached.
> 
> 
> I believe the proper way is to either unlock the spinlock inside 
> btrfs_qgroup_inherit() (which needs extra scrutiny on the qgroup lock), 
> or delayed the limit item updates until we have unlocked the spinlock.
> 
> To me, the latter one seems more reasonable, as it's just one qgroup 
> (@dstgroup), and we're doing the same delayed work for sysfs interface 
> creation.
> 
> Thanks,
> Qu
> 
>> ---
>>   fs/btrfs/qgroup.c | 35 ++++++++++++++++++++++++++---------
>>   1 file changed, 26 insertions(+), 9 deletions(-)
>>
>> diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
>> index 9334c3157c22..99a61cc04b68 100644
>> --- a/fs/btrfs/qgroup.c
>> +++ b/fs/btrfs/qgroup.c
>> @@ -768,11 +768,11 @@ static int del_qgroup_item(struct 
>> btrfs_trans_handle *trans, u64 qgroupid)
>>       return ret;
>>   }
>> -static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
>> -                    struct btrfs_qgroup *qgroup)
>> +static int __update_qgroup_limit_item(struct btrfs_trans_handle *trans,
>> +                      struct btrfs_qgroup *qgroup,
>> +                      struct btrfs_path *path) >   {
>>       struct btrfs_root *quota_root = trans->fs_info->quota_root;
>> -    struct btrfs_path *path;
>>       struct btrfs_key key;
>>       struct extent_buffer *l;
>>       struct btrfs_qgroup_limit_item *qgroup_limit;
>> @@ -783,10 +783,6 @@ static int update_qgroup_limit_item(struct 
>> btrfs_trans_handle *trans,
>>       key.type = BTRFS_QGROUP_LIMIT_KEY;
>>       key.offset = qgroup->qgroupid;
>> -    path = btrfs_alloc_path();
>> -    if (!path)
>> -        return -ENOMEM;
>> -
>>       ret = btrfs_search_slot(trans, quota_root, &key, path, 0, 1);
>>       if (ret > 0)
>>           ret = -ENOENT;
>> @@ -806,6 +802,21 @@ static int update_qgroup_limit_item(struct 
>> btrfs_trans_handle *trans,
>>       btrfs_mark_buffer_dirty(l);
>>   out:
>> +    return ret;
>> +}
>> +
>> +static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
>> +                    struct btrfs_qgroup *qgroup)
>> +{
>> +    struct btrfs_path *path;
>> +    int ret;
>> +
>> +    path = btrfs_alloc_path();
>> +    if (!path)
>> +        return -ENOMEM;
>> +
>> +    ret = __update_qgroup_limit_item(trans, qgroup, path);
>> +
>>       btrfs_free_path(path);
>>       return ret;
>>   }
>> @@ -2860,6 +2871,7 @@ int btrfs_qgroup_inherit(struct 
>> btrfs_trans_handle *trans, u64 srcid,
>>       bool need_rescan = false;
>>       u32 level_size = 0;
>>       u64 nums;
>> +    struct btrfs_path *path;
>>       /*
>>        * There are only two callers of this function.
>> @@ -2935,6 +2947,11 @@ int btrfs_qgroup_inherit(struct 
>> btrfs_trans_handle *trans, u64 srcid,
>>           ret = 0;
>>       }
>> +    path = btrfs_alloc_path();
>> +    if (!path) {
>> +        ret = -ENOMEM;
>> +        goto out;
>> +    }
>>       spin_lock(&fs_info->qgroup_lock);
>> @@ -2950,8 +2967,7 @@ int btrfs_qgroup_inherit(struct 
>> btrfs_trans_handle *trans, u64 srcid,
>>           dstgroup->max_excl = inherit->lim.max_excl;
>>           dstgroup->rsv_rfer = inherit->lim.rsv_rfer;
>>           dstgroup->rsv_excl = inherit->lim.rsv_excl;
>> -
>> -        ret = update_qgroup_limit_item(trans, dstgroup);
>> +        ret = __update_qgroup_limit_item(trans, dstgroup, path);
>>           if (ret) {
>>               qgroup_mark_inconsistent(fs_info);
>>               btrfs_info(fs_info,
>> @@ -3053,6 +3069,7 @@ int btrfs_qgroup_inherit(struct 
>> btrfs_trans_handle *trans, u64 srcid,
>>   unlock:
>>       spin_unlock(&fs_info->qgroup_lock);
>> +    btrfs_free_path(path);
>>       if (!ret)
>>           ret = btrfs_sysfs_add_one_qgroup(fs_info, dstgroup);
>>   out:
> 
> .

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ