[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a92ca042-b981-4f35-beec-ebf416e4239b@leemhuis.info>
Date: Tue, 6 Feb 2024 15:46:33 +0100
From: "Linux regression tracking (Thorsten Leemhuis)"
<regressions@...mhuis.info>
To: Yu Kuai <yukuai1@...weicloud.com>, linan666@...weicloud.com,
song@...nel.org
Cc: linux-raid@...r.kernel.org, linux-kernel@...r.kernel.org,
yi.zhang@...wei.com, houtao1@...wei.com, yangerkun@...wei.com,
"yukuai (C)" <yukuai3@...wei.com>,
Linux kernel regressions list <regressions@...ts.linux.dev>
Subject: Re: [PATCH 2/2] md: create symlink with disk holder after mddev
resume
Hi, Thorsten here, the Linux kernel's regression tracker.
On 21.12.23 09:49, Yu Kuai wrote:
> 在 2023/12/21 15:11, linan666@...weicloud.com 写道:
>> From: Li Nan <linan122@...wei.com>
>>
>> There is a risk of deadlock when a process gets disk->open_mutex after
>> suspending mddev, because other processes may hold open_mutex while
>> submitting io. For example:
>> [...]
> Nice catch! This patch looks good except that the new flag
> 'SymlinkCreated' doesn't look accurate, perhaps 'HolderLinked'
> will make more sense.
>
>> Fix it by getting disk->open_mutex after mddev resume, iterating each
>> mddev->disk to create symlink for rdev which has not been created yet.
>> and moving bd_unlink_disk_holder() to mddev_unlock(), rdev has been
>> deleted from mddev->disks here, which can avoid concurrent bind and
>> unbind,
>>
>> Fixes: 1b0a2d950ee2 ("md: use new apis to suspend array for ioctls
>> involed array reconfiguration")
Hey, what happened to that patch? It looks a lot like things stalled
here. I'm asking, because there is a regression report that claims
1b0a2d950ee2 to be the culprit that might or might not be causes by the
problem this patch tries to fix:
https://bugzilla.kernel.org/show_bug.cgi?id=218459
Ciao, Thorsten
>> Signed-off-by: Li Nan <linan122@...wei.com>
>> ---
>> drivers/md/md.c | 39 +++++++++++++++++++++++++++++----------
>> 1 file changed, 29 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index d6612b922c76..c128570f2a5d 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -521,6 +521,20 @@ void mddev_resume(struct mddev *mddev)
>> }
>> EXPORT_SYMBOL_GPL(mddev_resume);
>> +static void md_link_disk_holder(struct mddev *mddev)
>> +{
>> + struct md_rdev *rdev;
>> +
>> + rcu_read_lock();
>> + rdev_for_each_rcu(rdev, mddev) {
>> + if (test_bit(SymlinkCreated, &rdev->flags))
>> + continue;
>> + if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk))
>> + set_bit(SymlinkCreated, &rdev->flags);
>> + }
>> + rcu_read_unlock();
>> +}
>> +
>> /*
>> * Generic flush handling for md
>> */
>> @@ -902,6 +916,11 @@ void mddev_unlock(struct mddev *mddev)
>> list_for_each_entry_safe(rdev, tmp, &delete, same_set) {
>> list_del_init(&rdev->same_set);
>> + if (test_bit(SymlinkCreated, &rdev->flags)) {
>> + bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
>> + clear_bit(SymlinkCreated, &rdev->flags);
>> + }
>> + rdev->mddev = NULL;
>> kobject_del(&rdev->kobj);
>> export_rdev(rdev, mddev);
>> }
>> @@ -2526,8 +2545,6 @@ static int bind_rdev_to_array(struct md_rdev
>> *rdev, struct mddev *mddev)
>> sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks");
>> list_add_rcu(&rdev->same_set, &mddev->disks);
>> - if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk))
>> - set_bit(SymlinkCreated, &rdev->flags);
>> /* May as well allow recovery to be retried once */
>> mddev->recovery_disabled++;
>> @@ -2562,14 +2579,9 @@ static void md_kick_rdev_from_array(struct
>> md_rdev *rdev)
>> {
>> struct mddev *mddev = rdev->mddev;
>> - if (test_bit(SymlinkCreated, &rdev->flags)) {
>> - bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
>> - clear_bit(SymlinkCreated, &rdev->flags);
>> - }
>> list_del_rcu(&rdev->same_set);
>> pr_debug("md: unbind<%pg>\n", rdev->bdev);
>> mddev_destroy_serial_pool(rdev->mddev, rdev);
>> - rdev->mddev = NULL;
>> sysfs_remove_link(&rdev->kobj, "block");
>> sysfs_put(rdev->sysfs_state);
>> sysfs_put(rdev->sysfs_unack_badblocks);
>> @@ -4667,8 +4679,10 @@ new_dev_store(struct mddev *mddev, const char
>> *buf, size_t len)
>> if (err)
>> export_rdev(rdev, mddev);
>> mddev_unlock_and_resume(mddev);
>> - if (!err)
>> + if (!err) {
>> + md_link_disk_holder(mddev);
>> md_new_event();
>> + }
>> return err ? err : len;
>> }
>> @@ -6606,6 +6620,7 @@ static void autorun_devices(int part)
>> }
>> autorun_array(mddev);
>> mddev_unlock_and_resume(mddev);
>> + md_link_disk_holder(mddev);
>> }
>> /* on success, candidates will be empty, on error
>> * it won't...
>> @@ -7832,8 +7847,12 @@ static int md_ioctl(struct block_device *bdev,
>> blk_mode_t mode,
>> err != -EINVAL)
>> mddev->hold_active = 0;
>> - md_ioctl_need_suspend(cmd) ? mddev_unlock_and_resume(mddev) :
>> - mddev_unlock(mddev);
>> + if (md_ioctl_need_suspend(cmd)) {
>> + mddev_unlock_and_resume(mddev);
>> + md_link_disk_holder(mddev);
>> + } else {
>> + mddev_unlock(mddev);
>> + }
>> out:
>> if(did_set_md_closing)
>>
>
Powered by blists - more mailing lists