[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <3b240652-580e-73d5-a318-612984902aad@huaweicloud.com>
Date: Thu, 21 Dec 2023 16:49:18 +0800
From: Yu Kuai <yukuai1@...weicloud.com>
To: linan666@...weicloud.com, song@...nel.org
Cc: linux-raid@...r.kernel.org, linux-kernel@...r.kernel.org,
yi.zhang@...wei.com, houtao1@...wei.com, yangerkun@...wei.com,
"yukuai (C)" <yukuai3@...wei.com>
Subject: Re: [PATCH 2/2] md: create symlink with disk holder after mddev
resume
Hi,
在 2023/12/21 15:11, linan666@...weicloud.com 写道:
> From: Li Nan <linan122@...wei.com>
>
> There is a risk of deadlock when a process gets disk->open_mutex after
> suspending mddev, because other processes may hold open_mutex while
> submitting io. For example:
>
> T1 T2
> blkdev_open
> bdev_open_by_dev
> mutex_lock(&disk->open_mutex)
> md_ioctl
> mddev_suspend_and_lock
> mddev_suspend
> md_add_new_disk
> bind_rdev_to_array
> bd_link_disk_holder
> //wait open_mutex
> blkdev_get_whole
> bdev_disk_changed
> efi_partition
> read_lba
> ...
> md_submit_bio
> md_handle_request
> //wait resume
>
Nice catch! This patch looks good except that the new flag
'SymlinkCreated' doesn't look accurate, perhaps 'HolderLinked'
will make more sense.
Thanks,
Kuai
> Fix it by getting disk->open_mutex after mddev resume, iterating each
> mddev->disk to create symlink for rdev which has not been created yet.
> and moving bd_unlink_disk_holder() to mddev_unlock(), rdev has been
> deleted from mddev->disks here, which can avoid concurrent bind and unbind,
>
> Fixes: 1b0a2d950ee2 ("md: use new apis to suspend array for ioctls involed array reconfiguration")
> Signed-off-by: Li Nan <linan122@...wei.com>
> ---
> drivers/md/md.c | 39 +++++++++++++++++++++++++++++----------
> 1 file changed, 29 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index d6612b922c76..c128570f2a5d 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -521,6 +521,20 @@ void mddev_resume(struct mddev *mddev)
> }
> EXPORT_SYMBOL_GPL(mddev_resume);
>
> +static void md_link_disk_holder(struct mddev *mddev)
> +{
> + struct md_rdev *rdev;
> +
> + rcu_read_lock();
> + rdev_for_each_rcu(rdev, mddev) {
> + if (test_bit(SymlinkCreated, &rdev->flags))
> + continue;
> + if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk))
> + set_bit(SymlinkCreated, &rdev->flags);
> + }
> + rcu_read_unlock();
> +}
> +
> /*
> * Generic flush handling for md
> */
> @@ -902,6 +916,11 @@ void mddev_unlock(struct mddev *mddev)
>
> list_for_each_entry_safe(rdev, tmp, &delete, same_set) {
> list_del_init(&rdev->same_set);
> + if (test_bit(SymlinkCreated, &rdev->flags)) {
> + bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
> + clear_bit(SymlinkCreated, &rdev->flags);
> + }
> + rdev->mddev = NULL;
> kobject_del(&rdev->kobj);
> export_rdev(rdev, mddev);
> }
> @@ -2526,8 +2545,6 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
> sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks");
>
> list_add_rcu(&rdev->same_set, &mddev->disks);
> - if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk))
> - set_bit(SymlinkCreated, &rdev->flags);
>
> /* May as well allow recovery to be retried once */
> mddev->recovery_disabled++;
> @@ -2562,14 +2579,9 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev)
> {
> struct mddev *mddev = rdev->mddev;
>
> - if (test_bit(SymlinkCreated, &rdev->flags)) {
> - bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
> - clear_bit(SymlinkCreated, &rdev->flags);
> - }
> list_del_rcu(&rdev->same_set);
> pr_debug("md: unbind<%pg>\n", rdev->bdev);
> mddev_destroy_serial_pool(rdev->mddev, rdev);
> - rdev->mddev = NULL;
> sysfs_remove_link(&rdev->kobj, "block");
> sysfs_put(rdev->sysfs_state);
> sysfs_put(rdev->sysfs_unack_badblocks);
> @@ -4667,8 +4679,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
> if (err)
> export_rdev(rdev, mddev);
> mddev_unlock_and_resume(mddev);
> - if (!err)
> + if (!err) {
> + md_link_disk_holder(mddev);
> md_new_event();
> + }
> return err ? err : len;
> }
>
> @@ -6606,6 +6620,7 @@ static void autorun_devices(int part)
> }
> autorun_array(mddev);
> mddev_unlock_and_resume(mddev);
> + md_link_disk_holder(mddev);
> }
> /* on success, candidates will be empty, on error
> * it won't...
> @@ -7832,8 +7847,12 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
> err != -EINVAL)
> mddev->hold_active = 0;
>
> - md_ioctl_need_suspend(cmd) ? mddev_unlock_and_resume(mddev) :
> - mddev_unlock(mddev);
> + if (md_ioctl_need_suspend(cmd)) {
> + mddev_unlock_and_resume(mddev);
> + md_link_disk_holder(mddev);
> + } else {
> + mddev_unlock(mddev);
> + }
>
> out:
> if(did_set_md_closing)
>
Powered by blists - more mailing lists