lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Thu, 21 Dec 2023 16:49:18 +0800
From: Yu Kuai <yukuai1@...weicloud.com>
To: linan666@...weicloud.com, song@...nel.org
Cc: linux-raid@...r.kernel.org, linux-kernel@...r.kernel.org,
 yi.zhang@...wei.com, houtao1@...wei.com, yangerkun@...wei.com,
 "yukuai (C)" <yukuai3@...wei.com>
Subject: Re: [PATCH 2/2] md: create symlink with disk holder after mddev
 resume

Hi,

在 2023/12/21 15:11, linan666@...weicloud.com 写道:
> From: Li Nan <linan122@...wei.com>
> 
> There is a risk of deadlock when a process gets disk->open_mutex after
> suspending mddev, because other processes may hold open_mutex while
> submitting io. For example:
> 
>    T1				T2
>    blkdev_open
>     bdev_open_by_dev
>      mutex_lock(&disk->open_mutex)
>    				md_ioctl
>    				 mddev_suspend_and_lock
>    				  mddev_suspend
>    				 md_add_new_disk
>    				  bind_rdev_to_array
>    				   bd_link_disk_holder
>    				    //wait open_mutex
>      blkdev_get_whole
>       bdev_disk_changed
>        efi_partition
>         read_lba
>          ...
>           md_submit_bio
>            md_handle_request
>             //wait resume
> 

Nice catch! This patch looks good except that the new flag
'SymlinkCreated' doesn't look accurate, perhaps 'HolderLinked'
will make more sense.

Thanks,
Kuai

> Fix it by getting disk->open_mutex after mddev resume, iterating each
> mddev->disk to create symlink for rdev which has not been created yet.
> and moving bd_unlink_disk_holder() to mddev_unlock(), rdev has been
> deleted from mddev->disks here, which can avoid concurrent bind and unbind,
> 
> Fixes: 1b0a2d950ee2 ("md: use new apis to suspend array for ioctls involed array reconfiguration")
> Signed-off-by: Li Nan <linan122@...wei.com>
> ---
>   drivers/md/md.c | 39 +++++++++++++++++++++++++++++----------
>   1 file changed, 29 insertions(+), 10 deletions(-)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index d6612b922c76..c128570f2a5d 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -521,6 +521,20 @@ void mddev_resume(struct mddev *mddev)
>   }
>   EXPORT_SYMBOL_GPL(mddev_resume);
>   
> +static void md_link_disk_holder(struct mddev *mddev)
> +{
> +	struct md_rdev *rdev;
> +
> +	rcu_read_lock();
> +	rdev_for_each_rcu(rdev, mddev) {
> +		if (test_bit(SymlinkCreated, &rdev->flags))
> +			continue;
> +		if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk))
> +			set_bit(SymlinkCreated, &rdev->flags);
> +	}
> +	rcu_read_unlock();
> +}
> +
>   /*
>    * Generic flush handling for md
>    */
> @@ -902,6 +916,11 @@ void mddev_unlock(struct mddev *mddev)
>   
>   	list_for_each_entry_safe(rdev, tmp, &delete, same_set) {
>   		list_del_init(&rdev->same_set);
> +		if (test_bit(SymlinkCreated, &rdev->flags)) {
> +			bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
> +			clear_bit(SymlinkCreated, &rdev->flags);
> +		}
> +		rdev->mddev = NULL;
>   		kobject_del(&rdev->kobj);
>   		export_rdev(rdev, mddev);
>   	}
> @@ -2526,8 +2545,6 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
>   		sysfs_get_dirent_safe(rdev->kobj.sd, "bad_blocks");
>   
>   	list_add_rcu(&rdev->same_set, &mddev->disks);
> -	if (!bd_link_disk_holder(rdev->bdev, mddev->gendisk))
> -		set_bit(SymlinkCreated, &rdev->flags);
>   
>   	/* May as well allow recovery to be retried once */
>   	mddev->recovery_disabled++;
> @@ -2562,14 +2579,9 @@ static void md_kick_rdev_from_array(struct md_rdev *rdev)
>   {
>   	struct mddev *mddev = rdev->mddev;
>   
> -	if (test_bit(SymlinkCreated, &rdev->flags)) {
> -		bd_unlink_disk_holder(rdev->bdev, rdev->mddev->gendisk);
> -		clear_bit(SymlinkCreated, &rdev->flags);
> -	}
>   	list_del_rcu(&rdev->same_set);
>   	pr_debug("md: unbind<%pg>\n", rdev->bdev);
>   	mddev_destroy_serial_pool(rdev->mddev, rdev);
> -	rdev->mddev = NULL;
>   	sysfs_remove_link(&rdev->kobj, "block");
>   	sysfs_put(rdev->sysfs_state);
>   	sysfs_put(rdev->sysfs_unack_badblocks);
> @@ -4667,8 +4679,10 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
>   	if (err)
>   		export_rdev(rdev, mddev);
>   	mddev_unlock_and_resume(mddev);
> -	if (!err)
> +	if (!err) {
> +		md_link_disk_holder(mddev);
>   		md_new_event();
> +	}
>   	return err ? err : len;
>   }
>   
> @@ -6606,6 +6620,7 @@ static void autorun_devices(int part)
>   			}
>   			autorun_array(mddev);
>   			mddev_unlock_and_resume(mddev);
> +			md_link_disk_holder(mddev);
>   		}
>   		/* on success, candidates will be empty, on error
>   		 * it won't...
> @@ -7832,8 +7847,12 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
>   	    err != -EINVAL)
>   		mddev->hold_active = 0;
>   
> -	md_ioctl_need_suspend(cmd) ? mddev_unlock_and_resume(mddev) :
> -				     mddev_unlock(mddev);
> +	if (md_ioctl_need_suspend(cmd)) {
> +		mddev_unlock_and_resume(mddev);
> +		md_link_disk_holder(mddev);
> +	} else {
> +		mddev_unlock(mddev);
> +	}
>   
>   out:
>   	if(did_set_md_closing)
> 


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ