lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <9b504689-6c9a-48b5-882c-8ddfaf43a801@kernel.org>
Date: Sat, 12 Jul 2025 17:58:10 +0800
From: Yu Kuai <yukuai@...nel.org>
To: Zheng Qixing <zhengqixing@...weicloud.com>, song@...nel.org,
 yukuai3@...wei.com, linan122@...wei.com
Cc: linux-raid@...r.kernel.org, linux-kernel@...r.kernel.org,
 yi.zhang@...wei.com, yangerkun@...wei.com, houtao1@...wei.com,
 zhengqixing@...wei.com
Subject: Re: [PATCH] md: allow removing faulty rdev during resync

在 2025/7/7 15:54, Zheng Qixing 写道:

> From: Zheng Qixing <zhengqixing@...wei.com>
>
> During RAID resync, a faulty rdev cannot be removed: attempting hot
> removal fails with a "Device or resource busy" error.
>
> Reproduction steps:
>    mdadm -Cv /dev/md0 -l1 -n3 -e1.2 /dev/sd{b..d}
>    mdadm /dev/md0 -f /dev/sdb
>    mdadm /dev/md0 -r /dev/sdb
>    -> mdadm: hot remove failed for /dev/sdb: Device or resource busy
>
> After commit 4b10a3bc67c1 ("md: ensure resync is prioritized over
> recovery"), when a device becomes faulty during resync, the
> md_choose_sync_action() function returns early without calling
> remove_and_add_spares(), preventing faulty device removal.
>
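> This patch extracts a helper function, remove_spares(), from
> remove_and_add_spares() and calls it from the resync path of
> md_choose_sync_action(), so that faulty devices can be removed
> during RAID resync operations.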
>
> Fixes: 4b10a3bc67c1 ("md: ensure resync is prioritized over recovery")
> Signed-off-by: Zheng Qixing <zhengqixing@...wei.com>
> ---
>   drivers/md/md.c | 24 +++++++++++++++++-------
>   1 file changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 0f03b21e66e4..7f5e5a16243a 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c

Applied to md-6.17

Thanks

> @@ -9456,17 +9456,11 @@ static bool md_spares_need_change(struct mddev *mddev)
>   	return false;
>   }
>   
> -static int remove_and_add_spares(struct mddev *mddev,
> -				 struct md_rdev *this)
> +static int remove_spares(struct mddev *mddev, struct md_rdev *this)
>   {
>   	struct md_rdev *rdev;
> -	int spares = 0;
>   	int removed = 0;
>   
> -	if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> -		/* Mustn't remove devices when resync thread is running */
> -		return 0;
> -
>   	rdev_for_each(rdev, mddev) {
>   		if ((this == NULL || rdev == this) && rdev_removeable(rdev) &&
>   		    !mddev->pers->hot_remove_disk(mddev, rdev)) {
> @@ -9480,6 +9474,21 @@ static int remove_and_add_spares(struct mddev *mddev,
>   	if (removed && mddev->kobj.sd)
>   		sysfs_notify_dirent_safe(mddev->sysfs_degraded);
>   
> +	return removed;
> +}
> +
> +static int remove_and_add_spares(struct mddev *mddev,
> +				 struct md_rdev *this)
> +{
> +	struct md_rdev *rdev;
> +	int spares = 0;
> +	int removed = 0;
> +
> +	if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
> +		/* Mustn't remove devices when resync thread is running */
> +		return 0;
> +
> +	removed = remove_spares(mddev, this);
>   	if (this && removed)
>   		goto no_add;
>   
> @@ -9522,6 +9531,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
>   
>   	/* Check if resync is in progress. */
>   	if (mddev->recovery_cp < MaxSector) {
> +		remove_spares(mddev, NULL);
>   		set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
>   		clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
>   		return true;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ