[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <319b7c8e-7595-d4a5-b9a2-11ebea670281@huaweicloud.com>
Date: Thu, 11 Sep 2025 11:16:13 +0800
From: Yu Kuai <yukuai1@...weicloud.com>
To: linan666@...weicloud.com, song@...nel.org, neil@...wn.name,
namhyung@...il.com
Cc: linux-raid@...r.kernel.org, linux-kernel@...r.kernel.org,
yangerkun@...wei.com, yi.zhang@...wei.com, "yukuai (C)" <yukuai3@...wei.com>
Subject: Re: [PATCH 2/3] md: fix incorrect sync progress update on sync read
errors
Hi,
在 2025/09/11 10:04, linan666@...weicloud.com 写道:
> From: Li Nan <linan122@...wei.com>
>
> When a sync read fails and badblocks recording fails (exceeding the limit of 512),
I think it's better to mark this rdev Faulty directly; the user really needs to
replace this disk ASAP.
Thanks,
Kuai
> the device is not immediately marked Faulty. Instead, 'recovery_disabled'
> is set, and non-In_sync devices are removed later. This preserves array
> availability: if users never read the damaged region, the raid remains
> available and gains fault tolerance.
>
> However, during the brief window before the device removal,
> resync/recovery_offset was incorrectly updated to include the bad sectors.
> This could lead to inconsistent data being read from those sectors.
>
> Fix it by:
> - Set MD_RECOVERY_ERROR when bad block recording fails for sync reads.
> - Do not update curr_resync_completed if MD_RECOVERY_ERROR is set.
> - Use curr_resync_completed as the final resync progress indicator.
>
> Fixes: 5e5702898e93 ("md/raid10: Handle read errors during recovery better.")
> Fixes: 3a9f28a5117e ("md/raid1: improve handling of read failure during recovery.")
> Signed-off-by: Li Nan <linan122@...wei.com>
> ---
> drivers/md/md.c | 48 ++++++++++++++++++---------------------------
> drivers/md/raid10.c | 2 +-
> 2 files changed, 20 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 0094830126b4..f3abfc140481 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -9470,18 +9470,20 @@ void md_do_sync(struct md_thread *thread)
> time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
> (j - mddev->curr_resync_completed)*2
> >= mddev->resync_max - mddev->curr_resync_completed ||
> - mddev->curr_resync_completed > mddev->resync_max
> - )) {
> + mddev->curr_resync_completed > mddev->resync_max)) {
> /* time to update curr_resync_completed */
> wait_event(mddev->recovery_wait,
> atomic_read(&mddev->recovery_active) == 0);
> - mddev->curr_resync_completed = j;
> - if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
> - j > mddev->resync_offset)
> - mddev->resync_offset = j;
> - update_time = jiffies;
> - set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
> - sysfs_notify_dirent_safe(mddev->sysfs_completed);
> +
> + if (!test_bit(MD_RECOVERY_ERROR, &mddev->recovery)) {
> + mddev->curr_resync_completed = j;
> + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
> + j > mddev->resync_offset)
> + mddev->resync_offset = j;
> + update_time = jiffies;
> + set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
> + sysfs_notify_dirent_safe(mddev->sysfs_completed);
> + }
> }
>
> while (j >= mddev->resync_max &&
> @@ -9594,7 +9596,7 @@ void md_do_sync(struct md_thread *thread)
> wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
>
> if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
> - !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
> + !test_bit(MD_RECOVERY_ERROR, &mddev->recovery) &&
> mddev->curr_resync >= MD_RESYNC_ACTIVE) {
> mddev->curr_resync_completed = mddev->curr_resync;
> sysfs_notify_dirent_safe(mddev->sysfs_completed);
> @@ -9602,32 +9604,20 @@ void md_do_sync(struct md_thread *thread)
> mddev->pers->sync_request(mddev, max_sectors, max_sectors, &skipped);
>
> if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
> - mddev->curr_resync > MD_RESYNC_ACTIVE) {
> + mddev->curr_resync_completed > MD_RESYNC_ACTIVE) {
> + if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
> + mddev->curr_resync_completed = MaxSector;
> +
> if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
> - if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
> - if (mddev->curr_resync >= mddev->resync_offset) {
> - pr_debug("md: checkpointing %s of %s.\n",
> - desc, mdname(mddev));
> - if (test_bit(MD_RECOVERY_ERROR,
> - &mddev->recovery))
> - mddev->resync_offset =
> - mddev->curr_resync_completed;
> - else
> - mddev->resync_offset =
> - mddev->curr_resync;
> - }
> - } else
> - mddev->resync_offset = MaxSector;
> + mddev->resync_offset = mddev->curr_resync_completed;
> } else {
> - if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
> - mddev->curr_resync = MaxSector;
> if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
> test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
> rcu_read_lock();
> rdev_for_each_rcu(rdev, mddev)
> if (mddev->delta_disks >= 0 &&
> - rdev_needs_recovery(rdev, mddev->curr_resync))
> - rdev->recovery_offset = mddev->curr_resync;
> + rdev_needs_recovery(rdev, mddev->curr_resync_completed))
> + rdev->recovery_offset = mddev->curr_resync_completed;
> rcu_read_unlock();
> }
> }
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 02e1c3db70ca..c3cfbb0347e7 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -2543,7 +2543,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
>
> conf->mirrors[dw].recovery_disabled
> = mddev->recovery_disabled;
> - set_bit(MD_RECOVERY_INTR,
> + set_bit(MD_RECOVERY_ERROR,
> &mddev->recovery);
> break;
> }
>
Powered by blists - more mailing lists