[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250911020441.2858174-2-linan666@huaweicloud.com>
Date: Thu, 11 Sep 2025 10:04:40 +0800
From: linan666@...weicloud.com
To: song@...nel.org,
yukuai3@...wei.com,
neil@...wn.name,
namhyung@...il.com
Cc: linux-raid@...r.kernel.org,
linux-kernel@...r.kernel.org,
linan666@...weicloud.com,
yangerkun@...wei.com,
yi.zhang@...wei.com
Subject: [PATCH 2/3] md: fix incorrect sync progress update on sync read errors
From: Li Nan <linan122@...wei.com>
When a sync read fails and badblocks recording fails (exceeding the 512-entry badblocks limit),
the device is not immediately marked Faulty. Instead, 'recovery_disabled'
is set, and non-In_sync devices are removed later. This preserves array
availability: if users never read the damaged region, the raid remains
available and gains fault tolerance.
However, during the brief window before the device is removed,
resync/recovery_offset was incorrectly updated to include the bad sectors.
This could lead to inconsistent data being read from those sectors later.
Fix it by:
- Set MD_RECOVERY_ERROR when bad block recording fails for sync reads.
- Do not update curr_resync_completed if MD_RECOVERY_ERROR is set.
- Use curr_resync_completed as the final resync progress indicator.
Fixes: 5e5702898e93 ("md/raid10: Handle read errors during recovery better.")
Fixes: 3a9f28a5117e ("md/raid1: improve handling of read failure during recovery.")
Signed-off-by: Li Nan <linan122@...wei.com>
---
drivers/md/md.c | 48 ++++++++++++++++++---------------------------
drivers/md/raid10.c | 2 +-
2 files changed, 20 insertions(+), 30 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 0094830126b4..f3abfc140481 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -9470,18 +9470,20 @@ void md_do_sync(struct md_thread *thread)
time_after_eq(jiffies, update_time + UPDATE_FREQUENCY) ||
(j - mddev->curr_resync_completed)*2
>= mddev->resync_max - mddev->curr_resync_completed ||
- mddev->curr_resync_completed > mddev->resync_max
- )) {
+ mddev->curr_resync_completed > mddev->resync_max)) {
/* time to update curr_resync_completed */
wait_event(mddev->recovery_wait,
atomic_read(&mddev->recovery_active) == 0);
- mddev->curr_resync_completed = j;
- if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
- j > mddev->resync_offset)
- mddev->resync_offset = j;
- update_time = jiffies;
- set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
- sysfs_notify_dirent_safe(mddev->sysfs_completed);
+
+ if (!test_bit(MD_RECOVERY_ERROR, &mddev->recovery)) {
+ mddev->curr_resync_completed = j;
+ if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
+ j > mddev->resync_offset)
+ mddev->resync_offset = j;
+ update_time = jiffies;
+ set_bit(MD_SB_CHANGE_CLEAN, &mddev->sb_flags);
+ sysfs_notify_dirent_safe(mddev->sysfs_completed);
+ }
}
while (j >= mddev->resync_max &&
@@ -9594,7 +9596,7 @@ void md_do_sync(struct md_thread *thread)
wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
- !test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
+ !test_bit(MD_RECOVERY_ERROR, &mddev->recovery) &&
mddev->curr_resync >= MD_RESYNC_ACTIVE) {
mddev->curr_resync_completed = mddev->curr_resync;
sysfs_notify_dirent_safe(mddev->sysfs_completed);
@@ -9602,32 +9604,20 @@ void md_do_sync(struct md_thread *thread)
mddev->pers->sync_request(mddev, max_sectors, max_sectors, &skipped);
if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
- mddev->curr_resync > MD_RESYNC_ACTIVE) {
+ mddev->curr_resync_completed > MD_RESYNC_ACTIVE) {
+ if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+ mddev->curr_resync_completed = MaxSector;
+
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
- if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
- if (mddev->curr_resync >= mddev->resync_offset) {
- pr_debug("md: checkpointing %s of %s.\n",
- desc, mdname(mddev));
- if (test_bit(MD_RECOVERY_ERROR,
- &mddev->recovery))
- mddev->resync_offset =
- mddev->curr_resync_completed;
- else
- mddev->resync_offset =
- mddev->curr_resync;
- }
- } else
- mddev->resync_offset = MaxSector;
+ mddev->resync_offset = mddev->curr_resync_completed;
} else {
- if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
- mddev->curr_resync = MaxSector;
if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) {
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev)
if (mddev->delta_disks >= 0 &&
- rdev_needs_recovery(rdev, mddev->curr_resync))
- rdev->recovery_offset = mddev->curr_resync;
+ rdev_needs_recovery(rdev, mddev->curr_resync_completed))
+ rdev->recovery_offset = mddev->curr_resync_completed;
rcu_read_unlock();
}
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 02e1c3db70ca..c3cfbb0347e7 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2543,7 +2543,7 @@ static void fix_recovery_read_error(struct r10bio *r10_bio)
conf->mirrors[dw].recovery_disabled
= mddev->recovery_disabled;
- set_bit(MD_RECOVERY_INTR,
+ set_bit(MD_RECOVERY_ERROR,
&mddev->recovery);
break;
}
--
2.39.2
Powered by blists - more mailing lists