[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230623173236.2513554-4-linan666@huaweicloud.com>
Date: Sat, 24 Jun 2023 01:32:36 +0800
From: linan666@...weicloud.com
To: song@...nel.org
Cc: linux-raid@...r.kernel.org, linux-kernel@...r.kernel.org,
linan122@...wei.com, yukuai3@...wei.com, yi.zhang@...wei.com,
houtao1@...wei.com, yangerkun@...wei.com
Subject: [PATCH 3/3] md/raid10: handle replacement devices in fix_read_error
From: Li Nan <linan122@...wei.com>
In fix_read_error(), the handling of replacement devices is missing. If a
read from the replacement device fails, we will attempt to fix 'mirror->rdev'
instead, which is wrong. Get rdev from r10bio to ensure that the device being
fixed is the one on which the read error occurred.
Signed-off-by: Li Nan <linan122@...wei.com>
---
drivers/md/raid10.c | 32 +++++++++++++++++---------------
1 file changed, 17 insertions(+), 15 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a36e53fce21f..4a7c8eaf6ea0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2726,15 +2726,10 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
{
int sect = 0; /* Offset from r10_bio->sector */
int sectors = r10_bio->sectors, slot = r10_bio->read_slot;
- struct md_rdev *rdev;
+ struct md_rdev *rdev = r10_bio->devs[slot].rdev;
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
int d = r10_bio->devs[slot].devnum;
- /* still own a reference to this rdev, so it cannot
- * have been cleared recently.
- */
- rdev = conf->mirrors[d].rdev;
-
if (test_bit(Faulty, &rdev->flags))
/* drive has already been failed, just ignore any
more fix_read_error() attempts */
@@ -2763,12 +2758,11 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
s = PAGE_SIZE >> 9;
rcu_read_lock();
+ rdev = r10_bio->devs[slot].rdev;
do {
sector_t first_bad;
int bad_sectors;
- d = r10_bio->devs[sl].devnum;
- rdev = rcu_dereference(conf->mirrors[d].rdev);
if (rdev &&
test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags) &&
@@ -2790,6 +2784,8 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
sl++;
if (sl == conf->copies)
sl = 0;
+ d = r10_bio->devs[sl].devnum;
+ rdev = rcu_dereference(conf->mirrors[d].rdev);
} while (sl != slot);
rcu_read_unlock();
@@ -2798,9 +2794,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
* as bad on the first device to discourage future
* reads.
*/
- int dn = r10_bio->devs[slot].devnum;
- rdev = conf->mirrors[dn].rdev;
-
+ rdev = r10_bio->devs[slot].rdev;
if (!rdev_set_badblocks(
rdev,
r10_bio->devs[slot].addr
@@ -2820,8 +2814,12 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
if (sl==0)
sl = conf->copies;
sl--;
- d = r10_bio->devs[sl].devnum;
- rdev = rcu_dereference(conf->mirrors[d].rdev);
+ if (sl == slot) {
+ rdev = r10_bio->devs[slot].rdev;
+ } else {
+ d = r10_bio->devs[sl].devnum;
+ rdev = rcu_dereference(conf->mirrors[d].rdev);
+ }
if (!rdev ||
test_bit(Faulty, &rdev->flags) ||
!test_bit(In_sync, &rdev->flags))
@@ -2854,8 +2852,12 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
if (sl==0)
sl = conf->copies;
sl--;
- d = r10_bio->devs[sl].devnum;
- rdev = rcu_dereference(conf->mirrors[d].rdev);
+ if (sl == slot) {
+ rdev = r10_bio->devs[slot].rdev;
+ } else {
+ d = r10_bio->devs[sl].devnum;
+ rdev = rcu_dereference(conf->mirrors[d].rdev);
+ }
if (!rdev ||
test_bit(Faulty, &rdev->flags) ||
!test_bit(In_sync, &rdev->flags))
--
2.39.2
Powered by blists - more mailing lists