[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230623173236.2513554-4-linan666@huaweicloud.com>
Date:   Sat, 24 Jun 2023 01:32:36 +0800
From:   linan666@...weicloud.com
To:     song@...nel.org
Cc:     linux-raid@...r.kernel.org, linux-kernel@...r.kernel.org,
        linan122@...wei.com, yukuai3@...wei.com, yi.zhang@...wei.com,
        houtao1@...wei.com, yangerkun@...wei.com
Subject: [PATCH 3/3] md/raid10: handle replacement devices in fix_read_error
From: Li Nan <linan122@...wei.com>
In fix_read_error(), the handling of replacement devices is missing. If a
read from the replacement device fails, we will attempt to fix 'mirror->rdev'
instead, which is wrong. Get rdev from r10bio to ensure that the device being
fixed is the one on which the read error occurred.
Signed-off-by: Li Nan <linan122@...wei.com>
---
 drivers/md/raid10.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a36e53fce21f..4a7c8eaf6ea0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2726,15 +2726,10 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 {
 	int sect = 0; /* Offset from r10_bio->sector */
 	int sectors = r10_bio->sectors, slot = r10_bio->read_slot;
-	struct md_rdev *rdev;
+	struct md_rdev *rdev = r10_bio->devs[slot].rdev;
 	int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
 	int d = r10_bio->devs[slot].devnum;
 
-	/* still own a reference to this rdev, so it cannot
-	 * have been cleared recently.
-	 */
-	rdev = conf->mirrors[d].rdev;
-
 	if (test_bit(Faulty, &rdev->flags))
 		/* drive has already been failed, just ignore any
 		   more fix_read_error() attempts */
@@ -2763,12 +2758,11 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			s = PAGE_SIZE >> 9;
 
 		rcu_read_lock();
+		rdev = r10_bio->devs[slot].rdev;
 		do {
 			sector_t first_bad;
 			int bad_sectors;
 
-			d = r10_bio->devs[sl].devnum;
-			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (rdev &&
 			    test_bit(In_sync, &rdev->flags) &&
 			    !test_bit(Faulty, &rdev->flags) &&
@@ -2790,6 +2784,8 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			sl++;
 			if (sl == conf->copies)
 				sl = 0;
+			d = r10_bio->devs[sl].devnum;
+			rdev = rcu_dereference(conf->mirrors[d].rdev);
 		} while (sl != slot);
 		rcu_read_unlock();
 
@@ -2798,9 +2794,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			 * as bad on the first device to discourage future
 			 * reads.
 			 */
-			int dn = r10_bio->devs[slot].devnum;
-			rdev = conf->mirrors[dn].rdev;
-
+			rdev = r10_bio->devs[slot].rdev;
 			if (!rdev_set_badblocks(
 				    rdev,
 				    r10_bio->devs[slot].addr
@@ -2820,8 +2814,12 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			if (sl==0)
 				sl = conf->copies;
 			sl--;
-			d = r10_bio->devs[sl].devnum;
-			rdev = rcu_dereference(conf->mirrors[d].rdev);
+			if (sl == slot) {
+				rdev = r10_bio->devs[slot].rdev;
+			} else {
+				d = r10_bio->devs[sl].devnum;
+				rdev = rcu_dereference(conf->mirrors[d].rdev);
+			}
 			if (!rdev ||
 			    test_bit(Faulty, &rdev->flags) ||
 			    !test_bit(In_sync, &rdev->flags))
@@ -2854,8 +2852,12 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			if (sl==0)
 				sl = conf->copies;
 			sl--;
-			d = r10_bio->devs[sl].devnum;
-			rdev = rcu_dereference(conf->mirrors[d].rdev);
+			if (sl == slot) {
+				rdev = r10_bio->devs[slot].rdev;
+			} else {
+				d = r10_bio->devs[sl].devnum;
+				rdev = rcu_dereference(conf->mirrors[d].rdev);
+			}
 			if (!rdev ||
 			    test_bit(Faulty, &rdev->flags) ||
 			    !test_bit(In_sync, &rdev->flags))
-- 
2.39.2
Powered by blists - more mailing lists
 
