[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230506012315.3370489-5-linan666@huaweicloud.com>
Date: Sat, 6 May 2023 09:23:15 +0800
From: linan666@...weicloud.com
To: song@...nel.org, neilb@...e.de, Rob.Becker@...erbed.com
Cc: linux-raid@...r.kernel.org, linux-kernel@...r.kernel.org,
linan122@...wei.com, yukuai3@...wei.com, yi.zhang@...wei.com,
houtao1@...wei.com, yangerkun@...wei.com
Subject: [PATCH v2 4/4] md/raid10: optimize check_decay_read_errors()
From: Li Nan <linan122@...wei.com>
check_decay_read_errors() is used to handle rdev->read_errors, but
read_errors is still incremented and read back in fix_read_error()
after check_decay_read_errors() has been invoked.
Move all operations on read_errors into check_decay_read_errors() and
remove the unnecessary atomic_read() calls on read_errors.
Suggested-by: Yu Kuai <yukuai3@...wei.com>
Signed-off-by: Li Nan <linan122@...wei.com>
---
drivers/md/raid10.c | 66 ++++++++++++++++++++++++---------------------
1 file changed, 35 insertions(+), 31 deletions(-)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 4d615fcc6a50..79f94882227d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2655,39 +2655,53 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
}
/*
- * Used by fix_read_error() to decay the per rdev read_errors.
+ * Used by fix_read_error() to decay the per rdev read_errors and check if
+ * read_error > max_read_errors.
* We halve the read error count for every hour that has elapsed
* since the last recorded read error.
*
*/
-static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
+static bool check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
{
- long cur_time_mon;
+ time64_t cur_time_mon = ktime_get_seconds();
unsigned long hours_since_last;
- unsigned int read_errors = atomic_read(&rdev->read_errors);
-
- cur_time_mon = ktime_get_seconds();
+ unsigned int read_errors;
+ unsigned int max_read_errors =
+ atomic_read(&mddev->max_corr_read_errors);
if (rdev->last_read_error == 0) {
/* first time we've seen a read error */
rdev->last_read_error = cur_time_mon;
- return;
- }
+ } else {
+ hours_since_last = (long)(cur_time_mon -
+ rdev->last_read_error) / 3600;
- hours_since_last = (long)(cur_time_mon -
- rdev->last_read_error) / 3600;
+ rdev->last_read_error = cur_time_mon;
- rdev->last_read_error = cur_time_mon;
+ /*
+ * if hours_since_last is > the number of bits in read_errors
+ * just set read errors to 0. We do this to avoid
+ * overflowing the shift of read_errors by hours_since_last.
+ */
+ read_errors = atomic_read(&rdev->read_errors);
+ if (hours_since_last >= 8 * sizeof(read_errors))
+ atomic_set(&rdev->read_errors, 0);
+ else
+ atomic_set(&rdev->read_errors,
+ read_errors >> hours_since_last);
+ }
- /*
- * if hours_since_last is > the number of bits in read_errors
- * just set read errors to 0. We do this to avoid
- * overflowing the shift of read_errors by hours_since_last.
- */
- if (hours_since_last >= 8 * sizeof(read_errors))
- atomic_set(&rdev->read_errors, 0);
- else
- atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
+ read_errors = atomic_inc_return(&rdev->read_errors);
+ if (read_errors > max_read_errors) {
+ pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %u:max %u]\n",
+ mdname(mddev), rdev->bdev, read_errors, max_read_errors);
+ pr_notice("md/raid10:%s: %pg: Failing raid device\n",
+ mdname(mddev), rdev->bdev);
+ md_error(mddev, rdev);
+ return false;
+ }
+
+ return true;
}
static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
@@ -2727,8 +2741,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
int sect = 0; /* Offset from r10_bio->sector */
int sectors = r10_bio->sectors;
struct md_rdev *rdev;
- unsigned int max_read_errors =
- atomic_read(&mddev->max_corr_read_errors);
int d = r10_bio->devs[r10_bio->read_slot].devnum;
/* still own a reference to this rdev, so it cannot
@@ -2741,15 +2753,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
more fix_read_error() attempts */
return;
- check_decay_read_errors(mddev, rdev);
- atomic_inc(&rdev->read_errors);
- if (atomic_read(&rdev->read_errors) > max_read_errors) {
- pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %u:max %u]\n",
- mdname(mddev), rdev->bdev,
- atomic_read(&rdev->read_errors), max_read_errors);
- pr_notice("md/raid10:%s: %pg: Failing raid device\n",
- mdname(mddev), rdev->bdev);
- md_error(mddev, rdev);
+ if (check_decay_read_errors(mddev, rdev)) {
r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
return;
}
--
2.31.1
Powered by blists - more mailing lists