[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <a9119707-4fa1-58cb-377c-63df67f88957@huaweicloud.com>
Date: Wed, 5 Mar 2025 14:36:58 +0800
From: Yu Kuai <yukuai1@...weicloud.com>
To: Doug V Johnson <dougvj@...gvj.net>
Cc: Doug Johnson <dougvj@...il.com>, Song Liu <song@...nel.org>,
"open list:SOFTWARE RAID (Multiple Disks) SUPPORT"
<linux-raid@...r.kernel.org>, open list <linux-kernel@...r.kernel.org>,
"yukuai (C)" <yukuai3@...wei.com>
Subject: Re: [PATCH v3 3/3] md/raid5: check for overlapping bad blocks before
starting reshape
Hi,
在 2025/02/24 17:02, Doug V Johnson 写道:
> In addition to halting a reshape in progress when we encounter bad
> blocks, we want to make sure that we do not even attempt a reshape if we
> know before hand that there are too many overlapping bad blocks and we
> would have to stall the reshape.
>
> To do this, we add a new internal function array_has_badblock() which
> first checks to see if there are enough drives with bad blocks for the
> condition to occur and if there are proceeds to do a simple O(n^2) check
> for overlapping bad blocks. If more overlaps are found than can be
> corrected for, we return 1 for the presence of bad blocks, otherwise 0
>
> This function is invoked in raid5_start_reshape() and if there are bad
> blocks present, returns -EIO which is reported to userspace.
>
> It's possible for bad blocks to be discovered or put in the metadata
> after a reshape has started, so we want to leave in place the
> functionality to detect and halt a reshape.
>
> Signed-off-by: Doug V Johnson <dougvj@...gvj.net>
> ---
> drivers/md/raid5.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 94 insertions(+)
>
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 8b23109d6f37..4b907a674dd1 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -8451,6 +8451,94 @@ static int check_reshape(struct mddev *mddev)
> + mddev->delta_disks));
> }
>
> +static int array_has_badblock(struct r5conf *conf)
> +{
> + /* Searches for overlapping bad blocks on devices that would result
> + * in an unreadable condition
> + */
> + int i, j;
> + /* First see if we even have bad blocks on enough drives to have a
> + * bad read condition
> + */
> + int num_badblock_devs = 0;
> +
> + for (i = 0; i < conf->raid_disks; i++) {
> + if (rdev_has_badblock(conf->disks[i].rdev,
> + 0, conf->disks[i].rdev->sectors))
if (rdev->badblocks.count)
> + num_badblock_devs++;
> + }
> + if (num_badblock_devs <= conf->max_degraded) {
> + /* There are not enough devices with bad blocks to pose any
> + * read problem
> + */
> + return 0;
> + }
> + pr_debug("%s: running overlapping bad block check",
> + mdname(conf->mddev));
> + /* Do a more sophisticated check for overlapping regions */
> + for (i = 0; i < conf->raid_disks; i++) {
> + sector_t first_bad;
> + int bad_sectors;
> + sector_t next_check_s = 0;
> + int next_check_sectors = conf->disks[i].rdev->sectors;
> +
> + pr_debug("%s: badblock check: %i (s: %lu, sec: %i)",
> + mdname(conf->mddev), i,
> + (unsigned long)next_check_s, next_check_sectors);
> + while (is_badblock(conf->disks[i].rdev,
> + next_check_s, next_check_sectors,
> + &first_bad,
> + &bad_sectors) != 0) {
> + /* Align bad blocks to the size of our stripe */
> + sector_t aligned_first_bad = first_bad &
> + ~((sector_t)RAID5_STRIPE_SECTORS(conf) - 1);
> + int aligned_bad_sectors =
> + max_t(int, RAID5_STRIPE_SECTORS(conf),
> + bad_sectors);
> + int this_num_bad = 1;
For example, if first_bad is 0, bad_sectors is 512 in rdev0
> +
> + pr_debug("%s: found blocks %i %lu -> %i",
> + mdname(conf->mddev), i,
> + (unsigned long)aligned_first_bad,
> + aligned_bad_sectors);
> + for (j = 0; j < conf->raid_disks; j++) {
> + sector_t this_first_bad;
> + int this_bad_sectors;
> +
> + if (j == i)
> + continue;
> + if (is_badblock(conf->disks[j].rdev,
> + aligned_first_bad,
> + aligned_bad_sectors,
> + &this_first_bad,
> + &this_bad_sectors)) {
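And rdev1 has badblocks 0+256, rdev2 has badblocks 256+256.
If this array is a raid6 with max_degraded=2, then it's fine: no sector
is bad on more than two disks, yet the check above counts three disks
with bad blocks in the range 0+512 and would refuse the reshape.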
Perhaps a pseudocode loop like the following?
sector_t offset = 0;
while (offset < dev_sectors) {
len = dev_sectors - offset;
bad_disks = 0;
for (i = 0; i < conf->raid_disks; ++i) {
if (is_badblock(rdev, offset, len, &first_bad,
&bad_sectors)) {
if (first_bad <= offset) {
len = min(len, first_bad + bad_sectors -
offset);
bad_disks++;
} else {
len = min(len, first_bad - offset);
}
}
}
if (bad_disks > max_degraded)
return false;
offset += len;
}
return true;
Thanks,
Kuai
> + this_num_bad++;
> + pr_debug("md/raid:%s: bad block overlap dev %i: %lu %i",
> + mdname(conf->mddev), j,
> + (unsigned long)this_first_bad,
> + this_bad_sectors);
> + }
> + }
> + if (this_num_bad > conf->max_degraded) {
> + pr_debug("md/raid:%s: %i drives with unreadable sector(s) around %lu %i due to bad block list",
> + mdname(conf->mddev),
> + this_num_bad,
> + (unsigned long)first_bad,
> + bad_sectors);
> + return 1;
> + }
> + next_check_s = first_bad + bad_sectors;
> + next_check_sectors =
> + next_check_sectors - (first_bad + bad_sectors);
> + pr_debug("%s: badblock check: %i (s: %lu, sec: %i)",
> + mdname(conf->mddev), i,
> + (unsigned long)next_check_s,
> + next_check_sectors);
> + }
> + }
> + return 0;
> +}
> +
> static int raid5_start_reshape(struct mddev *mddev)
> {
> struct r5conf *conf = mddev->private;
> @@ -8498,6 +8586,12 @@ static int raid5_start_reshape(struct mddev *mddev)
> return -EINVAL;
> }
>
> + if (array_has_badblock(conf)) {
> + pr_warn("md/raid:%s: reshape not possible due to bad block list",
> + mdname(mddev));
> + return -EIO;
> + }
> +
> atomic_set(&conf->reshape_stripes, 0);
> spin_lock_irq(&conf->device_lock);
> write_seqcount_begin(&conf->gen_lock);
>
Powered by blists - more mailing lists