[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <7ba1f088-2dbb-74db-d9a2-1a7c466f9aa0@huaweicloud.com>
Date: Fri, 30 May 2025 14:43:26 +0800
From: Yu Kuai <yukuai1@...weicloud.com>
To: Yu Kuai <yukuai1@...weicloud.com>, mpatocka@...hat.com,
zdenek.kabelac@...il.com, song@...nel.org
Cc: linux-raid@...r.kernel.org, linux-kernel@...r.kernel.org,
yi.zhang@...wei.com, yangerkun@...wei.com, johnny.chenyi@...wei.com,
"yukuai (C)" <yukuai3@...wei.com>
Subject: Re: [PATCH v2] md/raid1,raid10: don't handle IO error for REQ_RAHEAD
and REQ_NOWAIT
在 2025/05/27 16:14, Yu Kuai 写道:
> From: Yu Kuai <yukuai3@...wei.com>
>
> IO with REQ_RAHEAD or REQ_NOWAIT can fail early, even if the storage medium
> is fine, hence recording badblocks or removing the disk from the array does
> not make sense.
>
> This problem was found by the lvm2 test lvcreate-large-raid, where dm-zero
> will fail read ahead IO directly.
>
> Reported-and-tested-by: Mikulas Patocka <mpatocka@...hat.com>
> Closes: https://lore.kernel.org/all/34fa755d-62c8-4588-8ee1-33cb1249bdf2@redhat.com/
> Signed-off-by: Yu Kuai <yukuai3@...wei.com>
> ---
> Changes in v2:
> - handle REQ_NOWAIT as well.
>
> drivers/md/raid1-10.c | 10 ++++++++++
> drivers/md/raid1.c | 19 ++++++++++---------
> drivers/md/raid10.c | 11 ++++++-----
> 3 files changed, 26 insertions(+), 14 deletions(-)
>
Applied to md-6.16
Thanks
> diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
> index c7efd8aab675..b8b3a9069701 100644
> --- a/drivers/md/raid1-10.c
> +++ b/drivers/md/raid1-10.c
> @@ -293,3 +293,13 @@ static inline bool raid1_should_read_first(struct mddev *mddev,
>
> return false;
> }
> +
> +/*
> + * bio with REQ_RAHEAD or REQ_NOWAIT can fail at anytime, before such IO is
> + * submitted to the underlying disks, hence don't record badblocks or retry
> + * in this case.
> + */
> +static inline bool raid1_should_handle_error(struct bio *bio)
> +{
> + return !(bio->bi_opf & (REQ_RAHEAD | REQ_NOWAIT));
> +}
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index 657d481525be..19c5a0ce5a40 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -373,14 +373,16 @@ static void raid1_end_read_request(struct bio *bio)
> */
> update_head_pos(r1_bio->read_disk, r1_bio);
>
> - if (uptodate)
> + if (uptodate) {
> set_bit(R1BIO_Uptodate, &r1_bio->state);
> - else if (test_bit(FailFast, &rdev->flags) &&
> - test_bit(R1BIO_FailFast, &r1_bio->state))
> + } else if (test_bit(FailFast, &rdev->flags) &&
> + test_bit(R1BIO_FailFast, &r1_bio->state)) {
> /* This was a fail-fast read so we definitely
> * want to retry */
> ;
> - else {
> + } else if (!raid1_should_handle_error(bio)) {
> + uptodate = 1;
> + } else {
> /* If all other devices have failed, we want to return
> * the error upwards rather than fail the last device.
> * Here we redefine "uptodate" to mean "Don't want to retry"
> @@ -451,16 +453,15 @@ static void raid1_end_write_request(struct bio *bio)
> struct bio *to_put = NULL;
> int mirror = find_bio_disk(r1_bio, bio);
> struct md_rdev *rdev = conf->mirrors[mirror].rdev;
> - bool discard_error;
> sector_t lo = r1_bio->sector;
> sector_t hi = r1_bio->sector + r1_bio->sectors;
> -
> - discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
> + bool ignore_error = !raid1_should_handle_error(bio) ||
> + (bio->bi_status && bio_op(bio) == REQ_OP_DISCARD);
>
> /*
> * 'one mirror IO has finished' event handler:
> */
> - if (bio->bi_status && !discard_error) {
> + if (bio->bi_status && !ignore_error) {
> set_bit(WriteErrorSeen, &rdev->flags);
> if (!test_and_set_bit(WantReplacement, &rdev->flags))
> set_bit(MD_RECOVERY_NEEDED, &
> @@ -511,7 +512,7 @@ static void raid1_end_write_request(struct bio *bio)
>
> /* Maybe we can clear some bad blocks. */
> if (rdev_has_badblock(rdev, r1_bio->sector, r1_bio->sectors) &&
> - !discard_error) {
> + !ignore_error) {
> r1_bio->bios[mirror] = IO_MADE_GOOD;
> set_bit(R1BIO_MadeGood, &r1_bio->state);
> }
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index dce06bf65016..b74780af4c22 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -399,6 +399,8 @@ static void raid10_end_read_request(struct bio *bio)
> * wait for the 'master' bio.
> */
> set_bit(R10BIO_Uptodate, &r10_bio->state);
> + } else if (!raid1_should_handle_error(bio)) {
> + uptodate = 1;
> } else {
> /* If all other devices that store this block have
> * failed, we want to return the error upwards rather
> @@ -456,9 +458,8 @@ static void raid10_end_write_request(struct bio *bio)
> int slot, repl;
> struct md_rdev *rdev = NULL;
> struct bio *to_put = NULL;
> - bool discard_error;
> -
> - discard_error = bio->bi_status && bio_op(bio) == REQ_OP_DISCARD;
> + bool ignore_error = !raid1_should_handle_error(bio) ||
> + (bio->bi_status && bio_op(bio) == REQ_OP_DISCARD);
>
> dev = find_bio_disk(conf, r10_bio, bio, &slot, &repl);
>
> @@ -472,7 +473,7 @@ static void raid10_end_write_request(struct bio *bio)
> /*
> * this branch is our 'one mirror IO has finished' event handler:
> */
> - if (bio->bi_status && !discard_error) {
> + if (bio->bi_status && !ignore_error) {
> if (repl)
> /* Never record new bad blocks to replacement,
> * just fail it.
> @@ -527,7 +528,7 @@ static void raid10_end_write_request(struct bio *bio)
> /* Maybe we can clear some bad blocks. */
> if (rdev_has_badblock(rdev, r10_bio->devs[slot].addr,
> r10_bio->sectors) &&
> - !discard_error) {
> + !ignore_error) {
> bio_put(bio);
> if (repl)
> r10_bio->devs[slot].repl_bio = IO_MADE_GOOD;
>
Powered by blists - more mailing lists