[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <9e38a891-f5be-41ce-e00f-9ef8f905de10@huaweicloud.com>
Date: Mon, 22 Sep 2025 10:01:03 +0800
From: Li Nan <linan666@...weicloud.com>
To: linan666@...weicloud.com, song@...nel.org, yukuai3@...wei.com,
neil@...wn.name, namhyung@...il.com
Cc: linux-raid@...r.kernel.org, linux-kernel@...r.kernel.org,
yangerkun@...wei.com, yi.zhang@...wei.com
Subject: Re: [PATCH 2/7] md: mark rdev Faulty when badblocks setting fails
On 2025/9/17 17:35, linan666@...weicloud.com wrote:
> From: Li Nan <linan122@...wei.com>
>
> Currently when sync read fails and badblocks set fails (exceeding
> 512 limit), rdev isn't immediately marked Faulty. Instead
> 'recovery_disabled' is set and non-In_sync rdevs are removed later.
> This preserves array availability if bad regions aren't read, but bad
> sectors might be read by users before rdev removal. This occurs due
> to incorrect resync/recovery_offset updates that include these bad
> sectors.
>
> When badblocks exceed 512, keeping the disk provides little benefit
> while adding complexity. Prompt disk replacement is more important.
> Therefore when badblocks set fails, directly call md_error to mark rdev
> Faulty immediately, preventing potential data access issues.
>
> After this change, cleanup of offset update logic and 'recovery_disabled'
> handling will follow.
>
> Fixes: 5e5702898e93 ("md/raid10: Handle read errors during recovery better.")
> Fixes: 3a9f28a5117e ("md/raid1: improve handling of read failure during recovery.")
> Signed-off-by: Li Nan <linan122@...wei.com>
> ---
> drivers/md/md.c | 8 ++++++-
> drivers/md/raid1.c | 41 +++++++++++++++--------------------
> drivers/md/raid10.c | 53 ++++++++++++++++++++-------------------------
> drivers/md/raid5.c | 22 ++++++++-----------
> 4 files changed, 57 insertions(+), 67 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 1795f725f7fb..05b6b3145648 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -10245,8 +10245,14 @@ bool rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
> else
> s += rdev->data_offset;
>
> - if (!badblocks_set(&rdev->badblocks, s, sectors, 0))
> + if (!badblocks_set(&rdev->badblocks, s, sectors, 0)) {
> + /*
> + * Mark the disk as Faulty when setting badblocks fails,
> + * otherwise, bad sectors may be read.
> + */
> + md_error(mddev, rdev);
> return false;
> + }
>
> /* Make sure they get written out promptly */
> if (test_bit(ExternalBbl, &rdev->flags))
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index 397b3a2eaee4..f7238e9f35e5 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -2127,8 +2127,7 @@ static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
> rdev->mddev->recovery);
> }
> /* need to record an error - either for the block or the device */
> - if (!rdev_set_badblocks(rdev, sector, sectors, 0))
> - md_error(rdev->mddev, rdev);
> + rdev_set_badblocks(rdev, sector, sectors, 0);
> return 0;
> }
>
> @@ -2453,8 +2452,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
> if (!success) {
> /* Cannot read from anywhere - mark it bad */
> struct md_rdev *rdev = conf->mirrors[read_disk].rdev;
> - if (!rdev_set_badblocks(rdev, sect, s, 0))
> - md_error(mddev, rdev);
> + rdev_set_badblocks(rdev, sect, s, 0);
> break;
> }
> /* write it back and re-read */
> @@ -2498,7 +2496,7 @@ static void fix_read_error(struct r1conf *conf, struct r1bio *r1_bio)
> }
> }
>
> -static bool narrow_write_error(struct r1bio *r1_bio, int i)
> +static void narrow_write_error(struct r1bio *r1_bio, int i)
> {
> struct mddev *mddev = r1_bio->mddev;
> struct r1conf *conf = mddev->private;
> @@ -2519,10 +2517,9 @@ static bool narrow_write_error(struct r1bio *r1_bio, int i)
> sector_t sector;
> int sectors;
> int sect_to_write = r1_bio->sectors;
> - bool ok = true;
>
> if (rdev->badblocks.shift < 0)
> - return false;
> + return;
>
> block_sectors = roundup(1 << rdev->badblocks.shift,
> bdev_logical_block_size(rdev->bdev) >> 9);
> @@ -2553,18 +2550,21 @@ static bool narrow_write_error(struct r1bio *r1_bio, int i)
> bio_trim(wbio, sector - r1_bio->sector, sectors);
> wbio->bi_iter.bi_sector += rdev->data_offset;
>
> - if (submit_bio_wait(wbio) < 0)
> - /* failure! */
> - ok = rdev_set_badblocks(rdev, sector,
> - sectors, 0)
> - && ok;
> + if (submit_bio_wait(wbio) < 0 &&
> + !rdev_set_badblocks(rdev, sector, sectors, 0)) {
> + /*
> + * Badblocks set failed, disk marked Faulty.
> + * No further operations needed.
> + */
> + bio_put(wbio);
> + break;
> + }
>
> bio_put(wbio);
> sect_to_write -= sectors;
> sector += sectors;
> sectors = block_sectors;
> }
> - return ok;
> }
>
> static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
> @@ -2577,14 +2577,11 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
> if (bio->bi_end_io == NULL)
> continue;
> if (!bio->bi_status &&
> - test_bit(R1BIO_MadeGood, &r1_bio->state)) {
> + test_bit(R1BIO_MadeGood, &r1_bio->state))
> rdev_clear_badblocks(rdev, r1_bio->sector, s, 0);
> - }
> if (bio->bi_status &&
> - test_bit(R1BIO_WriteError, &r1_bio->state)) {
> - if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0))
> - md_error(conf->mddev, rdev);
> - }
> + test_bit(R1BIO_WriteError, &r1_bio->state))
> + rdev_set_badblocks(rdev, r1_bio->sector, s, 0);
> }
> put_buf(r1_bio);
> md_done_sync(conf->mddev, s);
> @@ -2608,10 +2605,8 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
> * errors.
> */
> fail = true;
> - if (!narrow_write_error(r1_bio, m))
> - md_error(conf->mddev,
> - conf->mirrors[m].rdev);
> - /* an I/O failed, we can't clear the bitmap */
> + narrow_write_error(r1_bio, m);
> + /* an I/O failed, we can't clear the bitmap */
> rdev_dec_pending(conf->mirrors[m].rdev,
> conf->mddev);
> }
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 2899fd1ecc57..4c58c32f7c27 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -2610,8 +2610,7 @@ static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
> &rdev->mddev->recovery);
> }
> /* need to record an error - either for the block or the device */
> - if (!rdev_set_badblocks(rdev, sector, sectors, 0))
> - md_error(rdev->mddev, rdev);
> + rdev_set_badblocks(rdev, sector, sectors, 0);
> return 0;
> }
>
> @@ -2692,7 +2691,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
> r10_bio->devs[slot].addr
> + sect,
> s, 0)) {
> - md_error(mddev, rdev);
> r10_bio->devs[slot].bio
> = IO_BLOCKED;
> }
> @@ -2779,7 +2777,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
> }
> }
>
> -static bool narrow_write_error(struct r10bio *r10_bio, int i)
> +static void narrow_write_error(struct r10bio *r10_bio, int i)
> {
> struct bio *bio = r10_bio->master_bio;
> struct mddev *mddev = r10_bio->mddev;
> @@ -2800,10 +2798,9 @@ static bool narrow_write_error(struct r10bio *r10_bio, int i)
> sector_t sector;
> int sectors;
> int sect_to_write = r10_bio->sectors;
> - bool ok = true;
>
> if (rdev->badblocks.shift < 0)
> - return false;
> + return;
>
A direct return here is incorrect. This change ought to be removed after
Kenta's cleanup is merged; I will fix it once that cleanup lands.
https://lore.kernel.org/all/cbce67bc-74c6-0c99-fdba-48cd8aa27dda@huaweicloud.com/
> block_sectors = roundup(1 << rdev->badblocks.shift,
> bdev_logical_block_size(rdev->bdev) >> 9);
> @@ -2826,18 +2823,21 @@ static bool narrow_write_error(struct r10bio *r10_bio, int i)
> choose_data_offset(r10_bio, rdev);
> wbio->bi_opf = REQ_OP_WRITE;
>
> - if (submit_bio_wait(wbio) < 0)
> - /* Failure! */
> - ok = rdev_set_badblocks(rdev, wsector,
> - sectors, 0)
> - && ok;
> + if (submit_bio_wait(wbio) < 0 &&
> + rdev_set_badblocks(rdev, wsector, sectors, 0)) {
> + /*
> + * Badblocks set failed, disk marked Faulty.
> + * No further operations needed.
> + */
> + bio_put(wbio);
> + break;
> + }
>
> bio_put(wbio);
> sect_to_write -= sectors;
> sector += sectors;
> sectors = block_sectors;
> }
> - return ok;
> }
>
> static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
> @@ -2897,35 +2897,29 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
> if (r10_bio->devs[m].bio == NULL ||
> r10_bio->devs[m].bio->bi_end_io == NULL)
> continue;
> - if (!r10_bio->devs[m].bio->bi_status) {
> + if (!r10_bio->devs[m].bio->bi_status)
> rdev_clear_badblocks(
> rdev,
> r10_bio->devs[m].addr,
> r10_bio->sectors, 0);
> - } else {
> - if (!rdev_set_badblocks(
> - rdev,
> - r10_bio->devs[m].addr,
> - r10_bio->sectors, 0))
> - md_error(conf->mddev, rdev);
> - }
> + else
> + rdev_set_badblocks(rdev,
> + r10_bio->devs[m].addr,
> + r10_bio->sectors, 0);
> rdev = conf->mirrors[dev].replacement;
> if (r10_bio->devs[m].repl_bio == NULL ||
> r10_bio->devs[m].repl_bio->bi_end_io == NULL)
> continue;
>
> - if (!r10_bio->devs[m].repl_bio->bi_status) {
> + if (!r10_bio->devs[m].repl_bio->bi_status)
> rdev_clear_badblocks(
> rdev,
> r10_bio->devs[m].addr,
> r10_bio->sectors, 0);
> - } else {
> - if (!rdev_set_badblocks(
> - rdev,
> - r10_bio->devs[m].addr,
> - r10_bio->sectors, 0))
> - md_error(conf->mddev, rdev);
> - }
> + else
> + rdev_set_badblocks(rdev,
> + r10_bio->devs[m].addr,
> + r10_bio->sectors, 0);
> }
> put_buf(r10_bio);
> } else {
> @@ -2942,8 +2936,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
> rdev_dec_pending(rdev, conf->mddev);
> } else if (bio != NULL && bio->bi_status) {
> fail = true;
> - if (!narrow_write_error(r10_bio, m))
> - md_error(conf->mddev, rdev);
> + narrow_write_error(r10_bio, m);
> rdev_dec_pending(rdev, conf->mddev);
> }
> bio = r10_bio->devs[m].repl_bio;
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index b09265fb872a..70106abf2110 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -2817,11 +2817,9 @@ static void raid5_end_read_request(struct bio * bi)
> else {
> clear_bit(R5_ReadError, &sh->dev[i].flags);
> clear_bit(R5_ReWrite, &sh->dev[i].flags);
> - if (!(set_bad
> - && test_bit(In_sync, &rdev->flags)
> - && rdev_set_badblocks(
> - rdev, sh->sector, RAID5_STRIPE_SECTORS(conf), 0)))
> - md_error(conf->mddev, rdev);
> + if (!(set_bad && test_bit(In_sync, &rdev->flags)))
> + rdev_set_badblocks(rdev, sh->sector,
> + RAID5_STRIPE_SECTORS(conf), 0);
> }
> }
> rdev_dec_pending(rdev, conf->mddev);
> @@ -3599,11 +3597,10 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
> else
> rdev = NULL;
> if (rdev) {
> - if (!rdev_set_badblocks(
> - rdev,
> - sh->sector,
> - RAID5_STRIPE_SECTORS(conf), 0))
> - md_error(conf->mddev, rdev);
> + rdev_set_badblocks(rdev,
> + sh->sector,
> + RAID5_STRIPE_SECTORS(conf),
> + 0);
> rdev_dec_pending(rdev, conf->mddev);
> }
> }
> @@ -5254,9 +5251,8 @@ static void handle_stripe(struct stripe_head *sh)
> if (test_and_clear_bit(R5_WriteError, &dev->flags)) {
> /* We own a safe reference to the rdev */
> rdev = conf->disks[i].rdev;
> - if (!rdev_set_badblocks(rdev, sh->sector,
> - RAID5_STRIPE_SECTORS(conf), 0))
> - md_error(conf->mddev, rdev);
> + rdev_set_badblocks(rdev, sh->sector,
> + RAID5_STRIPE_SECTORS(conf), 0);
> rdev_dec_pending(rdev, conf->mddev);
> }
> if (test_and_clear_bit(R5_MadeGood, &dev->flags)) {
--
Thanks,
Nan
Powered by blists - more mailing lists