[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <b64fc92d-cb30-4b9e-b9c2-5652d50458dd@fwd.mgml.me>
Date: Sun, 21 Sep 2025 23:48:35 +0900
From: Kenta Akagi <k@....mgml.me>
To: Xiao Ni <xni@...hat.com>
Cc: Song Liu <song@...nel.org>, Yu Kuai <yukuai3@...wei.com>,
Mariusz Tkaczyk <mtkaczyk@...nel.org>, Shaohua Li <shli@...com>,
Guoqing Jiang <jgq516@...il.com>, linux-raid@...r.kernel.org,
linux-kernel@...r.kernel.org, Kenta Akagi <k@....mgml.me>
Subject: Re: [PATCH v4 1/9] md/raid1,raid10: Set the LastDev flag when the
configuration changes
Hi,
Thank you for review.
On 2025/09/21 16:54, Xiao Ni wrote:
> Hi Kenta
>
> On Mon, Sep 15, 2025 at 11:44 AM Kenta Akagi <k@...l.me> wrote:
>>
>> Currently, the LastDev flag is set on an rdev that failed a failfast
>> metadata write and called md_error, but did not become Faulty. It is
>> cleared when the metadata write retry succeeds. This has problems for
>> the following reasons:
>>
>> * Despite its name, the flag is only set during a metadata write window.
>> * Unlike when LastDev and Failfast was introduced, md_error on the last
>> rdev of a RAID1/10 array now sets MD_BROKEN. Thus when LastDev is set,
>> the array is already unwritable.
>>
>> A following commit will prevent failfast bios from breaking the array,
>> which requires knowing from outside the personality whether an rdev is
>> the last one. For that purpose, LastDev should be set on rdevs that must
>> not be lost.
>>
>> This commit ensures that LastDev is set on the indispensable rdev in a
>> degraded RAID1/10 array.
>>
>> Signed-off-by: Kenta Akagi <k@...l.me>
>> ---
>> drivers/md/md.c | 4 +---
>> drivers/md/md.h | 6 +++---
>> drivers/md/raid1.c | 34 +++++++++++++++++++++++++++++++++-
>> drivers/md/raid10.c | 34 +++++++++++++++++++++++++++++++++-
>> 4 files changed, 70 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/md/md.c b/drivers/md/md.c
>> index 4e033c26fdd4..268410b66b83 100644
>> --- a/drivers/md/md.c
>> +++ b/drivers/md/md.c
>> @@ -1007,10 +1007,8 @@ static void super_written(struct bio *bio)
>> if (!test_bit(Faulty, &rdev->flags)
>> && (bio->bi_opf & MD_FAILFAST)) {
>> set_bit(MD_SB_NEED_REWRITE, &mddev->sb_flags);
>> - set_bit(LastDev, &rdev->flags);
>> }
>> - } else
>> - clear_bit(LastDev, &rdev->flags);
>> + }
>>
>> bio_put(bio);
>>
>> diff --git a/drivers/md/md.h b/drivers/md/md.h
>> index 51af29a03079..ec598f9a8381 100644
>> --- a/drivers/md/md.h
>> +++ b/drivers/md/md.h
>> @@ -281,9 +281,9 @@ enum flag_bits {
>> * It is expects that no bad block log
>> * is present.
>> */
>> - LastDev, /* Seems to be the last working dev as
>> - * it didn't fail, so don't use FailFast
>> - * any more for metadata
>> + LastDev, /* This is the last working rdev.
>> + * so don't use FailFast any more for
>> + * metadata.
>> */
>> CollisionCheck, /*
>> * check if there is collision between raid1
>> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
>> index bf44878ec640..32ad6b102ff7 100644
>> --- a/drivers/md/raid1.c
>> +++ b/drivers/md/raid1.c
>> @@ -1733,6 +1733,33 @@ static void raid1_status(struct seq_file *seq, struct mddev *mddev)
>> seq_printf(seq, "]");
>> }
>>
>> +/**
>> + * update_lastdev - Set or clear LastDev flag for all rdevs in array
>> + * @conf: pointer to r1conf
>> + *
>> + * Sets LastDev if the device is In_sync and cannot be lost for the array.
>> + * Otherwise, clear it.
>> + *
>> + * Caller must hold ->device_lock.
>> + */
>> +static void update_lastdev(struct r1conf *conf)
>> +{
>> + int i;
>> + int alive_disks = conf->raid_disks - conf->mddev->degraded;
>> +
>> + for (i = 0; i < conf->raid_disks; i++) {
>> + struct md_rdev *rdev = conf->mirrors[i].rdev;
>> +
>> + if (rdev) {
>> + if (test_bit(In_sync, &rdev->flags) &&
>> + alive_disks == 1)
>> + set_bit(LastDev, &rdev->flags);
>> + else
>> + clear_bit(LastDev, &rdev->flags);
>> + }
>> + }
>> +}
>> +
>> /**
>> * raid1_error() - RAID1 error handler.
>> * @mddev: affected md device.
>> @@ -1767,8 +1794,10 @@ static void raid1_error(struct mddev *mddev, struct md_rdev *rdev)
>> }
>> }
>> set_bit(Blocked, &rdev->flags);
>> - if (test_and_clear_bit(In_sync, &rdev->flags))
>> + if (test_and_clear_bit(In_sync, &rdev->flags)) {
>> mddev->degraded++;
>> + update_lastdev(conf);
>> + }
>> set_bit(Faulty, &rdev->flags);
>> spin_unlock_irqrestore(&conf->device_lock, flags);
>> /*
>> @@ -1864,6 +1893,7 @@ static int raid1_spare_active(struct mddev *mddev)
>> }
>> }
>> mddev->degraded -= count;
>> + update_lastdev(conf);
>
> update_lastdev is called in raid1_spare_active, raid1_run and
> raid1_reshape. Could you explain the reason why it needs to call this
> function? Is it the reason you want to clear LastDev flag? If so, is
> it a right place to do it? As your commit message says, it will be
> cleared after retry metadata successfully. In raid1, is it the right
> place that fixes read/write successfully?
The intention was to set LastDev only when the rdev is the last device in the array.
As suggested in review, I will check whether it is the last device when
md_error is called, instead of using a flag.
As a result, update_lastdev() will be removed, and the purpose and behavior
of LastDev will remain unchanged in v5.
Thanks,
Akagi
>
> Best Regards
> Xiao
>
>> spin_unlock_irqrestore(&conf->device_lock, flags);
>>
>> print_conf(conf);
>> @@ -3290,6 +3320,7 @@ static int raid1_run(struct mddev *mddev)
>> rcu_assign_pointer(conf->thread, NULL);
>> mddev->private = conf;
>> set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
>> + update_lastdev(conf);
>>
>> md_set_array_sectors(mddev, raid1_size(mddev, 0, 0));
>>
>> @@ -3427,6 +3458,7 @@ static int raid1_reshape(struct mddev *mddev)
>>
>> spin_lock_irqsave(&conf->device_lock, flags);
>> mddev->degraded += (raid_disks - conf->raid_disks);
>> + update_lastdev(conf);
>> spin_unlock_irqrestore(&conf->device_lock, flags);
>> conf->raid_disks = mddev->raid_disks = raid_disks;
>> mddev->delta_disks = 0;
>> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
>> index b60c30bfb6c7..dc4edd4689f8 100644
>> --- a/drivers/md/raid10.c
>> +++ b/drivers/md/raid10.c
>> @@ -1983,6 +1983,33 @@ static int enough(struct r10conf *conf, int ignore)
>> _enough(conf, 1, ignore);
>> }
>>
>> +/**
>> + * update_lastdev - Set or clear LastDev flag for all rdevs in array
>> + * @conf: pointer to r10conf
>> + *
>> + * Sets LastDev if the device is In_sync and cannot be lost for the array.
>> + * Otherwise, clear it.
>> + *
>> + * Caller must hold ->reconfig_mutex or ->device_lock.
>> + */
>> +static void update_lastdev(struct r10conf *conf)
>> +{
>> + int i;
>> + int raid_disks = max(conf->geo.raid_disks, conf->prev.raid_disks);
>> +
>> + for (i = 0; i < raid_disks; i++) {
>> + struct md_rdev *rdev = conf->mirrors[i].rdev;
>> +
>> + if (rdev) {
>> + if (test_bit(In_sync, &rdev->flags) &&
>> + !enough(conf, i))
>> + set_bit(LastDev, &rdev->flags);
>> + else
>> + clear_bit(LastDev, &rdev->flags);
>> + }
>> + }
>> +}
>> +
>> /**
>> * raid10_error() - RAID10 error handler.
>> * @mddev: affected md device.
>> @@ -2013,8 +2040,10 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
>> return;
>> }
>> }
>> - if (test_and_clear_bit(In_sync, &rdev->flags))
>> + if (test_and_clear_bit(In_sync, &rdev->flags)) {
>> mddev->degraded++;
>> + update_lastdev(conf);
>> + }
>>
>> set_bit(MD_RECOVERY_INTR, &mddev->recovery);
>> set_bit(Blocked, &rdev->flags);
>> @@ -2102,6 +2131,7 @@ static int raid10_spare_active(struct mddev *mddev)
>> }
>> spin_lock_irqsave(&conf->device_lock, flags);
>> mddev->degraded -= count;
>> + update_lastdev(conf);
>> spin_unlock_irqrestore(&conf->device_lock, flags);
>>
>> print_conf(conf);
>> @@ -4159,6 +4189,7 @@ static int raid10_run(struct mddev *mddev)
>> md_set_array_sectors(mddev, size);
>> mddev->resync_max_sectors = size;
>> set_bit(MD_FAILFAST_SUPPORTED, &mddev->flags);
>> + update_lastdev(conf);
>>
>> if (md_integrity_register(mddev))
>> goto out_free_conf;
>> @@ -4567,6 +4598,7 @@ static int raid10_start_reshape(struct mddev *mddev)
>> */
>> spin_lock_irq(&conf->device_lock);
>> mddev->degraded = calc_degraded(conf);
>> + update_lastdev(conf);
>> spin_unlock_irq(&conf->device_lock);
>> mddev->raid_disks = conf->geo.raid_disks;
>> mddev->reshape_position = conf->reshape_progress;
>> --
>> 2.50.1
>>
>>
>
>
Powered by blists - more mailing lists