linux-kernel - Re: [PATCH md-6.9 06/10] md/raid1: factor out read_first_rdev() from read

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CALTww2-ypx2YJOeXTzj7Y0EtXMkfrTOAJzzmDnnUK=1irspWtQ@mail.gmail.com>
Date: Mon, 26 Feb 2024 22:16:51 +0800
From: Xiao Ni <xni@...hat.com>
To: Yu Kuai <yukuai1@...weicloud.com>
Cc: paul.e.luse@...ux.intel.com, song@...nel.org, neilb@...e.com, shli@...com, 
	linux-raid@...r.kernel.org, linux-kernel@...r.kernel.org, yukuai3@...wei.com, 
	yi.zhang@...wei.com, yangerkun@...wei.com
Subject: Re: [PATCH md-6.9 06/10] md/raid1: factor out read_first_rdev() from read_balance()

On Thu, Feb 22, 2024 at 4:04 PM Yu Kuai <yukuai1@...weicloud.com> wrote:
>
> From: Yu Kuai <yukuai3@...wei.com>
>
> read_balance() is hard to understand because there are too many status
> and branches, and it's overlong.
>
> This patch factor out the case to read the first rdev from
> read_balance(), there are no functional changes.
>
> Co-developed-by: Paul Luse <paul.e.luse@...ux.intel.com>
> Signed-off-by: Paul Luse <paul.e.luse@...ux.intel.com>
> Signed-off-by: Yu Kuai <yukuai3@...wei.com>
> ---
>  drivers/md/raid1.c | 63 +++++++++++++++++++++++++++++++++-------------
>  1 file changed, 46 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index 8089c569e84f..08c45ca55a7e 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -579,6 +579,47 @@ static sector_t align_to_barrier_unit_end(sector_t start_sector,
>         return len;
>  }
>
> +static void update_read_sectors(struct r1conf *conf, int disk,
> +                               sector_t this_sector, int len)
> +{
> +       struct raid1_info *info = &conf->mirrors[disk];
> +
> +       atomic_inc(&info->rdev->nr_pending);
> +       if (info->next_seq_sect != this_sector)
> +               info->seq_start = this_sector;
> +       info->next_seq_sect = this_sector + len;
> +}
> +
> +static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio,
> +                            int *max_sectors)
> +{
> +       sector_t this_sector = r1_bio->sector;
> +       int len = r1_bio->sectors;
> +       int disk;
> +
> +       for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
> +               struct md_rdev *rdev;
> +               int read_len;
> +
> +               if (r1_bio->bios[disk] == IO_BLOCKED)
> +                       continue;
> +
> +               rdev = conf->mirrors[disk].rdev;
> +               if (!rdev || test_bit(Faulty, &rdev->flags))
> +                       continue;
> +
> +               /* choose the first disk even if it has some bad blocks. */
> +               read_len = raid1_check_read_range(rdev, this_sector, &len);
> +               if (read_len > 0) {
> +                       update_read_sectors(conf, disk, this_sector, read_len);
> +                       *max_sectors = read_len;
> +                       return disk;
> +               }

Hi Kuai

It needs to update max_sectors even if the bad block starts before
this_sector. Because it can't read more than bad_blocks from other
member disks. If it reads more data than bad blocks, it will cause
data corruption. One rule here is read from the primary disk (the
first readable disk) if it has no bad block and read the
badblock-data-length data from other disks.

Best Regards
Xiao

> +       }
> +
> +       return -1;
> +}
> +
>  /*
>   * This routine returns the disk from which the requested read should
>   * be done. There is a per-array 'next expected sequential IO' sector
> @@ -603,7 +644,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
>         sector_t best_dist;
>         unsigned int min_pending;
>         struct md_rdev *rdev;
> -       int choose_first;
>
>   retry:
>         sectors = r1_bio->sectors;
> @@ -613,10 +653,11 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
>         best_pending_disk = -1;
>         min_pending = UINT_MAX;
>         best_good_sectors = 0;
> -       choose_first = raid1_should_read_first(conf->mddev, this_sector,
> -                                              sectors);
>         clear_bit(R1BIO_FailFast, &r1_bio->state);
>
> +       if (raid1_should_read_first(conf->mddev, this_sector, sectors))
> +               return choose_first_rdev(conf, r1_bio, max_sectors);
> +
>         for (disk = 0 ; disk < conf->raid_disks * 2 ; disk++) {
>                 sector_t dist;
>                 sector_t first_bad;
> @@ -662,8 +703,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
>                                  * bad_sectors from another device..
>                                  */
>                                 bad_sectors -= (this_sector - first_bad);
> -                               if (choose_first && sectors > bad_sectors)
> -                                       sectors = bad_sectors;
>                                 if (best_good_sectors > sectors)
>                                         best_good_sectors = sectors;
>
> @@ -673,8 +712,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
>                                         best_good_sectors = good_sectors;
>                                         best_disk = disk;
>                                 }
> -                               if (choose_first)
> -                                       break;
>                         }
>                         continue;
>                 } else {
> @@ -689,10 +726,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
>
>                 pending = atomic_read(&rdev->nr_pending);
>                 dist = abs(this_sector - conf->mirrors[disk].head_position);
> -               if (choose_first) {
> -                       best_disk = disk;
> -                       break;
> -               }
>                 /* Don't change to another disk for sequential reads */
>                 if (conf->mirrors[disk].next_seq_sect == this_sector
>                     || dist == 0) {
> @@ -760,13 +793,9 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
>                 rdev = conf->mirrors[best_disk].rdev;
>                 if (!rdev)
>                         goto retry;
> -               atomic_inc(&rdev->nr_pending);
> -               sectors = best_good_sectors;
> -
> -               if (conf->mirrors[best_disk].next_seq_sect != this_sector)
> -                       conf->mirrors[best_disk].seq_start = this_sector;
>
> -               conf->mirrors[best_disk].next_seq_sect = this_sector + sectors;
> +               sectors = best_good_sectors;
> +               update_read_sectors(conf, disk, this_sector, sectors);
>         }
>         *max_sectors = sectors;
>
> --
> 2.39.2
>
>