linux-kernel - Re: [PATCH -next 2/8] md: also clone new io if io accounting is disabled

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CALTww2-p6j94zxYfaDe5eHaDJSc+w86LBq1K9jDT1HhKmOOyiQ@mail.gmail.com>
Date:   Tue, 20 Jun 2023 16:48:19 +0800
From:   Xiao Ni <xni@...hat.com>
To:     Yu Kuai <yukuai1@...weicloud.com>
Cc:     song@...nel.org, linux-raid@...r.kernel.org,
        linux-kernel@...r.kernel.org, yukuai3@...wei.com,
        yi.zhang@...wei.com, yangerkun@...wei.com
Subject: Re: [PATCH -next 2/8] md: also clone new io if io accounting is disabled

On Mon, Jun 19, 2023 at 8:49 PM Yu Kuai <yukuai1@...weicloud.com> wrote:
>
> From: Yu Kuai <yukuai3@...wei.com>
>
> Currently, 'active_io' is grabed before make_reqeust() is called, and
> it's dropped immediately make_reqeust() returns. Hence 'active_io'
> actually means io is dispatching, not io is inflight.

Hi Kuai

There are three typo errors in the commit message: s/grabed/grabbed/g

>
> For raid0 and raid456 that io accounting is enabled, 'active_io' will
> also be grabed when bio is cloned for io accounting, and this 'active_io'
> is dropped until io is done.
>
> Always clone new bio so that 'active_io' will mean that io is inflight,
> raid1 and raid10 will switch to use this method in later patches. Once
> these are implemented, it can be cleaned up that 'active_io' is grabed
> twice for one io.
>
> Now that bio will be cloned even if io accounting is disabled, also
> rename related structure from '*_acct_*' to '*_clone_*'.
>
> Signed-off-by: Yu Kuai <yukuai3@...wei.com>
> ---
>  drivers/md/md.c    | 61 ++++++++++++++++++++++++----------------------
>  drivers/md/md.h    |  4 +--
>  drivers/md/raid5.c | 18 +++++++-------
>  3 files changed, 43 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index 42347289195a..5ad8e7f3aebd 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -2314,7 +2314,7 @@ int md_integrity_register(struct mddev *mddev)
>         pr_debug("md: data integrity enabled on %s\n", mdname(mddev));
>         if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) ||
>             (mddev->level != 1 && mddev->level != 10 &&
> -            bioset_integrity_create(&mddev->io_acct_set, BIO_POOL_SIZE))) {
> +            bioset_integrity_create(&mddev->io_clone_set, BIO_POOL_SIZE))) {
>                 /*
>                  * No need to handle the failure of bioset_integrity_create,
>                  * because the function is called by md_run() -> pers->run(),
> @@ -5886,9 +5886,9 @@ int md_run(struct mddev *mddev)
>                         goto exit_bio_set;
>         }
>
> -       if (!bioset_initialized(&mddev->io_acct_set)) {
> -               err = bioset_init(&mddev->io_acct_set, BIO_POOL_SIZE,
> -                                 offsetof(struct md_io_acct, bio_clone), 0);
> +       if (!bioset_initialized(&mddev->io_clone_set)) {
> +               err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
> +                                 offsetof(struct md_io_clone, bio_clone), 0);
>                 if (err)
>                         goto exit_sync_set;
>         }
> @@ -6070,7 +6070,7 @@ int md_run(struct mddev *mddev)
>         module_put(pers->owner);
>         md_bitmap_destroy(mddev);
>  abort:
> -       bioset_exit(&mddev->io_acct_set);
> +       bioset_exit(&mddev->io_clone_set);
>  exit_sync_set:
>         bioset_exit(&mddev->sync_set);
>  exit_bio_set:
> @@ -6295,7 +6295,7 @@ static void __md_stop(struct mddev *mddev)
>         percpu_ref_exit(&mddev->active_io);
>         bioset_exit(&mddev->bio_set);
>         bioset_exit(&mddev->sync_set);
> -       bioset_exit(&mddev->io_acct_set);
> +       bioset_exit(&mddev->io_clone_set);
>  }
>
>  void md_stop(struct mddev *mddev)
> @@ -8661,45 +8661,48 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
>  }
>  EXPORT_SYMBOL_GPL(md_submit_discard_bio);
>
> -static void md_end_io_acct(struct bio *bio)
> +static void md_end_clone_io(struct bio *bio)
>  {
> -       struct md_io_acct *md_io_acct = bio->bi_private;
> -       struct bio *orig_bio = md_io_acct->orig_bio;
> -       struct mddev *mddev = md_io_acct->mddev;
> +       struct md_io_clone *md_io_clone = bio->bi_private;
> +       struct bio *orig_bio = md_io_clone->orig_bio;
> +       struct mddev *mddev = md_io_clone->mddev;
>
>         orig_bio->bi_status = bio->bi_status;
>
> -       bio_end_io_acct(orig_bio, md_io_acct->start_time);
> +       if (md_io_clone->start_time)
> +               bio_end_io_acct(orig_bio, md_io_clone->start_time);
> +
>         bio_put(bio);
>         bio_endio(orig_bio);
> -
>         percpu_ref_put(&mddev->active_io);
>  }
>
> +static void md_clone_bio(struct mddev *mddev, struct bio **bio)
> +{
> +       struct block_device *bdev = (*bio)->bi_bdev;
> +       struct md_io_clone *md_io_clone;
> +       struct bio *clone =
> +               bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_clone_set);
> +
> +       md_io_clone = container_of(clone, struct md_io_clone, bio_clone);
> +       md_io_clone->orig_bio = *bio;
> +       md_io_clone->mddev = mddev;
> +       if (blk_queue_io_stat(bdev->bd_disk->queue))
> +               md_io_clone->start_time = bio_start_io_acct(*bio);
> +
> +       clone->bi_end_io = md_end_clone_io;
> +       clone->bi_private = md_io_clone;
> +       *bio = clone;
> +}
> +
>  /*
>   * Used by personalities that don't already clone the bio and thus can't
>   * easily add the timestamp to their extended bio structure.
>   */
>  void md_account_bio(struct mddev *mddev, struct bio **bio)
>  {
> -       struct block_device *bdev = (*bio)->bi_bdev;
> -       struct md_io_acct *md_io_acct;
> -       struct bio *clone;
> -
> -       if (!blk_queue_io_stat(bdev->bd_disk->queue))
> -               return;
> -
>         percpu_ref_get(&mddev->active_io);
> -
> -       clone = bio_alloc_clone(bdev, *bio, GFP_NOIO, &mddev->io_acct_set);
> -       md_io_acct = container_of(clone, struct md_io_acct, bio_clone);
> -       md_io_acct->orig_bio = *bio;
> -       md_io_acct->start_time = bio_start_io_acct(*bio);
> -       md_io_acct->mddev = mddev;
> -
> -       clone->bi_end_io = md_end_io_acct;
> -       clone->bi_private = md_io_acct;
> -       *bio = clone;
> +       md_clone_bio(mddev, bio);
>  }
>  EXPORT_SYMBOL_GPL(md_account_bio);
>
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 11299d94b239..892a598a5029 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -510,7 +510,7 @@ struct mddev {
>         struct bio_set                  sync_set; /* for sync operations like
>                                                    * metadata and bitmap writes
>                                                    */
> -       struct bio_set                  io_acct_set; /* for raid0 and raid5 io accounting */
> +       struct bio_set                  io_clone_set;
>
>         /* Generic flush handling.
>          * The last to finish preflush schedules a worker to submit
> @@ -738,7 +738,7 @@ struct md_thread {
>         void                    *private;
>  };
>
> -struct md_io_acct {
> +struct md_io_clone {
>         struct mddev    *mddev;
>         struct bio      *orig_bio;
>         unsigned long   start_time;
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index 29cf5455d7a5..cef0b400b2ee 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -5468,13 +5468,13 @@ static struct bio *remove_bio_from_retry(struct r5conf *conf,
>   */
>  static void raid5_align_endio(struct bio *bi)
>  {
> -       struct md_io_acct *md_io_acct = bi->bi_private;
> -       struct bio *raid_bi = md_io_acct->orig_bio;
> +       struct md_io_clone *md_io_clone = bi->bi_private;
> +       struct bio *raid_bi = md_io_clone->orig_bio;
>         struct mddev *mddev;
>         struct r5conf *conf;
>         struct md_rdev *rdev;
>         blk_status_t error = bi->bi_status;
> -       unsigned long start_time = md_io_acct->start_time;
> +       unsigned long start_time = md_io_clone->start_time;
>
>         bio_put(bi);
>
> @@ -5506,7 +5506,7 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
>         struct md_rdev *rdev;
>         sector_t sector, end_sector, first_bad;
>         int bad_sectors, dd_idx;
> -       struct md_io_acct *md_io_acct;
> +       struct md_io_clone *md_io_clone;
>         bool did_inc;
>
>         if (!in_chunk_boundary(mddev, raid_bio)) {
> @@ -5544,15 +5544,15 @@ static int raid5_read_one_chunk(struct mddev *mddev, struct bio *raid_bio)
>         }
>
>         align_bio = bio_alloc_clone(rdev->bdev, raid_bio, GFP_NOIO,
> -                                   &mddev->io_acct_set);
> -       md_io_acct = container_of(align_bio, struct md_io_acct, bio_clone);
> +                                   &mddev->io_clone_set);
> +       md_io_clone = container_of(align_bio, struct md_io_clone, bio_clone);
>         raid_bio->bi_next = (void *)rdev;
>         if (blk_queue_io_stat(raid_bio->bi_bdev->bd_disk->queue))
> -               md_io_acct->start_time = bio_start_io_acct(raid_bio);
> -       md_io_acct->orig_bio = raid_bio;
> +               md_io_clone->start_time = bio_start_io_acct(raid_bio);
> +       md_io_clone->orig_bio = raid_bio;
>
>         align_bio->bi_end_io = raid5_align_endio;
> -       align_bio->bi_private = md_io_acct;
> +       align_bio->bi_private = md_io_clone;
>         align_bio->bi_iter.bi_sector = sector;
>
>         /* No reshape active, so we can trust rdev->data_offset */
> --
> 2.39.2
>

Reviewed-by: Xiao Ni <xni@...hat.com>