[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <30ae98d1-7947-ff39-fde7-04f8fe94b433@huaweicloud.com>
Date: Tue, 30 Dec 2025 17:38:04 +0800
From: Li Nan <linan666@...weicloud.com>
To: Yu Kuai <yukuai@...as.com>, song@...nel.org, linux-raid@...r.kernel.org
Cc: linux-kernel@...r.kernel.org, filippo@...ian.org, colyli@...as.com
Subject: Re: [PATCH v2 04/11] md/raid5: use mempool to allocate
stripe_request_ctx
在 2025/11/24 14:31, Yu Kuai 写道:
> On the one hand, stripe_request_ctx is 72 bytes, which is rather large for
> a stack variable.
>
> On the other hand, the bitmap sectors_to_do has a fixed size, which limits
> the max_hw_sector_kb of a raid5 array to at most 256 * 4k = 1MB, and this
> makes full stripe IO impossible for arrays where chunk_size * data_disks
> is bigger. Allocating ctx at runtime makes it possible to get rid of this
> limit.
>
> Signed-off-by: Yu Kuai <yukuai@...as.com>
> ---
> drivers/md/md.h | 4 +++
> drivers/md/raid1-10.c | 5 ----
> drivers/md/raid5.c | 61 +++++++++++++++++++++++++++----------------
> drivers/md/raid5.h | 2 ++
> 4 files changed, 45 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/md/md.h b/drivers/md/md.h
> index 6ee18045f41c..b8c5dec12b62 100644
> --- a/drivers/md/md.h
> +++ b/drivers/md/md.h
> @@ -22,6 +22,10 @@
> #include <trace/events/block.h>
>
> #define MaxSector (~(sector_t)0)
> +/*
> + * Number of guaranteed raid bios in case of extreme VM load:
> + */
> +#define NR_RAID_BIOS 256
>
> enum md_submodule_type {
> MD_PERSONALITY = 0,
> diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
> index 521625756128..c33099925f23 100644
> --- a/drivers/md/raid1-10.c
> +++ b/drivers/md/raid1-10.c
> @@ -3,11 +3,6 @@
> #define RESYNC_BLOCK_SIZE (64*1024)
> #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
>
> -/*
> - * Number of guaranteed raid bios in case of extreme VM load:
> - */
> -#define NR_RAID_BIOS 256
> -
> /* when we get a read error on a read-only array, we redirect to another
> * device without failing the first device, or trying to over-write to
> * correct the read error. To keep track of bad blocks on a per-bio
> diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
> index f405ba7b99a7..0080dec4a6ef 100644
> --- a/drivers/md/raid5.c
> +++ b/drivers/md/raid5.c
> @@ -6083,13 +6083,13 @@ static sector_t raid5_bio_lowest_chunk_sector(struct r5conf *conf,
> static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
> {
> DEFINE_WAIT_FUNC(wait, woken_wake_function);
> - bool on_wq;
> struct r5conf *conf = mddev->private;
> - sector_t logical_sector;
> - struct stripe_request_ctx ctx = {};
> const int rw = bio_data_dir(bi);
> + struct stripe_request_ctx *ctx;
> + sector_t logical_sector;
> enum stripe_result res;
> int s, stripe_cnt;
> + bool on_wq;
>
> if (unlikely(bi->bi_opf & REQ_PREFLUSH)) {
> int ret = log_handle_flush_request(conf, bi);
> @@ -6101,11 +6101,6 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
> return true;
> }
> /* ret == -EAGAIN, fallback */
> - /*
> - * if r5l_handle_flush_request() didn't clear REQ_PREFLUSH,
> - * we need to flush journal device
> - */
> - ctx.do_flush = bi->bi_opf & REQ_PREFLUSH;
> }
>
> md_write_start(mddev, bi);
> @@ -6128,16 +6123,24 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
> }
>
> logical_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
> - ctx.first_sector = logical_sector;
> - ctx.last_sector = bio_end_sector(bi);
> bi->bi_next = NULL;
>
> - stripe_cnt = DIV_ROUND_UP_SECTOR_T(ctx.last_sector - logical_sector,
> + ctx = mempool_alloc(conf->ctx_pool, GFP_NOIO | __GFP_ZERO);
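In mempool_alloc_noprof():
    VM_WARN_ON_ONCE(gfp_mask & __GFP_ZERO);
mempool_alloc() does not support __GFP_ZERO, so this allocation triggers the
warning above. __GFP_ZERO should be removed, and the ctx members must be
initialized explicitly before they are accessed.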
--
Thanks,
Nan
Powered by blists - more mailing lists