linux-ext4 - Re: [PATCH 13/25] ext4: support large block size in ext4_mb_init_cache()

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <n3jvaazkla3usq5vx4kxsfkr33d2mwm4eu7xpgf7qssktmjwgu@btxoicdj3vrr>
Date: Wed, 5 Nov 2025 10:18:45 +0100
From: Jan Kara <jack@...e.cz>
To: libaokun@...weicloud.com
Cc: linux-ext4@...r.kernel.org, tytso@....edu, adilger.kernel@...ger.ca, 
	jack@...e.cz, linux-kernel@...r.kernel.org, kernel@...kajraghav.com, 
	mcgrof@...nel.org, linux-fsdevel@...r.kernel.org, linux-mm@...ck.org, 
	yi.zhang@...wei.com, yangerkun@...wei.com, chengzhihao1@...wei.com, 
	libaokun1@...wei.com
Subject: Re: [PATCH 13/25] ext4: support large block size in
 ext4_mb_init_cache()

On Sat 25-10-25 11:22:09, libaokun@...weicloud.com wrote:
> From: Baokun Li <libaokun1@...wei.com>
> 
> Currently, ext4_mb_init_cache() uses blocks_per_page to calculate the
> folio index and offset. However, when blocksize is larger than PAGE_SIZE,
> blocks_per_page becomes zero, leading to a potential division-by-zero bug.
> 
> Since we now have the folio, we know its exact size. This allows us to
> convert {blocks, groups}_per_page to {blocks, groups}_per_folio, thus
> supporting block sizes greater than page size.
> 
> Signed-off-by: Baokun Li <libaokun1@...wei.com>
> Reviewed-by: Zhang Yi <yi.zhang@...wei.com>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@...e.cz>

								Honza

> ---
>  fs/ext4/mballoc.c | 44 ++++++++++++++++++++------------------------
>  1 file changed, 20 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index d42d768a705a..31f4c7d65eb4 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -1329,26 +1329,25 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b)
>   * block bitmap and buddy information. The information are
>   * stored in the inode as
>   *
> - * {                        page                        }
> + * {                        folio                        }
>   * [ group 0 bitmap][ group 0 buddy] [group 1][ group 1]...
>   *
>   *
>   * one block each for bitmap and buddy information.
> - * So for each group we take up 2 blocks. A page can
> - * contain blocks_per_page (PAGE_SIZE / blocksize)  blocks.
> - * So it can have information regarding groups_per_page which
> - * is blocks_per_page/2
> + * So for each group we take up 2 blocks. A folio can
> + * contain blocks_per_folio (folio_size / blocksize)  blocks.
> + * So it can have information regarding groups_per_folio which
> + * is blocks_per_folio/2
>   *
>   * Locking note:  This routine takes the block group lock of all groups
> - * for this page; do not hold this lock when calling this routine!
> + * for this folio; do not hold this lock when calling this routine!
>   */
> -
>  static int ext4_mb_init_cache(struct folio *folio, char *incore, gfp_t gfp)
>  {
>  	ext4_group_t ngroups;
>  	unsigned int blocksize;
> -	int blocks_per_page;
> -	int groups_per_page;
> +	int blocks_per_folio;
> +	int groups_per_folio;
>  	int err = 0;
>  	int i;
>  	ext4_group_t first_group, group;
> @@ -1365,27 +1364,24 @@ static int ext4_mb_init_cache(struct folio *folio, char *incore, gfp_t gfp)
>  	sb = inode->i_sb;
>  	ngroups = ext4_get_groups_count(sb);
>  	blocksize = i_blocksize(inode);
> -	blocks_per_page = PAGE_SIZE / blocksize;
> +	blocks_per_folio = folio_size(folio) / blocksize;
> +	WARN_ON_ONCE(!blocks_per_folio);
> +	groups_per_folio = DIV_ROUND_UP(blocks_per_folio, 2);
>  
>  	mb_debug(sb, "init folio %lu\n", folio->index);
>  
> -	groups_per_page = blocks_per_page >> 1;
> -	if (groups_per_page == 0)
> -		groups_per_page = 1;
> -
>  	/* allocate buffer_heads to read bitmaps */
> -	if (groups_per_page > 1) {
> -		i = sizeof(struct buffer_head *) * groups_per_page;
> +	if (groups_per_folio > 1) {
> +		i = sizeof(struct buffer_head *) * groups_per_folio;
>  		bh = kzalloc(i, gfp);
>  		if (bh == NULL)
>  			return -ENOMEM;
>  	} else
>  		bh = &bhs;
>  
> -	first_group = folio->index * blocks_per_page / 2;
> -
>  	/* read all groups the folio covers into the cache */
> -	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
> +	first_group = EXT4_P_TO_LBLK(inode, folio->index) / 2;
> +	for (i = 0, group = first_group; i < groups_per_folio; i++, group++) {
>  		if (group >= ngroups)
>  			break;
>  
> @@ -1393,7 +1389,7 @@ static int ext4_mb_init_cache(struct folio *folio, char *incore, gfp_t gfp)
>  		if (!grinfo)
>  			continue;
>  		/*
> -		 * If page is uptodate then we came here after online resize
> +		 * If folio is uptodate then we came here after online resize
>  		 * which added some new uninitialized group info structs, so
>  		 * we must skip all initialized uptodate buddies on the folio,
>  		 * which may be currently in use by an allocating task.
> @@ -1413,7 +1409,7 @@ static int ext4_mb_init_cache(struct folio *folio, char *incore, gfp_t gfp)
>  	}
>  
>  	/* wait for I/O completion */
> -	for (i = 0, group = first_group; i < groups_per_page; i++, group++) {
> +	for (i = 0, group = first_group; i < groups_per_folio; i++, group++) {
>  		int err2;
>  
>  		if (!bh[i])
> @@ -1423,8 +1419,8 @@ static int ext4_mb_init_cache(struct folio *folio, char *incore, gfp_t gfp)
>  			err = err2;
>  	}
>  
> -	first_block = folio->index * blocks_per_page;
> -	for (i = 0; i < blocks_per_page; i++) {
> +	first_block = EXT4_P_TO_LBLK(inode, folio->index);
> +	for (i = 0; i < blocks_per_folio; i++) {
>  		group = (first_block + i) >> 1;
>  		if (group >= ngroups)
>  			break;
> @@ -1501,7 +1497,7 @@ static int ext4_mb_init_cache(struct folio *folio, char *incore, gfp_t gfp)
>  
>  out:
>  	if (bh) {
> -		for (i = 0; i < groups_per_page; i++)
> +		for (i = 0; i < groups_per_folio; i++)
>  			brelse(bh[i]);
>  		if (bh != &bhs)
>  			kfree(bh);
> -- 
> 2.46.1
> 
-- 
Jan Kara <jack@...e.com>
SUSE Labs, CR