Message-ID: <aCxcRwFnmd4uspB0@li-dc0c254c-257c-11b2-a85c-98b6c1322444.ibm.com>
Date: Tue, 20 May 2025 16:11:11 +0530
From: Ojaswin Mujoo <ojaswin@...ux.ibm.com>
To: Zhang Yi <yi.zhang@...weicloud.com>
Cc: linux-ext4@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        linux-kernel@...r.kernel.org, willy@...radead.org, tytso@....edu,
        adilger.kernel@...ger.ca, jack@...e.cz, yi.zhang@...wei.com,
        libaokun1@...wei.com, yukuai3@...wei.com, yangerkun@...wei.com
Subject: Re: [PATCH v2 1/8] ext4: make ext4_mpage_readpages() support large
 folios

On Mon, May 12, 2025 at 02:33:12PM +0800, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@...wei.com>
> 
> ext4_mpage_readpages() currently assumes that each folio is the size of
> PAGE_SIZE. Modify it to correctly calculate the number of blocks per
> folio and iterate through the blocks in each folio, which allows large
> folios to be supported.
> 
> Signed-off-by: Zhang Yi <yi.zhang@...wei.com>

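Just to restate the new per-folio accounting for my own understanding (a
rough standalone sketch, not the kernel code; the 64K folio and 4K block
size below are made-up example values):

#include <stdio.h>

int main(void)
{
	unsigned int blkbits = 12;              /* made-up 4K block size   */
	unsigned long folio_sz = 64 * 1024;     /* made-up 64K large folio */
	unsigned int blocks_per_folio = folio_sz >> blkbits;

	/* The patched loop walks blocks 0 .. blocks_per_folio - 1 of the
	 * folio instead of assuming PAGE_SIZE >> blkbits blocks. */
	for (unsigned int page_block = 0; page_block < blocks_per_folio;
	     page_block++)
		printf("block %u of %u in this folio\n",
		       page_block, blocks_per_folio);

	return 0;
}

which is what blocks_per_folio = folio_size(folio) >> blkbits gives per
folio in the patch, so the new loop bounds look right to me.
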
Going through this path is pretty confusing. I'm not sure why we keep
falling back to a full folio read in the case of mixed or non-contiguous
mappings; it seems like there's some read amplification here.
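
To put a rough number on that (back-of-the-envelope only; the 64K folio,
4K block size and mapping layout below are made up):

#include <stdio.h>

int main(void)
{
	/* Made-up example: a 64K folio of 4K blocks where the mapping stops
	 * being contiguous after the first 15 blocks. */
	unsigned int blocks_per_folio = 16;
	unsigned int contiguous_blocks = 15;

	/* My reading of the confused path: the whole folio is then read via
	 * the buffer-head fallback, so every block takes the slow path even
	 * though most of them could have ridden the large bio. */
	printf("%u blocks fall back because of %u discontiguous block(s)\n",
	       blocks_per_folio, blocks_per_folio - contiguous_blocks);

	return 0;
}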

Regardless, the changes here look okay to me.

Feel free to add:

Reviewed-by: Ojaswin Mujoo <ojaswin@...ux.ibm.com>

Thanks,
ojaswin
> ---
>  fs/ext4/readpage.c | 28 +++++++++++++++++-----------
>  1 file changed, 17 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
> index 5d3a9dc9a32d..f329daf6e5c7 100644
> --- a/fs/ext4/readpage.c
> +++ b/fs/ext4/readpage.c
> @@ -227,24 +227,30 @@ int ext4_mpage_readpages(struct inode *inode,
>  	int length;
>  	unsigned relative_block = 0;
>  	struct ext4_map_blocks map;
> -	unsigned int nr_pages = rac ? readahead_count(rac) : 1;
> +	unsigned int nr_pages, folio_pages;
>  
>  	map.m_pblk = 0;
>  	map.m_lblk = 0;
>  	map.m_len = 0;
>  	map.m_flags = 0;
>  
> -	for (; nr_pages; nr_pages--) {
> +	nr_pages = rac ? readahead_count(rac) : folio_nr_pages(folio);
> +	for (; nr_pages; nr_pages -= folio_pages) {
>  		int fully_mapped = 1;
> -		unsigned first_hole = blocks_per_page;
> +		unsigned int first_hole;
> +		unsigned int blocks_per_folio;
>  
>  		if (rac)
>  			folio = readahead_folio(rac);
> +
> +		folio_pages = folio_nr_pages(folio);
>  		prefetchw(&folio->flags);
>  
>  		if (folio_buffers(folio))
>  			goto confused;
>  
> +		blocks_per_folio = folio_size(folio) >> blkbits;
> +		first_hole = blocks_per_folio;
>  		block_in_file = next_block =
>  			(sector_t)folio->index << (PAGE_SHIFT - blkbits);
>  		last_block = block_in_file + nr_pages * blocks_per_page;
> @@ -270,7 +276,7 @@ int ext4_mpage_readpages(struct inode *inode,
>  					map.m_flags &= ~EXT4_MAP_MAPPED;
>  					break;
>  				}
> -				if (page_block == blocks_per_page)
> +				if (page_block == blocks_per_folio)
>  					break;
>  				page_block++;
>  				block_in_file++;
> @@ -281,7 +287,7 @@ int ext4_mpage_readpages(struct inode *inode,
>  		 * Then do more ext4_map_blocks() calls until we are
>  		 * done with this folio.
>  		 */
> -		while (page_block < blocks_per_page) {
> +		while (page_block < blocks_per_folio) {
>  			if (block_in_file < last_block) {
>  				map.m_lblk = block_in_file;
>  				map.m_len = last_block - block_in_file;
> @@ -296,13 +302,13 @@ int ext4_mpage_readpages(struct inode *inode,
>  			}
>  			if ((map.m_flags & EXT4_MAP_MAPPED) == 0) {
>  				fully_mapped = 0;
> -				if (first_hole == blocks_per_page)
> +				if (first_hole == blocks_per_folio)
>  					first_hole = page_block;
>  				page_block++;
>  				block_in_file++;
>  				continue;
>  			}
> -			if (first_hole != blocks_per_page)
> +			if (first_hole != blocks_per_folio)
>  				goto confused;		/* hole -> non-hole */
>  
>  			/* Contiguous blocks? */
> @@ -315,13 +321,13 @@ int ext4_mpage_readpages(struct inode *inode,
>  					/* needed? */
>  					map.m_flags &= ~EXT4_MAP_MAPPED;
>  					break;
> -				} else if (page_block == blocks_per_page)
> +				} else if (page_block == blocks_per_folio)
>  					break;
>  				page_block++;
>  				block_in_file++;
>  			}
>  		}
> -		if (first_hole != blocks_per_page) {
> +		if (first_hole != blocks_per_folio) {
>  			folio_zero_segment(folio, first_hole << blkbits,
>  					  folio_size(folio));
>  			if (first_hole == 0) {
> @@ -367,11 +373,11 @@ int ext4_mpage_readpages(struct inode *inode,
>  
>  		if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
>  		     (relative_block == map.m_len)) ||
> -		    (first_hole != blocks_per_page)) {
> +		    (first_hole != blocks_per_folio)) {
>  			submit_bio(bio);
>  			bio = NULL;
>  		} else
> -			last_block_in_bio = first_block + blocks_per_page - 1;
> +			last_block_in_bio = first_block + blocks_per_folio - 1;
>  		continue;
>  	confused:
>  		if (bio) {
> -- 
> 2.46.1
> 
