Message-ID: <aCxcRwFnmd4uspB0@li-dc0c254c-257c-11b2-a85c-98b6c1322444.ibm.com>
Date: Tue, 20 May 2025 16:11:11 +0530
From: Ojaswin Mujoo <ojaswin@...ux.ibm.com>
To: Zhang Yi <yi.zhang@...weicloud.com>
Cc: linux-ext4@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        linux-kernel@...r.kernel.org, willy@...radead.org, tytso@....edu,
        adilger.kernel@...ger.ca, jack@...e.cz, yi.zhang@...wei.com,
        libaokun1@...wei.com, yukuai3@...wei.com, yangerkun@...wei.com
Subject: Re: [PATCH v2 1/8] ext4: make ext4_mpage_readpages() support large
 folios

On Mon, May 12, 2025 at 02:33:12PM +0800, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@...wei.com>
> 
> ext4_mpage_readpages() currently assumes that each folio is the size of
> PAGE_SIZE. Modify it to correctly calculate the number of blocks per
> folio and iterate through the blocks in each folio, which allows large
> folios to be supported.
> 
> Signed-off-by: Zhang Yi <yi.zhang@...wei.com>

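Just to restate the new per-folio accounting for my own understanding (a
rough standalone sketch, not the kernel code; the 64K folio and 4K block
size below are made-up example values):

#include <stdio.h>

int main(void)
{
	unsigned int blkbits = 12;              /* made-up 4K block size   */
	unsigned long folio_sz = 64 * 1024;     /* made-up 64K large folio */
	unsigned int blocks_per_folio = folio_sz >> blkbits;

	/* The patched loop walks blocks 0 .. blocks_per_folio - 1 of the
	 * folio instead of assuming PAGE_SIZE >> blkbits blocks. */
	for (unsigned int page_block = 0; page_block < blocks_per_folio;
	     page_block++)
		printf("block %u of %u in this folio\n",
		       page_block, blocks_per_folio);

	return 0;
}

which is what blocks_per_folio = folio_size(folio) >> blkbits gives per
folio in the patch, so the new loop bounds look right to me.
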
Going through this path is pretty confusing. I'm not sure why we keep
falling back to a full folio read in the case of mixed or non-contiguous
mappings; it seems like there's some read amplification here.
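
To put a rough number on that (back-of-the-envelope only; the 64K folio,
4K block size and mapping layout below are made up):

#include <stdio.h>

int main(void)
{
	/* Made-up example: a 64K folio of 4K blocks where the mapping stops
	 * being contiguous after the first 15 blocks. */
	unsigned int blocks_per_folio = 16;
	unsigned int contiguous_blocks = 15;

	/* My reading of the confused path: the whole folio is then read via
	 * the buffer-head fallback, so every block takes the slow path even
	 * though most of them could have ridden the large bio. */
	printf("%u blocks fall back because of %u discontiguous block(s)\n",
	       blocks_per_folio, blocks_per_folio - contiguous_blocks);

	return 0;
}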

Regardless, the changes here look okay to me.

Feel free to add:

Reviewed-by: Ojaswin Mujoo <ojaswin@...ux.ibm.com>

Thanks,
ojaswin
> ---
>  fs/ext4/readpage.c | 28 +++++++++++++++++-----------
>  1 file changed, 17 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
> index 5d3a9dc9a32d..f329daf6e5c7 100644
> --- a/fs/ext4/readpage.c
> +++ b/fs/ext4/readpage.c
> @@ -227,24 +227,30 @@ int ext4_mpage_readpages(struct inode *inode,
>  	int length;
>  	unsigned relative_block = 0;
>  	struct ext4_map_blocks map;
> -	unsigned int nr_pages = rac ? readahead_count(rac) : 1;
> +	unsigned int nr_pages, folio_pages;
>  
>  	map.m_pblk = 0;
>  	map.m_lblk = 0;
>  	map.m_len = 0;
>  	map.m_flags = 0;
>  
> -	for (; nr_pages; nr_pages--) {
> +	nr_pages = rac ? readahead_count(rac) : folio_nr_pages(folio);
> +	for (; nr_pages; nr_pages -= folio_pages) {
>  		int fully_mapped = 1;
> -		unsigned first_hole = blocks_per_page;
> +		unsigned int first_hole;
> +		unsigned int blocks_per_folio;
>  
>  		if (rac)
>  			folio = readahead_folio(rac);
> +
> +		folio_pages = folio_nr_pages(folio);
>  		prefetchw(&folio->flags);
>  
>  		if (folio_buffers(folio))
>  			goto confused;
>  
> +		blocks_per_folio = folio_size(folio) >> blkbits;
> +		first_hole = blocks_per_folio;
>  		block_in_file = next_block =
>  			(sector_t)folio->index << (PAGE_SHIFT - blkbits);
>  		last_block = block_in_file + nr_pages * blocks_per_page;
> @@ -270,7 +276,7 @@ int ext4_mpage_readpages(struct inode *inode,
>  					map.m_flags &= ~EXT4_MAP_MAPPED;
>  					break;
>  				}
> -				if (page_block == blocks_per_page)
> +				if (page_block == blocks_per_folio)
>  					break;
>  				page_block++;
>  				block_in_file++;
> @@ -281,7 +287,7 @@ int ext4_mpage_readpages(struct inode *inode,
>  		 * Then do more ext4_map_blocks() calls until we are
>  		 * done with this folio.
>  		 */
> -		while (page_block < blocks_per_page) {
> +		while (page_block < blocks_per_folio) {
>  			if (block_in_file < last_block) {
>  				map.m_lblk = block_in_file;
>  				map.m_len = last_block - block_in_file;
> @@ -296,13 +302,13 @@ int ext4_mpage_readpages(struct inode *inode,
>  			}
>  			if ((map.m_flags & EXT4_MAP_MAPPED) == 0) {
>  				fully_mapped = 0;
> -				if (first_hole == blocks_per_page)
> +				if (first_hole == blocks_per_folio)
>  					first_hole = page_block;
>  				page_block++;
>  				block_in_file++;
>  				continue;
>  			}
> -			if (first_hole != blocks_per_page)
> +			if (first_hole != blocks_per_folio)
>  				goto confused;		/* hole -> non-hole */
>  
>  			/* Contiguous blocks? */
> @@ -315,13 +321,13 @@ int ext4_mpage_readpages(struct inode *inode,
>  					/* needed? */
>  					map.m_flags &= ~EXT4_MAP_MAPPED;
>  					break;
> -				} else if (page_block == blocks_per_page)
> +				} else if (page_block == blocks_per_folio)
>  					break;
>  				page_block++;
>  				block_in_file++;
>  			}
>  		}
> -		if (first_hole != blocks_per_page) {
> +		if (first_hole != blocks_per_folio) {
>  			folio_zero_segment(folio, first_hole << blkbits,
>  					  folio_size(folio));
>  			if (first_hole == 0) {
> @@ -367,11 +373,11 @@ int ext4_mpage_readpages(struct inode *inode,
>  
>  		if (((map.m_flags & EXT4_MAP_BOUNDARY) &&
>  		     (relative_block == map.m_len)) ||
> -		    (first_hole != blocks_per_page)) {
> +		    (first_hole != blocks_per_folio)) {
>  			submit_bio(bio);
>  			bio = NULL;
>  		} else
> -			last_block_in_bio = first_block + blocks_per_page - 1;
> +			last_block_in_bio = first_block + blocks_per_folio - 1;
>  		continue;
>  	confused:
>  		if (bio) {
> -- 
> 2.46.1
> 
