Message-ID: <fdd5a942-6382-49c7-90d9-5b2b1fea9671@linux.alibaba.com>
Date: Mon, 2 Sep 2024 16:36:08 +0800
From: Baolin Wang <baolin.wang@...ux.alibaba.com>
To: Rik van Riel <riel@...riel.com>, Hugh Dickins <hughd@...gle.com>
Cc: kernel-team@...a.com, Andrew Morton <akpm@...ux-foundation.org>,
linux-mm@...ck.org, linux-kernel@...r.kernel.org,
Dave Chinner <dchinner@...hat.com>, "Darrick J. Wong" <djwong@...nel.org>,
Vlastimil Babka <vbabka@...e.cz>
Subject: Re: [PATCH] mm,tmpfs: consider end of file write in shmem_is_huge
On 2024/8/30 11:54, Rik van Riel wrote:
> Take the end of a file write into consideration when deciding whether
> or not to use huge folios for tmpfs files when the tmpfs filesystem is
> mounted with huge=within_size.
>
> This allows large writes that append to the end of a file to automatically
> use large folios.
Makes sense to me.
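For reference, a minimal userspace sketch of the append-heavy workload this
targets (essentially the 4MB sequential write test quoted below) could look
like the program here; the /mnt/tmpfs mount point and chunk count are only
assumptions for illustration, not taken from the patch:

/*
 * Hypothetical reproduction of the append workload: sequentially
 * append 4MB chunks to a file on a tmpfs mount, e.g. one mounted
 * with "huge=within_size". The mount point is an assumption.
 */
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const size_t chunk = 4UL << 20;         /* 4MB per write */
        char *buf = malloc(chunk);
        int fd = open("/mnt/tmpfs/testfile",
                      O_CREAT | O_WRONLY | O_APPEND, 0644);

        if (fd < 0 || !buf)
                return 1;
        memset(buf, 0xab, chunk);

        /* a 16GB file is 4096 appended 4MB chunks */
        for (int i = 0; i < 4096; i++)
                if (write(fd, buf, chunk) != (ssize_t)chunk)
                        return 1;

        close(fd);
        free(buf);
        return 0;
}

With huge=within_size, each append pushes write_end past the current i_size,
so with this patch the check can hand out huge folios as the file grows.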
>
> Doing 4MB sequential writes without fallocate to a 16GB tmpfs file:
> - 4kB pages: 1560 MB/s
> - huge=within_size: 4720 MB/s
> - huge=always: 4720 MB/s
>
> Signed-off-by: Rik van Riel <riel@...riel.com>
> ---
> fs/xfs/scrub/xfile.c | 6 +++---
> fs/xfs/xfs_buf_mem.c | 2 +-
> include/linux/shmem_fs.h | 12 ++++++-----
> mm/huge_memory.c | 2 +-
> mm/khugepaged.c | 2 +-
> mm/shmem.c | 44 +++++++++++++++++++++-------------------
> mm/userfaultfd.c | 2 +-
> 7 files changed, 37 insertions(+), 33 deletions(-)
>
> diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c
> index d848222f802b..e6e1c1fd23cb 100644
[snip]
> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> index 1d06b1e5408a..846c1ea91f50 100644
> --- a/include/linux/shmem_fs.h
> +++ b/include/linux/shmem_fs.h
> @@ -111,13 +111,15 @@ extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
> int shmem_unuse(unsigned int type);
>
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> -extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
> - struct mm_struct *mm, unsigned long vm_flags);
> +extern bool shmem_is_huge(struct inode *inode, pgoff_t index, loff_t write_end,
> + bool shmem_huge_force, struct mm_struct *mm,
> + unsigned long vm_flags);
> unsigned long shmem_allowable_huge_orders(struct inode *inode,
> struct vm_area_struct *vma, pgoff_t index,
> bool global_huge);
> #else
> -static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
> +static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index,
> + loff_t write_end, bool shmem_huge_force,
> struct mm_struct *mm, unsigned long vm_flags)
> {
> return false;
> @@ -150,8 +152,8 @@ enum sgp_type {
> SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
> };
>
> -int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop,
> - enum sgp_type sgp);
> +int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end,
> + struct folio **foliop, enum sgp_type sgp);
> struct folio *shmem_read_folio_gfp(struct address_space *mapping,
> pgoff_t index, gfp_t gfp);
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 67c86a5d64a6..8c09071e78cd 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -160,7 +160,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
> * own flags.
> */
> if (!in_pf && shmem_file(vma->vm_file)) {
> - bool global_huge = shmem_is_huge(file_inode(vma->vm_file), vma->vm_pgoff,
> + bool global_huge = shmem_is_huge(file_inode(vma->vm_file), vma->vm_pgoff, 0,
> !enforce_sysfs, vma->vm_mm, vm_flags);
>
> if (!vma_is_anon_shmem(vma))
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index cdd1d8655a76..0ebabff10f97 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1866,7 +1866,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
> if (xa_is_value(folio) || !folio_test_uptodate(folio)) {
> xas_unlock_irq(&xas);
> /* swap in or instantiate fallocated page */
> - if (shmem_get_folio(mapping->host, index,
> + if (shmem_get_folio(mapping->host, index, 0,
> &folio, SGP_NOALLOC)) {
> result = SCAN_FAIL;
> goto xa_unlocked;
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 5a77acf6ac6a..964c24fc480f 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -548,7 +548,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
>
> static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
>
> -static bool __shmem_is_huge(struct inode *inode, pgoff_t index,
> +static bool __shmem_is_huge(struct inode *inode, pgoff_t index, loff_t write_end,
> bool shmem_huge_force, struct mm_struct *mm,
> unsigned long vm_flags)
> {
> @@ -568,7 +568,8 @@ static bool __shmem_is_huge(struct inode *inode, pgoff_t index,
> return true;
> case SHMEM_HUGE_WITHIN_SIZE:
> index = round_up(index + 1, HPAGE_PMD_NR);
> - i_size = round_up(i_size_read(inode), PAGE_SIZE);
> + i_size = max(write_end, i_size_read(inode));
> + i_size = round_up(i_size, PAGE_SIZE);
> if (i_size >> PAGE_SHIFT >= index)
> return true;
> fallthrough;
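Just to spell out the effect of this hunk with concrete numbers, below is an
illustrative userspace mock of the SHMEM_HUGE_WITHIN_SIZE decision
(simplified, with x86-64 PAGE_SIZE and HPAGE_PMD_NR assumed; it is not code
from the patch):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define HPAGE_PMD_NR    512     /* 2MB huge page in 4kB pages */

static long long round_up_ll(long long x, long long a)
{
        return (x + a - 1) / a * a;
}

static bool within_size_huge(long long index, long long i_size,
                             long long write_end)
{
        long long size = write_end > i_size ? write_end : i_size;

        index = round_up_ll(index + 1, HPAGE_PMD_NR);
        size = round_up_ll(size, PAGE_SIZE);
        return (size >> PAGE_SHIFT) >= index;
}

int main(void)
{
        /* appending 4MB to an empty file, faulting at index 0 */
        printf("i_size only: %d  with write_end: %d\n",
               within_size_huge(0, 0, 0), within_size_huge(0, 0, 4 << 20));
        return 0;
}

For an append to an empty file, the old i_size-only check refuses the huge
path, while taking write_end into account accepts it, which is what lets the
appending writes in the benchmark above use large folios.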
shmem_is_huge() is no longer exported and has been renamed to
shmem_huge_global_enabled() by the series [1], so you will need to rebase
onto the latest mm-unstable branch.
[1]
https://lore.kernel.org/all/cover.1721626645.git.baolin.wang@linux.alibaba.com/T/#md2580130f990af0b1428010bfb4cc789bb865136