Message-ID: <fdd5a942-6382-49c7-90d9-5b2b1fea9671@linux.alibaba.com>
Date: Mon, 2 Sep 2024 16:36:08 +0800
From: Baolin Wang <baolin.wang@...ux.alibaba.com>
To: Rik van Riel <riel@...riel.com>, Hugh Dickins <hughd@...gle.com>
Cc: kernel-team@...a.com, Andrew Morton <akpm@...ux-foundation.org>,
linux-mm@...ck.org, linux-kernel@...r.kernel.org,
Dave Chinner <dchinner@...hat.com>, "Darrick J. Wong" <djwong@...nel.org>,
Vlastimil Babka <vbabka@...e.cz>
Subject: Re: [PATCH] mm,tmpfs: consider end of file write in shmem_is_huge
On 2024/8/30 11:54, Rik van Riel wrote:
> Take the end of a file write into consideration when deciding whether
> or not to use huge folios for tmpfs files when the tmpfs filesystem is
> mounted with huge=within_size.
>
> This allows large writes that append to the end of a file to automatically
> use large folios.
Makes sense to me.
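For reference, a minimal userspace sketch of the append-heavy workload this
targets (essentially the 4MB sequential write test quoted below) could look
like the program here; the /mnt/tmpfs mount point and chunk count are only
assumptions for illustration, not taken from the patch:

/*
 * Hypothetical reproduction of the append workload: sequentially
 * append 4MB chunks to a file on a tmpfs mount, e.g. one mounted
 * with "huge=within_size". The mount point is an assumption.
 */
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        const size_t chunk = 4UL << 20;         /* 4MB per write */
        char *buf = malloc(chunk);
        int fd = open("/mnt/tmpfs/testfile",
                      O_CREAT | O_WRONLY | O_APPEND, 0644);

        if (fd < 0 || !buf)
                return 1;
        memset(buf, 0xab, chunk);

        /* a 16GB file is 4096 appended 4MB chunks */
        for (int i = 0; i < 4096; i++)
                if (write(fd, buf, chunk) != (ssize_t)chunk)
                        return 1;

        close(fd);
        free(buf);
        return 0;
}

With huge=within_size, each append pushes write_end past the current i_size,
so with this patch the check can hand out huge folios as the file grows.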
>
> Doing 4MB sequential writes without fallocate to a 16GB tmpfs file:
> - 4kB pages: 1560 MB/s
> - huge=within_size: 4720 MB/s
> - huge=always: 4720 MB/s
>
> Signed-off-by: Rik van Riel <riel@...riel.com>
> ---
> fs/xfs/scrub/xfile.c | 6 +++---
> fs/xfs/xfs_buf_mem.c | 2 +-
> include/linux/shmem_fs.h | 12 ++++++-----
> mm/huge_memory.c | 2 +-
> mm/khugepaged.c | 2 +-
> mm/shmem.c | 44 +++++++++++++++++++++-------------------
> mm/userfaultfd.c | 2 +-
> 7 files changed, 37 insertions(+), 33 deletions(-)
>
> diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c
> index d848222f802b..e6e1c1fd23cb 100644
[snip]
> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> index 1d06b1e5408a..846c1ea91f50 100644
> --- a/include/linux/shmem_fs.h
> +++ b/include/linux/shmem_fs.h
> @@ -111,13 +111,15 @@ extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
> int shmem_unuse(unsigned int type);
>
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> -extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
> - struct mm_struct *mm, unsigned long vm_flags);
> +extern bool shmem_is_huge(struct inode *inode, pgoff_t index, loff_t write_end,
> + bool shmem_huge_force, struct mm_struct *mm,
> + unsigned long vm_flags);
> unsigned long shmem_allowable_huge_orders(struct inode *inode,
> struct vm_area_struct *vma, pgoff_t index,
> bool global_huge);
> #else
> -static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
> +static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index,
> + loff_t write_end, bool shmem_huge_force,
> struct mm_struct *mm, unsigned long vm_flags)
> {
> return false;
> @@ -150,8 +152,8 @@ enum sgp_type {
> SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
> };
>
> -int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop,
> - enum sgp_type sgp);
> +int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end,
> + struct folio **foliop, enum sgp_type sgp);
> struct folio *shmem_read_folio_gfp(struct address_space *mapping,
> pgoff_t index, gfp_t gfp);
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 67c86a5d64a6..8c09071e78cd 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -160,7 +160,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
> * own flags.
> */
> if (!in_pf && shmem_file(vma->vm_file)) {
> - bool global_huge = shmem_is_huge(file_inode(vma->vm_file), vma->vm_pgoff,
> + bool global_huge = shmem_is_huge(file_inode(vma->vm_file), vma->vm_pgoff, 0,
> !enforce_sysfs, vma->vm_mm, vm_flags);
>
> if (!vma_is_anon_shmem(vma))
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index cdd1d8655a76..0ebabff10f97 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1866,7 +1866,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
> if (xa_is_value(folio) || !folio_test_uptodate(folio)) {
> xas_unlock_irq(&xas);
> /* swap in or instantiate fallocated page */
> - if (shmem_get_folio(mapping->host, index,
> + if (shmem_get_folio(mapping->host, index, 0,
> &folio, SGP_NOALLOC)) {
> result = SCAN_FAIL;
> goto xa_unlocked;
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 5a77acf6ac6a..964c24fc480f 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -548,7 +548,7 @@ static bool shmem_confirm_swap(struct address_space *mapping,
>
> static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
>
> -static bool __shmem_is_huge(struct inode *inode, pgoff_t index,
> +static bool __shmem_is_huge(struct inode *inode, pgoff_t index, loff_t write_end,
> bool shmem_huge_force, struct mm_struct *mm,
> unsigned long vm_flags)
> {
> @@ -568,7 +568,8 @@ static bool __shmem_is_huge(struct inode *inode, pgoff_t index,
> return true;
> case SHMEM_HUGE_WITHIN_SIZE:
> index = round_up(index + 1, HPAGE_PMD_NR);
> - i_size = round_up(i_size_read(inode), PAGE_SIZE);
> + i_size = max(write_end, i_size_read(inode));
> + i_size = round_up(i_size, PAGE_SIZE);
> if (i_size >> PAGE_SHIFT >= index)
> return true;
> fallthrough;
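Just to spell out the effect of this hunk with concrete numbers, below is an
illustrative userspace mock of the SHMEM_HUGE_WITHIN_SIZE decision
(simplified, with x86-64 PAGE_SIZE and HPAGE_PMD_NR assumed; it is not code
from the patch):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define HPAGE_PMD_NR    512     /* 2MB huge page in 4kB pages */

static long long round_up_ll(long long x, long long a)
{
        return (x + a - 1) / a * a;
}

static bool within_size_huge(long long index, long long i_size,
                             long long write_end)
{
        long long size = write_end > i_size ? write_end : i_size;

        index = round_up_ll(index + 1, HPAGE_PMD_NR);
        size = round_up_ll(size, PAGE_SIZE);
        return (size >> PAGE_SHIFT) >= index;
}

int main(void)
{
        /* appending 4MB to an empty file, faulting at index 0 */
        printf("i_size only: %d  with write_end: %d\n",
               within_size_huge(0, 0, 0), within_size_huge(0, 0, 4 << 20));
        return 0;
}

For an append to an empty file, the old i_size-only check refuses the huge
path, while taking write_end into account accepts it, which is what lets the
appending writes in the benchmark above use large folios.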
shmem_is_huge() is no longer exported and has been renamed to
shmem_huge_global_enabled() by the series [1], so you will need to rebase
onto the latest mm-unstable branch.
[1]
https://lore.kernel.org/all/cover.1721626645.git.baolin.wang@linux.alibaba.com/T/#md2580130f990af0b1428010bfb4cc789bb865136