[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <365027D2-9BB3-439A-9080-2684E1795B7F@nvidia.com>
Date: Thu, 14 Aug 2025 11:14:51 -0400
From: Zi Yan <ziy@...dia.com>
To: Usama Arif <usamaarif642@...il.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>, david@...hat.com,
linux-mm@...ck.org, linux-fsdevel@...r.kernel.org, corbet@....net,
rppt@...nel.org, surenb@...gle.com, mhocko@...e.com, hannes@...xchg.org,
baohua@...nel.org, shakeel.butt@...ux.dev, riel@...riel.com,
laoar.shao@...il.com, dev.jain@....com, baolin.wang@...ux.alibaba.com,
npache@...hat.com, lorenzo.stoakes@...cle.com, Liam.Howlett@...cle.com,
ryan.roberts@....com, vbabka@...e.cz, jannh@...gle.com,
Arnd Bergmann <arnd@...db.de>, sj@...nel.org, linux-kernel@...r.kernel.org,
linux-doc@...r.kernel.org, kernel-team@...a.com
Subject: Re: [PATCH v4 3/7] mm/huge_memory: respect MADV_COLLAPSE with
PR_THP_DISABLE_EXCEPT_ADVISED
On 13 Aug 2025, at 9:55, Usama Arif wrote:
> From: David Hildenbrand <david@...hat.com>
>
> Let's allow for making MADV_COLLAPSE succeed on areas that neither have
> VM_HUGEPAGE nor VM_NOHUGEPAGE when we have THP disabled
> unless explicitly advised (PR_THP_DISABLE_EXCEPT_ADVISED).
>
> MADV_COLLAPSE is a clear advice that we want to collapse.
>
> Note that we still respect the VM_NOHUGEPAGE flag, just like
> MADV_COLLAPSE always does. So consequently, MADV_COLLAPSE is now only
> refused on VM_NOHUGEPAGE with PR_THP_DISABLE_EXCEPT_ADVISED,
> including for shmem.
>
> Co-developed-by: Usama Arif <usamaarif642@...il.com>
> Signed-off-by: Usama Arif <usamaarif642@...il.com>
> Signed-off-by: David Hildenbrand <david@...hat.com>
> Reviewed-by: Baolin Wang <baolin.wang@...ux.alibaba.com>
> Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
> ---
> include/linux/huge_mm.h | 8 +++++++-
> include/uapi/linux/prctl.h | 2 +-
> mm/huge_memory.c | 5 +++--
> mm/memory.c | 6 ++++--
> mm/shmem.c | 2 +-
> 5 files changed, 16 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index 92ea0b9771fae..1ac0d06fb3c1d 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -329,7 +329,7 @@ struct thpsize {
> * through madvise or prctl.
> */
> static inline bool vma_thp_disabled(struct vm_area_struct *vma,
> - vm_flags_t vm_flags)
> + vm_flags_t vm_flags, bool forced_collapse)
> {
> /* Are THPs disabled for this VMA? */
> if (vm_flags & VM_NOHUGEPAGE)
> @@ -343,6 +343,12 @@ static inline bool vma_thp_disabled(struct vm_area_struct *vma,
> */
> if (vm_flags & VM_HUGEPAGE)
> return false;
> + /*
> + * Forcing a collapse (e.g., madv_collapse), is a clear advice to
> + * use THPs.
> + */
> + if (forced_collapse)
> + return false;
> return mm_flags_test(MMF_DISABLE_THP_EXCEPT_ADVISED, vma->vm_mm);
> }
>
> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index 150b6deebfb1e..51c4e8c82b1e9 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -185,7 +185,7 @@ struct prctl_mm_map {
> #define PR_SET_THP_DISABLE 41
> /*
> * Don't disable THPs when explicitly advised (e.g., MADV_HUGEPAGE /
> - * VM_HUGEPAGE).
> + * VM_HUGEPAGE, MADV_COLLAPSE).
> */
> # define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1)
> #define PR_GET_THP_DISABLE 42
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 9c716be949cbf..1eca2d543449c 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -104,7 +104,8 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
> {
> const bool smaps = type == TVA_SMAPS;
> const bool in_pf = type == TVA_PAGEFAULT;
> - const bool enforce_sysfs = type != TVA_FORCED_COLLAPSE;
> + const bool forced_collapse = type == TVA_FORCED_COLLAPSE;
> + const bool enforce_sysfs = !forced_collapse;
> unsigned long supported_orders;
>
> /* Check the intersection of requested and supported orders. */
> @@ -122,7 +123,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
> if (!vma->vm_mm) /* vdso */
> return 0;
>
> - if (thp_disabled_by_hw() || vma_thp_disabled(vma, vm_flags))
> + if (thp_disabled_by_hw() || vma_thp_disabled(vma, vm_flags, forced_collapse))
> return 0;
>
> /* khugepaged doesn't collapse DAX vma, but page fault is fine. */
> diff --git a/mm/memory.c b/mm/memory.c
> index 7b1e8f137fa3f..e4f533655305a 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -5332,9 +5332,11 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct folio *folio, struct page *pa
> * It is too late to allocate a small folio, we already have a large
> * folio in the pagecache: especially s390 KVM cannot tolerate any
> * PMD mappings, but PTE-mapped THP are fine. So let's simply refuse any
> - * PMD mappings if THPs are disabled.
> + * PMD mappings if THPs are disabled. As we already have a THP ...
> + * behave as if we are forcing a collapse.

What does the “...” mean here? Shouldn’t it be:

    As we already have a THP,
    behave as if we are forcing a collapse.

> */
> - if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags))
> + if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags,
> + /* forced_collapse=*/ true))
> return ret;
>
> if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER))
> diff --git a/mm/shmem.c b/mm/shmem.c
> index e2c76a30802b6..d945de3a7f0e7 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1817,7 +1817,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
> vm_flags_t vm_flags = vma ? vma->vm_flags : 0;
> unsigned int global_orders;
>
> - if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags)))
> + if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags, shmem_huge_force)))
> return 0;
>
> global_orders = shmem_huge_global_enabled(inode, index, write_end,
> --
> 2.47.3

Otherwise, LGTM.

Reviewed-by: Zi Yan <ziy@...dia.com>

Best Regards,
Yan, Zi
Powered by blists - more mailing lists