Message-ID: <34a9440f-b0c4-4f76-a2ac-f88b54c2242e@gmail.com>
Date: Fri, 26 Sep 2025 15:11:42 +0100
From: Usama Arif <usamaarif642@...il.com>
To: Yafang Shao <laoar.shao@...il.com>, akpm@...ux-foundation.org,
 david@...hat.com, ziy@...dia.com, baolin.wang@...ux.alibaba.com,
 lorenzo.stoakes@...cle.com, Liam.Howlett@...cle.com, npache@...hat.com,
 ryan.roberts@....com, dev.jain@....com, hannes@...xchg.org,
 gutierrez.asier@...wei-partners.com, willy@...radead.org, ast@...nel.org,
 daniel@...earbox.net, andrii@...nel.org, ameryhung@...il.com,
 rientjes@...gle.com, corbet@....net, 21cnbao@...il.com,
 shakeel.butt@...ux.dev, tj@...nel.org, lance.yang@...ux.dev
Cc: bpf@...r.kernel.org, linux-mm@...ck.org, linux-doc@...r.kernel.org,
 linux-kernel@...r.kernel.org
Subject: Re: [PATCH v8 mm-new 01/12] mm: thp: remove disabled task from
 khugepaged_mm_slot



On 26/09/2025 10:33, Yafang Shao wrote:
> Since a task with MMF_DISABLE_THP_COMPLETELY cannot use THP, remove it from
> the khugepaged_mm_slot to stop khugepaged from processing it.
> 
> After this change, the following semantic relationship always holds:
> 
>   MMF_VM_HUGEPAGE is set     == task is in khugepaged mm_slot
>   MMF_VM_HUGEPAGE is not set == task is not in khugepaged mm_slot
> 
> Signed-off-by: Yafang Shao <laoar.shao@...il.com>
> Acked-by: Lance Yang <lance.yang@...ux.dev>
> ---
>  include/linux/khugepaged.h |  4 ++++
>  kernel/sys.c               |  7 ++++--
>  mm/khugepaged.c            | 49 ++++++++++++++++++++------------------
>  3 files changed, 35 insertions(+), 25 deletions(-)
> 


Hi Yafang,

Thanks for the patch! Sorry I wasn't able to review the previous revisions.

Would it make sense to separate this patch out of the series?
It would make the review of the series shorter, and this patch could be merged independently.

The commit message should also state explicitly that when the prctl
PR_SET_THP_DISABLE is cleared, the mm is added back for khugepaged to consider.
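For example, this sequence (an untested userspace sketch, using the
PR_SET_THP_DISABLE prctl from prctl(2)) is the case that relies on the
khugepaged_enter_mm() call on the clear path:

  #include <stdio.h>
  #include <sys/prctl.h>

  #ifndef PR_SET_THP_DISABLE
  #define PR_SET_THP_DISABLE 41
  #endif

  int main(void)
  {
          /* Disable THP completely; khugepaged drops this mm's slot. */
          if (prctl(PR_SET_THP_DISABLE, 1, 0, 0, 0))
                  perror("prctl(PR_SET_THP_DISABLE, 1)");

          /*
           * Clear the disable; with this patch, khugepaged_enter_mm()
           * re-registers the mm so khugepaged considers it again.
           */
          if (prctl(PR_SET_THP_DISABLE, 0, 0, 0, 0))
                  perror("prctl(PR_SET_THP_DISABLE, 0)");

          return 0;
  }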

Could you also mention in the commit message why the VM_BUG_ON_MM() was
turned into a VM_WARN_ON_ONCE()?

Thanks!

> diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
> index eb1946a70cff..f14680cd9854 100644
> --- a/include/linux/khugepaged.h
> +++ b/include/linux/khugepaged.h
> @@ -15,6 +15,7 @@ extern void __khugepaged_enter(struct mm_struct *mm);
>  extern void __khugepaged_exit(struct mm_struct *mm);
>  extern void khugepaged_enter_vma(struct vm_area_struct *vma,
>  				 vm_flags_t vm_flags);
> +extern void khugepaged_enter_mm(struct mm_struct *mm);
>  extern void khugepaged_min_free_kbytes_update(void);
>  extern bool current_is_khugepaged(void);
>  extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
> @@ -42,6 +43,9 @@ static inline void khugepaged_enter_vma(struct vm_area_struct *vma,
>  					vm_flags_t vm_flags)
>  {
>  }
> +static inline void khugepaged_enter_mm(struct mm_struct *mm)
> +{
> +}
>  static inline int collapse_pte_mapped_thp(struct mm_struct *mm,
>  					  unsigned long addr, bool install_pmd)
>  {
> diff --git a/kernel/sys.c b/kernel/sys.c
> index a46d9b75880b..2c445bf44ce3 100644
> --- a/kernel/sys.c
> +++ b/kernel/sys.c
> @@ -8,6 +8,7 @@
>  #include <linux/export.h>
>  #include <linux/mm.h>
>  #include <linux/mm_inline.h>
> +#include <linux/khugepaged.h>
>  #include <linux/utsname.h>
>  #include <linux/mman.h>
>  #include <linux/reboot.h>
> @@ -2479,7 +2480,7 @@ static int prctl_set_thp_disable(bool thp_disable, unsigned long flags,
>  	/* Flags are only allowed when disabling. */
>  	if ((!thp_disable && flags) || (flags & ~PR_THP_DISABLE_EXCEPT_ADVISED))
>  		return -EINVAL;
> -	if (mmap_write_lock_killable(current->mm))
> +	if (mmap_write_lock_killable(mm))
>  		return -EINTR;
>  	if (thp_disable) {
>  		if (flags & PR_THP_DISABLE_EXCEPT_ADVISED) {
> @@ -2493,7 +2494,9 @@ static int prctl_set_thp_disable(bool thp_disable, unsigned long flags,
>  		mm_flags_clear(MMF_DISABLE_THP_COMPLETELY, mm);
>  		mm_flags_clear(MMF_DISABLE_THP_EXCEPT_ADVISED, mm);
>  	}
> -	mmap_write_unlock(current->mm);
> +
> +	khugepaged_enter_mm(mm);
> +	mmap_write_unlock(mm);
>  	return 0;
>  }
>  
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 7ab2d1a42df3..f47ac8c19447 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -396,15 +396,10 @@ void __init khugepaged_destroy(void)
>  	kmem_cache_destroy(mm_slot_cache);
>  }
>  
> -static inline int hpage_collapse_test_exit(struct mm_struct *mm)
> -{
> -	return atomic_read(&mm->mm_users) == 0;
> -}
> -
>  static inline int hpage_collapse_test_exit_or_disable(struct mm_struct *mm)
>  {
> -	return hpage_collapse_test_exit(mm) ||
> -		mm_flags_test(MMF_DISABLE_THP_COMPLETELY, mm);
> +	return !atomic_read(&mm->mm_users) ||			/* exit */
> +		mm_flags_test(MMF_DISABLE_THP_COMPLETELY, mm);  /* disable */
>  }
>  
>  static bool hugepage_pmd_enabled(void)
> @@ -437,7 +432,7 @@ void __khugepaged_enter(struct mm_struct *mm)
>  	int wakeup;
>  
>  	/* __khugepaged_exit() must not run from under us */
> -	VM_BUG_ON_MM(hpage_collapse_test_exit(mm), mm);
> +	VM_WARN_ON_ONCE(hpage_collapse_test_exit_or_disable(mm));
>  	if (unlikely(mm_flags_test_and_set(MMF_VM_HUGEPAGE, mm)))
>  		return;
>  
> @@ -460,14 +455,25 @@ void __khugepaged_enter(struct mm_struct *mm)
>  		wake_up_interruptible(&khugepaged_wait);
>  }
>  
> +void khugepaged_enter_mm(struct mm_struct *mm)
> +{
> +	if (mm_flags_test(MMF_DISABLE_THP_COMPLETELY, mm))
> +		return;
> +	if (mm_flags_test(MMF_VM_HUGEPAGE, mm))
> +		return;
> +	if (!hugepage_pmd_enabled())
> +		return;
> +
> +	__khugepaged_enter(mm);
> +}
> +
>  void khugepaged_enter_vma(struct vm_area_struct *vma,
>  			  vm_flags_t vm_flags)
>  {
> -	if (!mm_flags_test(MMF_VM_HUGEPAGE, vma->vm_mm) &&
> -	    hugepage_pmd_enabled()) {
> -		if (thp_vma_allowable_order(vma, vm_flags, TVA_KHUGEPAGED, PMD_ORDER))
> -			__khugepaged_enter(vma->vm_mm);
> -	}
> +	if (!thp_vma_allowable_order(vma, vm_flags, TVA_KHUGEPAGED, PMD_ORDER))
> +		return;
> +
> +	khugepaged_enter_mm(vma->vm_mm);
>  }
>  
>  void __khugepaged_exit(struct mm_struct *mm)
> @@ -491,7 +497,7 @@ void __khugepaged_exit(struct mm_struct *mm)
>  	} else if (slot) {
>  		/*
>  		 * This is required to serialize against
> -		 * hpage_collapse_test_exit() (which is guaranteed to run
> +		 * hpage_collapse_test_exit_or_disable() (which is guaranteed to run
>  		 * under mmap sem read mode). Stop here (after we return all
>  		 * pagetables will be destroyed) until khugepaged has finished
>  		 * working on the pagetables under the mmap_lock.
> @@ -1429,16 +1435,13 @@ static void collect_mm_slot(struct mm_slot *slot)
>  
>  	lockdep_assert_held(&khugepaged_mm_lock);
>  
> -	if (hpage_collapse_test_exit(mm)) {
> +	if (hpage_collapse_test_exit_or_disable(mm)) {
>  		/* free mm_slot */
>  		hash_del(&slot->hash);
>  		list_del(&slot->mm_node);
>  
> -		/*
> -		 * Not strictly needed because the mm exited already.
> -		 *
> -		 * mm_flags_clear(MMF_VM_HUGEPAGE, mm);
> -		 */
> +		/* If the mm is disabled, this flag must be cleared. */
> +		mm_flags_clear(MMF_VM_HUGEPAGE, mm);
>  
>  		/* khugepaged_mm_lock actually not necessary for the below */
>  		mm_slot_free(mm_slot_cache, slot);
> @@ -1749,7 +1752,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
>  		if (find_pmd_or_thp_or_none(mm, addr, &pmd) != SCAN_SUCCEED)
>  			continue;
>  
> -		if (hpage_collapse_test_exit(mm))
> +		if (hpage_collapse_test_exit_or_disable(mm))
>  			continue;
>  		/*
>  		 * When a vma is registered with uffd-wp, we cannot recycle
> @@ -2500,9 +2503,9 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int *result,
>  	VM_BUG_ON(khugepaged_scan.mm_slot != slot);
>  	/*
>  	 * Release the current mm_slot if this mm is about to die, or
> -	 * if we scanned all vmas of this mm.
> +	 * if we scanned all vmas of this mm, or if this mm is disabled.
>  	 */
> -	if (hpage_collapse_test_exit(mm) || !vma) {
> +	if (hpage_collapse_test_exit_or_disable(mm) || !vma) {
>  		/*
>  		 * Make sure that if mm_users is reaching zero while
>  		 * khugepaged runs here, khugepaged_exit will find

