lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <2f1cdad3-6ac4-4502-a599-5bef9dbe0847@lucifer.local>
Date: Wed, 19 Nov 2025 12:13:00 +0000
From: Lorenzo Stoakes <lorenzo.stoakes@...cle.com>
To: Nico Pache <npache@...hat.com>
Cc: linux-kernel@...r.kernel.org, linux-trace-kernel@...r.kernel.org,
        linux-mm@...ck.org, linux-doc@...r.kernel.org, david@...hat.com,
        ziy@...dia.com, baolin.wang@...ux.alibaba.com, Liam.Howlett@...cle.com,
        ryan.roberts@....com, dev.jain@....com, corbet@....net,
        rostedt@...dmis.org, mhiramat@...nel.org,
        mathieu.desnoyers@...icios.com, akpm@...ux-foundation.org,
        baohua@...nel.org, willy@...radead.org, peterx@...hat.com,
        wangkefeng.wang@...wei.com, usamaarif642@...il.com,
        sunnanyong@...wei.com, vishal.moola@...il.com,
        thomas.hellstrom@...ux.intel.com, yang@...amperecomputing.com,
        kas@...nel.org, aarcange@...hat.com, raquini@...hat.com,
        anshuman.khandual@....com, catalin.marinas@....com, tiwai@...e.de,
        will@...nel.org, dave.hansen@...ux.intel.com, jack@...e.cz,
        cl@...two.org, jglisse@...gle.com, surenb@...gle.com,
        zokeefe@...gle.com, hannes@...xchg.org, rientjes@...gle.com,
        mhocko@...e.com, rdunlap@...radead.org, hughd@...gle.com,
        richard.weiyang@...il.com, lance.yang@...ux.dev, vbabka@...e.cz,
        rppt@...nel.org, jannh@...gle.com, pfalcato@...e.de
Subject: Re: [PATCH v12 mm-new 14/15] khugepaged: run khugepaged for all
 orders

On Wed, Oct 22, 2025 at 12:37:16PM -0600, Nico Pache wrote:
> From: Baolin Wang <baolin.wang@...ux.alibaba.com>
>
> If any order (m)THP is enabled we should allow running khugepaged to
> attempt scanning and collapsing mTHPs. In order for khugepaged to operate
> when only mTHP sizes are specified in sysfs, we must modify the predicate
> function that determines whether it ought to run to do so.
>
> This function is currently called hugepage_pmd_enabled(), this patch
> renames it to hugepage_enabled() and updates the logic to check to
> determine whether any valid orders may exist which would justify
> khugepaged running.
>
> We must also update collapse_allowable_orders() to check all orders if
> the vma is anonymous and the collapse is khugepaged.
>
> After this patch khugepaged mTHP collapse is fully enabled.
>
> Signed-off-by: Baolin Wang <baolin.wang@...ux.alibaba.com>
> Signed-off-by: Nico Pache <npache@...hat.com>
> ---
>  mm/khugepaged.c | 25 +++++++++++++------------
>  1 file changed, 13 insertions(+), 12 deletions(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 54f5c7888e46..8ed9f8e2d376 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -418,23 +418,23 @@ static inline int collapse_test_exit_or_disable(struct mm_struct *mm)
>  		mm_flags_test(MMF_DISABLE_THP_COMPLETELY, mm);
>  }
>
> -static bool hugepage_pmd_enabled(void)
> +static bool hugepage_enabled(void)
>  {
>  	/*
>  	 * We cover the anon, shmem and the file-backed case here; file-backed
>  	 * hugepages, when configured in, are determined by the global control.
> -	 * Anon pmd-sized hugepages are determined by the pmd-size control.
> +	 * Anon hugepages are determined by its per-size mTHP control.
>  	 * Shmem pmd-sized hugepages are also determined by its pmd-size control,
>  	 * except when the global shmem_huge is set to SHMEM_HUGE_DENY.
>  	 */
>  	if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
>  	    hugepage_global_enabled())
>  		return true;
> -	if (test_bit(PMD_ORDER, &huge_anon_orders_always))
> +	if (READ_ONCE(huge_anon_orders_always))
>  		return true;
> -	if (test_bit(PMD_ORDER, &huge_anon_orders_madvise))
> +	if (READ_ONCE(huge_anon_orders_madvise))
>  		return true;
> -	if (test_bit(PMD_ORDER, &huge_anon_orders_inherit) &&
> +	if (READ_ONCE(huge_anon_orders_inherit) &&
>  	    hugepage_global_enabled())
>  		return true;
>  	if (IS_ENABLED(CONFIG_SHMEM) && shmem_hpage_pmd_enabled())
> @@ -508,7 +508,8 @@ static unsigned long collapse_allowable_orders(struct vm_area_struct *vma,
>  			vm_flags_t vm_flags, bool is_khugepaged)
>  {
>  	enum tva_type tva_flags = is_khugepaged ? TVA_KHUGEPAGED : TVA_FORCED_COLLAPSE;
> -	unsigned long orders = BIT(HPAGE_PMD_ORDER);
> +	unsigned long orders = is_khugepaged && vma_is_anonymous(vma) ?
> +				THP_ORDERS_ALL_ANON : BIT(HPAGE_PMD_ORDER);

Why are we doing this? If this is explicitly enabling mTHP for anon, which it
seems to be, can we please make this a little more explicit :)

I'd prefer this not to be a horribly squashed ternary, rather:

	unsigned long orders;

	/* We explicitly allow mTHP collapse for anonymous khugepaged ONLY. */
	if (is_khugepaged && vma_is_anonymous(vma))
		orders = THP_ORDERS_ALL_ANON;
	else
		orders = BIT(HPAGE_PMD_ORDER);


Also, does THP_ORDERS_ALL_ANON account for KHUGEPAGED_MIN_MTHP_ORDER? It's weird
to say that an order is allowed that isn't permitted by mTHP (e.g. order-0).

>
>  	return thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders);
>  }
> @@ -517,7 +518,7 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,
>  			  vm_flags_t vm_flags)
>  {
>  	if (!mm_flags_test(MMF_VM_HUGEPAGE, vma->vm_mm) &&
> -	    hugepage_pmd_enabled()) {
> +	    hugepage_enabled()) {
>  		if (collapse_allowable_orders(vma, vm_flags, true))
>  			__khugepaged_enter(vma->vm_mm);
>  	}
> @@ -2791,7 +2792,7 @@ static unsigned int collapse_scan_mm_slot(unsigned int pages, int *result,
>
>  static int khugepaged_has_work(void)
>  {
> -	return !list_empty(&khugepaged_scan.mm_head) && hugepage_pmd_enabled();
> +	return !list_empty(&khugepaged_scan.mm_head) && hugepage_enabled();
>  }
>
>  static int khugepaged_wait_event(void)
> @@ -2864,7 +2865,7 @@ static void khugepaged_wait_work(void)
>  		return;
>  	}
>
> -	if (hugepage_pmd_enabled())
> +	if (hugepage_enabled())
>  		wait_event_freezable(khugepaged_wait, khugepaged_wait_event());
>  }
>
> @@ -2895,7 +2896,7 @@ static void set_recommended_min_free_kbytes(void)
>  	int nr_zones = 0;
>  	unsigned long recommended_min;
>
> -	if (!hugepage_pmd_enabled()) {
> +	if (!hugepage_enabled()) {
>  		calculate_min_free_kbytes();
>  		goto update_wmarks;
>  	}
> @@ -2945,7 +2946,7 @@ int start_stop_khugepaged(void)
>  	int err = 0;
>
>  	mutex_lock(&khugepaged_mutex);
> -	if (hugepage_pmd_enabled()) {
> +	if (hugepage_enabled()) {
>  		if (!khugepaged_thread)
>  			khugepaged_thread = kthread_run(khugepaged, NULL,
>  							"khugepaged");
> @@ -2971,7 +2972,7 @@ int start_stop_khugepaged(void)
>  void khugepaged_min_free_kbytes_update(void)
>  {
>  	mutex_lock(&khugepaged_mutex);
> -	if (hugepage_pmd_enabled() && khugepaged_thread)
> +	if (hugepage_enabled() && khugepaged_thread)
>  		set_recommended_min_free_kbytes();
>  	mutex_unlock(&khugepaged_mutex);
>  }
> --
> 2.51.0
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ