lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b8fe659e-8a84-4328-b6d6-6116c616cb3d@redhat.com>
Date: Thu, 12 Jun 2025 15:05:22 +0200
From: David Hildenbrand <david@...hat.com>
To: Baolin Wang <baolin.wang@...ux.alibaba.com>, akpm@...ux-foundation.org,
 hughd@...gle.com
Cc: lorenzo.stoakes@...cle.com, Liam.Howlett@...cle.com, npache@...hat.com,
 ryan.roberts@....com, dev.jain@....com, ziy@...dia.com, linux-mm@...ck.org,
 linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 1/2] mm: huge_memory: disallow hugepages if the
 system-wide THP sysfs settings are disabled

On 12.06.25 14:45, Baolin Wang wrote:
> 
> 
> On 2025/6/12 16:51, David Hildenbrand wrote:
>> On 12.06.25 09:51, Baolin Wang wrote:
>>>
>>>
>>> On 2025/6/11 20:34, David Hildenbrand wrote:
>>>> On 05.06.25 10:00, Baolin Wang wrote:
>>>>> The MADV_COLLAPSE will ignore the system-wide Anon THP sysfs settings,
>>>>> which
>>>>> means that even though we have disabled the Anon THP configuration,
>>>>> MADV_COLLAPSE
>>>>> will still attempt to collapse into a Anon THP. This violates the rule
>>>>> we have
>>>>> agreed upon: never means never.
>>>>>
>>>>> Another rule for madvise, referring to David's suggestion: “allowing
>>>>> for collapsing
>>>>> in a VM without VM_HUGEPAGE in the "madvise" mode would be fine".
>>>>>
>>>>> To address this issue, should check whether the Anon THP configuration
>>>>> is disabled
>>>>> in thp_vma_allowable_orders(), even when the TVA_ENFORCE_SYSFS flag is
>>>>> set.
>>>>>
>>>>> In summary, the current strategy is:
>>>>>
>>>>> 1. If always & orders == 0, and madvise & orders == 0, and
>>>>> hugepage_global_enabled() == false
>>>>> (global THP settings are not enabled), it means mTHP of that orders
>>>>> are prohibited
>>>>> from being used, then madvise_collapse() is forbidden for that orders.
>>>>>
>>>>> 2. If always & orders == 0, and madvise & orders == 0, and
>>>>> hugepage_global_enabled() == true
>>>>> (global THP settings are enabled), and inherit & orders == 0, it means
>>>>> mTHP of that
>>>>> orders are still prohibited from being used, thus madvise_collapse()
>>>>> is not allowed
>>>>> for that orders.
>>>>>
>>>>> Reviewed-by: Zi Yan <ziy@...dia.com>
>>>>> Signed-off-by: Baolin Wang <baolin.wang@...ux.alibaba.com>
>>>>> ---
>>>>>     include/linux/huge_mm.h | 23 +++++++++++++++++++----
>>>>>     1 file changed, 19 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>>>> index 2f190c90192d..199ddc9f04a1 100644
>>>>> --- a/include/linux/huge_mm.h
>>>>> +++ b/include/linux/huge_mm.h
>>>>> @@ -287,20 +287,35 @@ unsigned long thp_vma_allowable_orders(struct
>>>>> vm_area_struct *vma,
>>>>>                            unsigned long orders)
>>>>>     {
>>>>>         /* Optimization to check if required orders are enabled
>>>>> early. */
>>>>> -    if ((tva_flags & TVA_ENFORCE_SYSFS) && vma_is_anonymous(vma)) {
>>>>> -        unsigned long mask = READ_ONCE(huge_anon_orders_always);
>>>>> +    if (vma_is_anonymous(vma)) {
>>>>> +        unsigned long always = READ_ONCE(huge_anon_orders_always);
>>>>> +        unsigned long madvise = READ_ONCE(huge_anon_orders_madvise);
>>>>> +        unsigned long inherit = READ_ONCE(huge_anon_orders_inherit);
>>>>> +        unsigned long mask = always | madvise;
>>>>> +
>>>>> +        /*
>>>>> +         * If the system-wide THP/mTHP sysfs settings are disabled,
>>>>> +         * then we should never allow hugepages.
>>>>    > +         */> +        if (!(mask & orders) &&
>>>> !(hugepage_global_enabled() && (inherit & orders)))
>>>>> +            return 0;
>>>>
>>>> I'm still trying to digest that. Isn't there a way for us to work with
>>>> the orders,
>>>> essentially masking off all orders that are forbidden globally. Similar
>>>> to below, if !orders, then return 0?
>>>> /* Orders disabled directly. */
>>>> orders &= ~TODO;
>>>> /* Orders disabled by inheriting from the global toggle. */
>>>> if (!hugepage_global_enabled())
>>>>        orders &= ~READ_ONCE(huge_anon_orders_inherit);
>>>>
>>>> TODO is probably a -1ULL and then clearing always/madvise/inherit. Could
>>>> add a simple helper for that
>>>>
>>>> huge_anon_orders_never
>>>
>>> I followed Lorenzo's suggestion to simplify the logic. Does that look
>>> more readable?
>>>
>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>> index 2f190c90192d..3087ac7631e0 100644
>>> --- a/include/linux/huge_mm.h
>>> +++ b/include/linux/huge_mm.h
>>> @@ -265,6 +265,43 @@ unsigned long __thp_vma_allowable_orders(struct
>>> vm_area_struct *vma,
>>>                                             unsigned long tva_flags,
>>>                                             unsigned long orders);
>>>
>>> +/* Strictly mask requested anonymous orders according to sysfs
>>> settings. */
>>> +static inline unsigned long __thp_mask_anon_orders(unsigned long
>>> vm_flags,
>>> +                               unsigned long tva_flags, unsigned long
>>> orders)
>>> +{
>>> +       unsigned long always = READ_ONCE(huge_anon_orders_always);
>>> +       unsigned long madvise = READ_ONCE(huge_anon_orders_madvise);
>>> +       unsigned long inherit = READ_ONCE(huge_anon_orders_inherit);
>>> +       bool inherit_enabled = hugepage_global_enabled();
>>> +       bool has_madvise =  vm_flags & VM_HUGEPAGE;
>>> +       unsigned long mask = always | madvise;
>>> +
>>> +       mask = always | madvise;
>>> +       if (inherit_enabled)
>>> +               mask |= inherit;
>>> +
>>> +       /* All set to/inherit NEVER - never means never globally,
>>> abort. */
>>> +       if (!(mask & orders))
>>> +               return 0;
>>
>> Still confusing. I am not sure if we would properly catch when someone
>> specifies e.g., 2M and 1M, while we only have 2M disabled.
> 
> IIUC, Yes. In your case, we will only allow order 8 (1M mTHP).
> 
>> I would rewrite the function to only ever substract from "orders".
>>
>> ...
>>
>> /* Disallow orders that are set to NEVER directly ... */
>> order &= (always | madvise | inherit);
>>
>> /* ... or through inheritance. */
>> if (inherit_enabled)
>>       orders &= ~inherit;
> 
> Sorry, I didn't get you here.
> 
> If orders = THP_ORDERS_ALL_ANON, inherit = 0x200 (order 9), always and
> madvise are 0, and inherit_enabled = true. Then orders will be 0 with
> your logic. But we should allow order 9, right?

Yeah, all confusing, because the temporary variables don't help.

if (!inherit_enabled)

or simply

if (!hugepage_global_enabled();)

Let me try again below.

> 
>>
>> /*
>>    * Otherwise, we only enforce sysfs settings if asked. In addition,
>>    * if the user sets a sysfs mode of madvise and if TVA_ENFORCE_SYSFS
>>    * is not set, we don't bother checking whether the VMA has VM_HUGEPAGE
>>    * set.
>>    */
>> if (!orders || !(tva_flags & TVA_ENFORCE_SYSFS))
>>       return orders;
>>
>>> +
>>> +       /*
>>> +        * Otherwise, we only enforce sysfs settings if asked. In
>>> addition,
>>> +        * if the user sets a sysfs mode of madvise and if
>>> TVA_ENFORCE_SYSFS
>>> +        * is not set, we don't bother checking whether the VMA has
>>> VM_HUGEPAGE
>>> +        * set.
>>> +        */
>>> +       if (!(tva_flags & TVA_ENFORCE_SYSFS))
>>> +               return orders;
>>> +
>>> +       mask = always;
>>> +       if (has_madvise)
>>> +               mask |= madvise;
>>> +       if (hugepage_global_always() || (has_madvise && inherit_enabled))
>>> +               mask |= inherit;
>>
>> Similarly, this can maybe become (not 100% sure if I got it right, the
>> condition above is confusing)
> 
> IMO, this is the original logic.

Yeah, and it's absolutely confusing stuff.

Let me try again by only clearing flags. Maybe this would be clearer?
(and correct? still confused why the latter part is so complicated in existing
code)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 8b8f353cc7b81..66fdfe06e4996 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -265,6 +265,42 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
                                          unsigned long tva_flags,
                                          unsigned long orders);
  
+/* Strictly mask requested anonymous orders according to sysfs settings. */
+static inline unsigned long __thp_mask_anon_orders(unsigned long vm_flags,
+       unsigned long tva_flags, unsigned long orders)
+{
+       const unsigned long always = READ_ONCE(huge_anon_orders_always);
+       const unsigned long madvise = READ_ONCE(huge_anon_orders_madvise);
+       const unsigned long inherit = READ_ONCE(huge_anon_orders_inherit);
+       const unsigned long never = ~(always | madvise | inherit);
+
+       /* Disallow orders that are set to NEVER directly ... */
+       orders &= ~never;
+
+       /* ... or through inheritance (global == NEVER). */
+       if (!hugepage_global_enabled())
+               orders &= ~inherit;
+
+       /*
+        * Otherwise, we only enforce sysfs settings if asked. In addition,
+        * if the user sets a sysfs mode of madvise and if TVA_ENFORCE_SYSFS
+        * is not set, we don't bother checking whether the VMA has VM_HUGEPAGE
+        * set.
+        */
+       if (!(tva_flags & TVA_ENFORCE_SYSFS))
+               return orders;
+
+       if (!(vm_flags & VM_HUGEPAGE)) {
+               /* Disallow orders that are set to MADVISE directly ... */
+               orders &= ~madvise;
+
+               /* ... or through inheritance (global == MADVISE). */
+               if (!hugepage_global_always())
+                       orders &= ~inherit;
+       }
+       return orders;
+}
+
  /**
   * thp_vma_allowable_orders - determine hugepage orders that are allowed for vma
   * @vma:  the vm area to check
@@ -287,16 +323,8 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
                                        unsigned long orders)
  {
         /* Optimization to check if required orders are enabled early. */
-       if ((tva_flags & TVA_ENFORCE_SYSFS) && vma_is_anonymous(vma)) {
-               unsigned long mask = READ_ONCE(huge_anon_orders_always);
-
-               if (vm_flags & VM_HUGEPAGE)
-                       mask |= READ_ONCE(huge_anon_orders_madvise);
-               if (hugepage_global_always() ||
-                   ((vm_flags & VM_HUGEPAGE) && hugepage_global_enabled()))
-                       mask |= READ_ONCE(huge_anon_orders_inherit);
-
-               orders &= mask;
+       if (vma_is_anonymous(vma)) {
+               orders = __thp_mask_anon_orders(vm_flags, tva_flags, orders);
                 if (!orders)
                         return 0;
         }


-- 
Cheers,

David / dhildenb


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ