Message-ID: <db36c444-0327-455b-9502-65d308237018@linux.alibaba.com>
Date: Wed, 15 May 2024 11:09:21 +0800
From: Baolin Wang <baolin.wang@...ux.alibaba.com>
To: Lance Yang <ioworker0@...il.com>
Cc: akpm@...ux-foundation.org, hughd@...gle.com, willy@...radead.org,
david@...hat.com, wangkefeng.wang@...wei.com, ying.huang@...el.com,
21cnbao@...il.com, ryan.roberts@....com, shy828301@...il.com,
ziy@...dia.com, linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 5/7] mm: shmem: add mTHP support for anonymous shmem
On 2024/5/14 21:36, Lance Yang wrote:
> Hi Baolin,
>
> On Mon, May 13, 2024 at 1:08 PM Baolin Wang
> <baolin.wang@...ux.alibaba.com> wrote:
>>
>> Commit 19eaf44954df added multi-size THP (mTHP) for anonymous pages, which
>> allows THP to be configured through the sysfs interface located at
>> '/sys/kernel/mm/transparent_hugepage/hugepage-XXkb/enabled'.
>>
>> However, anonymous shmem pages ignore the anonymous mTHP rule configured
>> through the sysfs interface and can only use PMD-mapped THP, which is not
>> reasonable. Users expect the mTHP rule to apply to all anonymous pages,
>> including anonymous shmem pages, in order to enjoy the benefits of mTHP:
>> for example, lower latency than PMD-mapped THP, smaller memory bloat than
>> PMD-mapped THP, and contiguous PTEs on ARM architectures to reduce TLB
>> misses.
>>
>> The primary strategy is similar to the anonymous mTHP support: introduce
>> a new interface '/sys/kernel/mm/transparent_hugepage/hugepage-XXkb/shmem_enabled',
>> which accepts all the same values as the top-level
>> '/sys/kernel/mm/transparent_hugepage/shmem_enabled', plus an additional
>> "inherit" option. By default all sizes are set to "never" except the PMD
>> size, which is set to "inherit". This preserves backward compatibility with
>> the top-level anonymous shmem enabled setting, while also allowing
>> independent control of anonymous shmem enabled for each mTHP size.
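
For illustration, a minimal user-space sketch of driving the new per-size
knob described above (assuming a kernel with this series applied; the
'hugepage-64kB' directory name and the "always" value are only examples,
and writing the file needs root):

    #include <stdio.h>

    int main(void)
    {
            const char *knob =
                    "/sys/kernel/mm/transparent_hugepage/hugepage-64kB/shmem_enabled";
            FILE *f = fopen(knob, "w");

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            /* Accepts the same values as the top-level shmem_enabled,
             * plus the new "inherit" option. */
            if (fputs("always", f) == EOF)
                    perror("fputs");
            fclose(f);
            return 0;
    }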
>>
>> Signed-off-by: Baolin Wang <baolin.wang@...ux.alibaba.com>
>> ---
>> include/linux/huge_mm.h | 10 +++
>> mm/shmem.c | 179 +++++++++++++++++++++++++++++++++-------
>> 2 files changed, 161 insertions(+), 28 deletions(-)
>>
>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>> index 1fce6fee7766..b5339210268d 100644
>> --- a/include/linux/huge_mm.h
>> +++ b/include/linux/huge_mm.h
>> @@ -583,6 +583,16 @@ static inline bool thp_migration_supported(void)
>> {
>> return false;
>> }
>> +
>> +static inline int highest_order(unsigned long orders)
>> +{
>> + return 0;
>> +}
>> +
>> +static inline int next_order(unsigned long *orders, int prev)
>> +{
>> + return 0;
>> +}
>> #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>>
>> static inline int split_folio_to_list_to_order(struct folio *folio,
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index 59cc26d44344..b50ddf013e37 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -1611,6 +1611,106 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
>> return result;
>> }
>>
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +static unsigned long anon_shmem_allowable_huge_orders(struct inode *inode,
>> + struct vm_area_struct *vma, pgoff_t index,
>> + bool global_huge)
>> +{
>> + unsigned long mask = READ_ONCE(huge_anon_shmem_orders_always);
>> + unsigned long within_size_orders = READ_ONCE(huge_anon_shmem_orders_within_size);
>> + unsigned long vm_flags = vma->vm_flags;
>> + /*
>> + * Check all the (large) orders below HPAGE_PMD_ORDER + 1 that
>> + * are enabled for this vma.
>> + */
>> + unsigned long orders = BIT(PMD_ORDER + 1) - 1;
>> + loff_t i_size;
>> + int order;
>> +
>> + if ((vm_flags & VM_NOHUGEPAGE) ||
>> + test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
>> + return 0;
>> +
>> + /* If the hardware/firmware marked hugepage support disabled. */
>> + if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED))
>> + return 0;
>> +
>> + /*
>> + * Following the 'deny' semantics of the top level, force the huge
>> + * option off from all mounts.
>> + */
>> + if (shmem_huge == SHMEM_HUGE_DENY)
>> + return 0;
>> + /*
>> + * Only allow inherit orders if the top-level value is 'force', which
>> + * means non-PMD sized THP can not override 'huge' mount option now.
>> + */
>> + if (shmem_huge == SHMEM_HUGE_FORCE)
>> + return READ_ONCE(huge_anon_shmem_orders_inherit);
>> +
>> + /* Allow mTHP that will be fully within i_size. */
>> + order = highest_order(within_size_orders);
>> + while (within_size_orders) {
>> + index = round_up(index + 1, 1 << order);
>> + i_size = round_up(i_size_read(inode), PAGE_SIZE);
>> + if (i_size >> PAGE_SHIFT >= index) {
>> + mask |= within_size_orders;
>> + break;
>> + }
>> +
>> + order = next_order(&within_size_orders, order);
>> + }
>> +
>> + if (vm_flags & VM_HUGEPAGE)
>> + mask |= READ_ONCE(huge_anon_shmem_orders_madvise);
>> +
>> + if (global_huge)
>> + mask |= READ_ONCE(huge_anon_shmem_orders_inherit);
>> +
>> + return orders & mask;
>> +}
>> +
>> +static unsigned long anon_shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf,
>> + struct address_space *mapping, pgoff_t index,
>> + unsigned long orders)
>> +{
>> + struct vm_area_struct *vma = vmf->vma;
>> + unsigned long pages;
>> + int order;
>> +
>> + orders = thp_vma_suitable_orders(vma, vmf->address, orders);
>> + if (!orders)
>> + return 0;
>> +
>> + /* Find the highest order that can add into the page cache */
>> + order = highest_order(orders);
>> + while (orders) {
>> + pages = 1UL << order;
>> + index = round_down(index, pages);
>> + if (!xa_find(&mapping->i_pages, &index,
>> + index + pages - 1, XA_PRESENT))
>> + break;
>> + order = next_order(&orders, order);
>> + }
>> +
>> + return orders;
>> +}
>> +#else
>> +static unsigned long anon_shmem_allowable_huge_orders(struct inode *inode,
>> + struct vm_area_struct *vma, pgoff_t index,
>> + bool global_huge)
>> +{
>> + return 0;
>> +}
>> +
>> +static unsigned long anon_shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf,
>> + struct address_space *mapping, pgoff_t index,
>> + unsigned long orders)
>> +{
>> + return 0;
>> +}
>> +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>> +
>> static struct folio *shmem_alloc_hugefolio(gfp_t gfp,
>> struct shmem_inode_info *info, pgoff_t index, int order)
>> {
>> @@ -1639,38 +1739,55 @@ static struct folio *shmem_alloc_folio(gfp_t gfp,
>> return (struct folio *)page;
>> }
>>
>> -static struct folio *shmem_alloc_and_add_folio(gfp_t gfp,
>> - struct inode *inode, pgoff_t index,
>> - struct mm_struct *fault_mm, bool huge)
>> +static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
>> + gfp_t gfp, struct inode *inode, pgoff_t index,
>> + struct mm_struct *fault_mm, bool huge, unsigned long orders)
>
> IMO, it might be cleaner to drop the 'huge' parameter and just set 'orders'
> to BIT(HPAGE_PMD_ORDER); then we only need the 'orders' check :)
>
> Something like:
>
> if (orders > 0) {
>         if (vma && vma_is_anon_shmem(vma)) {
>                 ...
>         } else if (orders & BIT(HPAGE_PMD_ORDER)) {
>                 ...
>         }
> }
Yes, looks better.
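
For concreteness, a rough sketch of that shape (illustrative only, not the
actual follow-up code; it reuses anon_shmem_suitable_orders() from this patch
and the usual HPAGE_PMD_* constants):

	if (orders > 0) {
		if (vma && vma_is_anon_shmem(vma)) {
			/* anon shmem: pick from the allowed mTHP orders. */
			suitable_orders = anon_shmem_suitable_orders(inode, vmf,
							mapping, index, orders);
		} else if (orders & BIT(HPAGE_PMD_ORDER)) {
			/* other shmem/tmpfs: PMD-sized THP only, replacing 'huge'. */
			pages = HPAGE_PMD_NR;
			suitable_orders = BIT(HPAGE_PMD_ORDER);
			index = round_down(index, HPAGE_PMD_NR);
		}
	}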
>> {
>> struct address_space *mapping = inode->i_mapping;
>> struct shmem_inode_info *info = SHMEM_I(inode);
>> - struct folio *folio;
>> + struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
>> + unsigned long suitable_orders;
>> + struct folio *folio = NULL;
>> long pages;
>> - int error;
>> + int error, order;
>>
>> if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
>> huge = false;
>
> Currently, if THP is disabled, 'huge' falls back to order-0, but 'orders'
> does not, IIUC. How about making both consistent when THP is disabled?
>
> if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
> huge = false;
> orders = 0;
> }
If THP is disabled, 'orders' is already 0 (the !CONFIG_TRANSPARENT_HUGEPAGE
stub of anon_shmem_allowable_huge_orders() returns 0), so there is no need
to reset it.
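
As background for the orders bitmask walked in anon_shmem_allowable_huge_orders()
and anon_shmem_suitable_orders() above, here is a small stand-alone user-space
sketch of the highest-to-lowest walk (the helpers are re-implemented with a GCC
builtin purely for illustration; the kernel versions in huge_mm.h are
authoritative):

    #include <stdio.h>

    /* User-space stand-ins: walk a bitmask of enabled orders downwards. */
    static int highest_order(unsigned long orders)
    {
            return 8 * sizeof(orders) - 1 - __builtin_clzl(orders);
    }

    static int next_order(unsigned long *orders, int prev)
    {
            *orders &= ~(1UL << prev);
            return *orders ? highest_order(*orders) : 0;
    }

    int main(void)
    {
            /* Example mask: orders 9 (PMD with 4K pages), 4 and 2 enabled. */
            unsigned long orders = (1UL << 9) | (1UL << 4) | (1UL << 2);
            int order = highest_order(orders);

            while (orders) {
                    printf("trying order %d (%lu pages)\n", order, 1UL << order);
                    order = next_order(&orders, order);
            }
            return 0;
    }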