Message-ID: <db36c444-0327-455b-9502-65d308237018@linux.alibaba.com>
Date: Wed, 15 May 2024 11:09:21 +0800
From: Baolin Wang <baolin.wang@...ux.alibaba.com>
To: Lance Yang <ioworker0@...il.com>
Cc: akpm@...ux-foundation.org, hughd@...gle.com, willy@...radead.org,
david@...hat.com, wangkefeng.wang@...wei.com, ying.huang@...el.com,
21cnbao@...il.com, ryan.roberts@....com, shy828301@...il.com,
ziy@...dia.com, linux-mm@...ck.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 5/7] mm: shmem: add mTHP support for anonymous shmem
On 2024/5/14 21:36, Lance Yang wrote:
> Hi Baolin,
>
> On Mon, May 13, 2024 at 1:08 PM Baolin Wang
> <baolin.wang@...ux.alibaba.com> wrote:
>>
>> Commit 19eaf44954df added multi-size THP (mTHP) for anonymous pages, which
>> allows THP to be configured through the sysfs interface located at
>> '/sys/kernel/mm/transparent_hugepage/hugepage-XXkb/enabled'.
>>
>> However, anonymous shmem pages ignore the anonymous mTHP rule configured
>> through the sysfs interface and can only use PMD-mapped THP, which is not
>> reasonable. Users expect the mTHP rule to apply to all anonymous pages,
>> including anonymous shmem pages, in order to enjoy the benefits of mTHP:
>> for example, lower latency than PMD-mapped THP, smaller memory bloat than
>> PMD-mapped THP, and contiguous PTEs on ARM architectures to reduce TLB
>> misses.
>>
>> The primary strategy is similar to the anonymous mTHP support: introduce
>> a new interface '/sys/kernel/mm/transparent_hugepage/hugepage-XXkb/shmem_enabled',
>> which accepts all the same values as the top-level
>> '/sys/kernel/mm/transparent_hugepage/shmem_enabled', plus an additional
>> "inherit" option. By default all sizes are set to "never" except the PMD
>> size, which is set to "inherit". This preserves backward compatibility with
>> the top-level anonymous shmem enabled setting, while also allowing
>> independent control of anonymous shmem enabled for each mTHP size.
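
For illustration, a minimal user-space sketch of driving the new per-size
knob described above (assuming a kernel with this series applied; the
'hugepage-64kB' directory name and the "always" value are only examples,
and writing the file needs root):

    #include <stdio.h>

    int main(void)
    {
            const char *knob =
                    "/sys/kernel/mm/transparent_hugepage/hugepage-64kB/shmem_enabled";
            FILE *f = fopen(knob, "w");

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            /* Accepts the same values as the top-level shmem_enabled,
             * plus the new "inherit" option. */
            if (fputs("always", f) == EOF)
                    perror("fputs");
            fclose(f);
            return 0;
    }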
>>
>> Signed-off-by: Baolin Wang <baolin.wang@...ux.alibaba.com>
>> ---
>> include/linux/huge_mm.h | 10 +++
>> mm/shmem.c | 179 +++++++++++++++++++++++++++++++++-------
>> 2 files changed, 161 insertions(+), 28 deletions(-)
>>
>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>> index 1fce6fee7766..b5339210268d 100644
>> --- a/include/linux/huge_mm.h
>> +++ b/include/linux/huge_mm.h
>> @@ -583,6 +583,16 @@ static inline bool thp_migration_supported(void)
>> {
>> return false;
>> }
>> +
>> +static inline int highest_order(unsigned long orders)
>> +{
>> + return 0;
>> +}
>> +
>> +static inline int next_order(unsigned long *orders, int prev)
>> +{
>> + return 0;
>> +}
>> #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>>
>> static inline int split_folio_to_list_to_order(struct folio *folio,
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index 59cc26d44344..b50ddf013e37 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -1611,6 +1611,106 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
>> return result;
>> }
>>
>> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
>> +static unsigned long anon_shmem_allowable_huge_orders(struct inode *inode,
>> + struct vm_area_struct *vma, pgoff_t index,
>> + bool global_huge)
>> +{
>> + unsigned long mask = READ_ONCE(huge_anon_shmem_orders_always);
>> + unsigned long within_size_orders = READ_ONCE(huge_anon_shmem_orders_within_size);
>> + unsigned long vm_flags = vma->vm_flags;
>> + /*
>> + * Check all the (large) orders below HPAGE_PMD_ORDER + 1 that
>> + * are enabled for this vma.
>> + */
>> + unsigned long orders = BIT(PMD_ORDER + 1) - 1;
>> + loff_t i_size;
>> + int order;
>> +
>> + if ((vm_flags & VM_NOHUGEPAGE) ||
>> + test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
>> + return 0;
>> +
>> + /* If the hardware/firmware marked hugepage support disabled. */
>> + if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED))
>> + return 0;
>> +
>> + /*
>> + * Following the 'deny' semantics of the top level, force the huge
>> + * option off from all mounts.
>> + */
>> + if (shmem_huge == SHMEM_HUGE_DENY)
>> + return 0;
>> + /*
>> + * Only allow inherit orders if the top-level value is 'force', which
>> + * means non-PMD sized THP can not override 'huge' mount option now.
>> + */
>> + if (shmem_huge == SHMEM_HUGE_FORCE)
>> + return READ_ONCE(huge_anon_shmem_orders_inherit);
>> +
>> + /* Allow mTHP that will be fully within i_size. */
>> + order = highest_order(within_size_orders);
>> + while (within_size_orders) {
>> + index = round_up(index + 1, 1 << order);
>> + i_size = round_up(i_size_read(inode), PAGE_SIZE);
>> + if (i_size >> PAGE_SHIFT >= index) {
>> + mask |= within_size_orders;
>> + break;
>> + }
>> +
>> + order = next_order(&within_size_orders, order);
>> + }
>> +
>> + if (vm_flags & VM_HUGEPAGE)
>> + mask |= READ_ONCE(huge_anon_shmem_orders_madvise);
>> +
>> + if (global_huge)
>> + mask |= READ_ONCE(huge_anon_shmem_orders_inherit);
>> +
>> + return orders & mask;
>> +}
>> +
>> +static unsigned long anon_shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf,
>> + struct address_space *mapping, pgoff_t index,
>> + unsigned long orders)
>> +{
>> + struct vm_area_struct *vma = vmf->vma;
>> + unsigned long pages;
>> + int order;
>> +
>> + orders = thp_vma_suitable_orders(vma, vmf->address, orders);
>> + if (!orders)
>> + return 0;
>> +
>> + /* Find the highest order that can add into the page cache */
>> + order = highest_order(orders);
>> + while (orders) {
>> + pages = 1UL << order;
>> + index = round_down(index, pages);
>> + if (!xa_find(&mapping->i_pages, &index,
>> + index + pages - 1, XA_PRESENT))
>> + break;
>> + order = next_order(&orders, order);
>> + }
>> +
>> + return orders;
>> +}
>> +#else
>> +static unsigned long anon_shmem_allowable_huge_orders(struct inode *inode,
>> + struct vm_area_struct *vma, pgoff_t index,
>> + bool global_huge)
>> +{
>> + return 0;
>> +}
>> +
>> +static unsigned long anon_shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf,
>> + struct address_space *mapping, pgoff_t index,
>> + unsigned long orders)
>> +{
>> + return 0;
>> +}
>> +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
>> +
>> static struct folio *shmem_alloc_hugefolio(gfp_t gfp,
>> struct shmem_inode_info *info, pgoff_t index, int order)
>> {
>> @@ -1639,38 +1739,55 @@ static struct folio *shmem_alloc_folio(gfp_t gfp,
>> return (struct folio *)page;
>> }
>>
>> -static struct folio *shmem_alloc_and_add_folio(gfp_t gfp,
>> - struct inode *inode, pgoff_t index,
>> - struct mm_struct *fault_mm, bool huge)
>> +static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
>> + gfp_t gfp, struct inode *inode, pgoff_t index,
>> + struct mm_struct *fault_mm, bool huge, unsigned long orders)
>
> IMO, it might be cleaner to drop the 'huge' parameter and just set 'orders'
> to BIT(HPAGE_PMD_ORDER); then we only need the 'orders' check :)
>
> Something like:
>
> if (orders > 0) {
>         if (vma && vma_is_anon_shmem(vma)) {
>                 ...
>         } else if (orders & BIT(HPAGE_PMD_ORDER)) {
>                 ...
>         }
> }
Yes, looks better.
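
For concreteness, a rough sketch of that shape (illustrative only, not the
actual follow-up code; it reuses anon_shmem_suitable_orders() from this patch
and the usual HPAGE_PMD_* constants):

	if (orders > 0) {
		if (vma && vma_is_anon_shmem(vma)) {
			/* anon shmem: pick from the allowed mTHP orders. */
			suitable_orders = anon_shmem_suitable_orders(inode, vmf,
							mapping, index, orders);
		} else if (orders & BIT(HPAGE_PMD_ORDER)) {
			/* other shmem/tmpfs: PMD-sized THP only, replacing 'huge'. */
			pages = HPAGE_PMD_NR;
			suitable_orders = BIT(HPAGE_PMD_ORDER);
			index = round_down(index, HPAGE_PMD_NR);
		}
	}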
>> {
>> struct address_space *mapping = inode->i_mapping;
>> struct shmem_inode_info *info = SHMEM_I(inode);
>> - struct folio *folio;
>> + struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
>> + unsigned long suitable_orders;
>> + struct folio *folio = NULL;
>> long pages;
>> - int error;
>> + int error, order;
>>
>> if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
>> huge = false;
>
> Currently, if THP is disabled, 'huge' falls back to order-0, but 'orders'
> does not, IIUC. How about making both consistent when THP is disabled?
>
> if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
> huge = false;
> orders = 0;
> }
If THP is disabled, 'orders' is already 0 (the !CONFIG_TRANSPARENT_HUGEPAGE
stub of anon_shmem_allowable_huge_orders() returns 0), so there is no need
to reset it.
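
As background for the orders bitmask walked in anon_shmem_allowable_huge_orders()
and anon_shmem_suitable_orders() above, here is a small stand-alone user-space
sketch of the highest-to-lowest walk (the helpers are re-implemented with a GCC
builtin purely for illustration; the kernel versions in huge_mm.h are
authoritative):

    #include <stdio.h>

    /* User-space stand-ins: walk a bitmask of enabled orders downwards. */
    static int highest_order(unsigned long orders)
    {
            return 8 * sizeof(orders) - 1 - __builtin_clzl(orders);
    }

    static int next_order(unsigned long *orders, int prev)
    {
            *orders &= ~(1UL << prev);
            return *orders ? highest_order(*orders) : 0;
    }

    int main(void)
    {
            /* Example mask: orders 9 (PMD with 4K pages), 4 and 2 enabled. */
            unsigned long orders = (1UL << 9) | (1UL << 4) | (1UL << 2);
            int order = highest_order(orders);

            while (orders) {
                    printf("trying order %d (%lu pages)\n", order, 1UL << order);
                    order = next_order(&orders, order);
            }
            return 0;
    }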