Message-ID: <CAAa6QmQ4DaFo9RFXRBZnvq=JbQcVOW9P2njCNXU_pjTxcg=3Sg@mail.gmail.com>
Date: Thu, 9 Jun 2022 19:22:16 -0700
From: "Zach O'Keefe" <zokeefe@...gle.com>
To: Yang Shi <shy828301@...il.com>
Cc: vbabka@...e.cz, kirill.shutemov@...ux.intel.com,
willy@...radead.org, akpm@...ux-foundation.org, linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Subject: Re: [v3 PATCH 6/7] mm: thp: kill __transhuge_page_enabled()
On Mon, Jun 6, 2022 at 2:44 PM Yang Shi <shy828301@...il.com> wrote:
>
> The page fault path checks THP eligibility with
> __transhuge_page_enabled(), which does much the same thing as
> hugepage_vma_check(), so use hugepage_vma_check() instead.
>
> However, the page fault path allows DAX and !anon_vma cases, so add a new
> flag, in_pf, to hugepage_vma_check() to make page faults work correctly.
>
> The in_pf flag is also used to skip shmem and file THP for page faults,
> since shmem handles THP in its own shmem_fault() and file THP allocation
> on fault is not supported yet.
>
> Also remove transhuge_vma_enabled() since hugepage_vma_check() is the
> only caller now; it is not necessary to keep a separate helper.
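
Just to double-check my reading of the new interface against the call sites
below, the (smaps, in_pf) combinations after this patch look roughly like:

        /* page fault (mm/memory.c) */
        hugepage_vma_check(vma, vm_flags, false, true);
        /* khugepaged entry points and collapse paths */
        hugepage_vma_check(vma, vma->vm_flags, false, false);
        /* smaps THPeligible */
        hugepage_vma_check(vma, vma->vm_flags, true, false);

i.e. no caller sets both smaps and in_pf.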
>
> Signed-off-by: Yang Shi <shy828301@...il.com>
> ---
> fs/proc/task_mmu.c | 2 +-
> include/linux/huge_mm.h | 57 ++------------------------------------
> include/linux/khugepaged.h | 2 +-
> mm/huge_memory.c | 25 ++++++++++++-----
> mm/khugepaged.c | 8 +++---
> mm/memory.c | 7 +++--
> 6 files changed, 31 insertions(+), 70 deletions(-)
>
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index fd79566e204c..a0850303baec 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -860,7 +860,7 @@ static int show_smap(struct seq_file *m, void *v)
> __show_smap(m, &mss, false);
>
> seq_printf(m, "THPeligible: %d\n",
> - hugepage_vma_check(vma, vma->vm_flags, true));
> + hugepage_vma_check(vma, vma->vm_flags, true, false));
>
> if (arch_pkeys_enabled())
> seq_printf(m, "ProtectionKey: %8u\n", vma_pkey(vma));
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index f561c3e16def..d478e8875023 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -153,48 +153,6 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
> return true;
> }
>
> -static inline bool transhuge_vma_enabled(struct vm_area_struct *vma,
> - unsigned long vm_flags)
> -{
> - /* Explicitly disabled through madvise. */
> - if ((vm_flags & VM_NOHUGEPAGE) ||
> - test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
> - return false;
> - return true;
> -}
> -
> -/*
> - * to be used on vmas which are known to support THP.
> - * Use transparent_hugepage_active otherwise
> - */
> -static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
> -{
> -
> - /*
> - * If the hardware/firmware marked hugepage support disabled.
> - */
> - if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_NEVER_DAX))
> - return false;
> -
> - if (!transhuge_vma_enabled(vma, vma->vm_flags))
> - return false;
> -
> - if (vma_is_temporary_stack(vma))
> - return false;
> -
> - if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
> - return true;
> -
> - if (vma_is_dax(vma))
> - return true;
> -
> - if (transparent_hugepage_flags &
> - (1 << TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG))
> - return !!(vma->vm_flags & VM_HUGEPAGE);
> -
> - return false;
> -}
> -
> static inline bool file_thp_enabled(struct vm_area_struct *vma)
> {
> struct inode *inode;
> @@ -211,7 +169,7 @@ static inline bool file_thp_enabled(struct vm_area_struct *vma)
>
> bool hugepage_vma_check(struct vm_area_struct *vma,
> unsigned long vm_flags,
> - bool smaps);
> + bool smaps, bool in_pf);
>
> #define transparent_hugepage_use_zero_page() \
> (transparent_hugepage_flags & \
> @@ -355,11 +313,6 @@ static inline bool folio_test_pmd_mappable(struct folio *folio)
> return false;
> }
>
> -static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
> -{
> - return false;
> -}
> -
> static inline bool transhuge_vma_size_ok(struct vm_area_struct *vma)
> {
> return false;
> @@ -371,15 +324,9 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
> return false;
> }
>
> -static inline bool transhuge_vma_enabled(struct vm_area_struct *vma,
> - unsigned long vm_flags)
> -{
> - return false;
> -}
> -
> static inline bool hugepage_vma_check(struct vm_area_struct *vma,
> unsigned long vm_flags,
> - bool smaps)
> + bool smaps, bool in_pf)
> {
> return false;
> }
> diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
> index 8a6452e089ca..e047be601268 100644
> --- a/include/linux/khugepaged.h
> +++ b/include/linux/khugepaged.h
> @@ -55,7 +55,7 @@ static inline void khugepaged_enter(struct vm_area_struct *vma,
> {
> if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
> khugepaged_enabled()) {
> - if (hugepage_vma_check(vma, vm_flags, false))
> + if (hugepage_vma_check(vma, vm_flags, false, false))
> __khugepaged_enter(vma->vm_mm);
> }
> }
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index bc8370856e85..b95786ada466 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -71,17 +71,25 @@ unsigned long huge_zero_pfn __read_mostly = ~0UL;
>
> bool hugepage_vma_check(struct vm_area_struct *vma,
> unsigned long vm_flags,
> - bool smaps)
> + bool smaps, bool in_pf)
> {
> - if (!transhuge_vma_enabled(vma, vm_flags))
> + /* Explicitly disabled through madvise or prctl. */
Or s390 KVM (not that this list has to be exhaustively maintained).
> + if ((vm_flags & VM_NOHUGEPAGE) ||
> + test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
> + return false;
> + /*
> + * If the hardware/firmware marked hugepage support disabled.
> + */
> + if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_NEVER_DAX))
> return false;
This introduces an extra check for the khugepaged path. I don't know
enough about TRANSPARENT_HUGEPAGE_NEVER_DAX, but I assume this is OK?
What would have happened previously if khugepaged tried to collapse
this memory?
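(For context, the NEVER_DAX check previously only lived in the fault-path
helper being removed above:

        if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_NEVER_DAX))
                return false;

while the khugepaged path only went through transhuge_vma_enabled(), which
doesn't look at it -- hence the question.)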
> + /* Special VMA and hugetlb VMA */
> if (vm_flags & VM_NO_KHUGEPAGED)
> return false;
This adds an extra check along the fault path. Is it also safe to add?
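(For reference, assuming I'm reading mm/khugepaged.c right:

        #define VM_NO_KHUGEPAGED        (VM_SPECIAL | VM_HUGETLB)

so the fault path would now also bail out early on special and hugetlb
mappings.)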
> - /* Don't run khugepaged against DAX vma */
> + /* khugepaged doesn't collapse DAX vma, but page fault is fine. */
> if (vma_is_dax(vma))
> - return false;
> + return in_pf;
I assume vma_is_temporary_stack() and vma_is_dax() are mutually exclusive.
> if (vma->vm_file && !IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) -
> vma->vm_pgoff, HPAGE_PMD_NR))
> @@ -91,7 +99,7 @@ bool hugepage_vma_check(struct vm_area_struct *vma,
> return false;
>
> /* Enabled via shmem mount options or sysfs settings. */
> - if (shmem_file(vma->vm_file))
> + if (!in_pf && shmem_file(vma->vm_file))
> return shmem_huge_enabled(vma);
Will shmem_file() ever be true in the fault path? Or is this just an
optimization?
> if (!khugepaged_enabled())
> @@ -102,7 +110,7 @@ bool hugepage_vma_check(struct vm_area_struct *vma,
> return false;
>
> /* Only regular file is valid */
> - if (file_thp_enabled(vma))
> + if (!in_pf && file_thp_enabled(vma))
> return true;
Likewise for file_thp_enabled().
> if (!vma_is_anonymous(vma))
> @@ -114,9 +122,12 @@ bool hugepage_vma_check(struct vm_area_struct *vma,
> /*
> * THPeligible bit of smaps should show 1 for proper VMAs even
> * though anon_vma is not initialized yet.
> + *
> + * Allow page fault since anon_vma may be not initialized until
> + * the first page fault.
> */
> if (!vma->anon_vma)
> - return smaps;
> + return (smaps || in_pf);
>
> return true;
> }
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index aa0769e3b0d9..ab6183c5489f 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -473,7 +473,7 @@ void khugepaged_enter_vma(struct vm_area_struct *vma,
> {
> if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags) &&
> khugepaged_enabled()) {
> - if (hugepage_vma_check(vma, vm_flags, false))
> + if (hugepage_vma_check(vma, vm_flags, false, false))
> __khugepaged_enter(vma->vm_mm);
> }
> }
> @@ -918,7 +918,7 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
>
> if (!transhuge_vma_suitable(vma, address))
> return SCAN_ADDRESS_RANGE;
> - if (!hugepage_vma_check(vma, vma->vm_flags, false))
> + if (!hugepage_vma_check(vma, vma->vm_flags, false, false))
> return SCAN_VMA_CHECK;
> return 0;
> }
> @@ -1399,7 +1399,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
> * the valid THP. Add extra VM_HUGEPAGE so hugepage_vma_check()
> * will not fail the vma for missing VM_HUGEPAGE
> */
> - if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE, false))
> + if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE, false, false))
> return;
>
> /* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */
> @@ -2089,7 +2089,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
> progress++;
> break;
> }
> - if (!hugepage_vma_check(vma, vma->vm_flags, false)) {
> + if (!hugepage_vma_check(vma, vma->vm_flags, false, false)) {
> skip:
> progress++;
> continue;
> diff --git a/mm/memory.c b/mm/memory.c
> index bc5d40eec5d5..673f7561a30a 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -4962,6 +4962,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
> .gfp_mask = __get_fault_gfp_mask(vma),
> };
> struct mm_struct *mm = vma->vm_mm;
> + unsigned long vm_flags = vma->vm_flags;
> pgd_t *pgd;
> p4d_t *p4d;
> vm_fault_t ret;
> @@ -4975,7 +4976,8 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
> if (!vmf.pud)
> return VM_FAULT_OOM;
> retry_pud:
> - if (pud_none(*vmf.pud) && __transparent_hugepage_enabled(vma)) {
> + if (pud_none(*vmf.pud) &&
> + hugepage_vma_check(vma, vm_flags, false, true)) {
> ret = create_huge_pud(&vmf);
> if (!(ret & VM_FAULT_FALLBACK))
> return ret;
> @@ -5008,7 +5010,8 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
> if (pud_trans_unstable(vmf.pud))
> goto retry_pud;
>
> - if (pmd_none(*vmf.pmd) && __transparent_hugepage_enabled(vma)) {
> + if (pmd_none(*vmf.pmd) &&
> + hugepage_vma_check(vma, vm_flags, false, true)) {
> ret = create_huge_pmd(&vmf);
> if (!(ret & VM_FAULT_FALLBACK))
> return ret;
> --
> 2.26.3
>
>