Message-ID: <CAHbLzkrTmt=5ECLG0ZQs=14BxHhKZtDY1DfG3mSau7LU-0PeKw@mail.gmail.com>
Date:   Tue, 17 Oct 2023 13:41:46 -0700
From:   Yang Shi <shy828301@...il.com>
To:     "Vishal Moola (Oracle)" <vishal.moola@...il.com>
Cc:     linux-mm@...ck.org, linux-kernel@...r.kernel.org,
        akpm@...ux-foundation.org
Subject: Re: [PATCH 2/5] mm/khugepaged: Convert hpage_collapse_scan_pmd() to
 use folios

On Mon, Oct 16, 2023 at 1:06 PM Vishal Moola (Oracle)
<vishal.moola@...il.com> wrote:
>
> Replaces 5 calls to compound_head(), and removes 1466 bytes of kernel
> text.
>
> Previously, to determine if any pte was shared, the page mapcount
> corresponding exactly to the pte was checked. This gave us a precise
> number of shared ptes. Using folio_estimated_sharers() instead uses
> the mapcount of the head page, giving us an estimate for tail page ptes.
>
> This means if a tail page's mapcount is greater than its head page's
> mapcount, folio_estimated_sharers() would be underestimating the number of
> shared ptes, and vice versa.
>
> Signed-off-by: Vishal Moola (Oracle) <vishal.moola@...il.com>
> ---
>  mm/khugepaged.c | 26 ++++++++++++--------------
>  1 file changed, 12 insertions(+), 14 deletions(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 7a552fe16c92..67aac53b31c8 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1245,7 +1245,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
>         pte_t *pte, *_pte;
>         int result = SCAN_FAIL, referenced = 0;
>         int none_or_zero = 0, shared = 0;
> -       struct page *page = NULL;
> +       struct folio *folio = NULL;
>         unsigned long _address;
>         spinlock_t *ptl;
>         int node = NUMA_NO_NODE, unmapped = 0;
> @@ -1316,13 +1316,13 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
>                 if (pte_write(pteval))
>                         writable = true;
>
> -               page = vm_normal_page(vma, _address, pteval);
> -               if (unlikely(!page) || unlikely(is_zone_device_page(page))) {
> +               folio = vm_normal_folio(vma, _address, pteval);
> +               if (unlikely(!folio) || unlikely(folio_is_zone_device(folio))) {
>                         result = SCAN_PAGE_NULL;
>                         goto out_unmap;
>                 }
>
> -               if (page_mapcount(page) > 1) {
> +               if (folio_estimated_sharers(folio) > 1) {

This doesn't look correct. max_ptes_shared is used to cap the number
of shared PTEs. IIRC, folio_estimated_sharers() just reads the
mapcount of the head page. If we set max_ptes_shared to 256 and only
the head page is actually shared, "shared" can still reach 512 and
prevent the area from being collapsed even though just one PTE is
shared. This breaks the semantics of max_ptes_shared.
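
To make the concern concrete, here is a small userspace model (not
kernel code; the struct and the two checks below are illustrative
stand-ins for page_mapcount() and a head-page-only estimate like
folio_estimated_sharers()) of how the two checks can diverge when only
the head page of a folio is shared:

#include <stdio.h>

#define HPAGE_PMD_NR 512	/* PTEs covered by one PMD-sized huge page */

/* Toy stand-in for a small page: its own mapcount plus its folio head's. */
struct toy_page {
	int mapcount;		/* what page_mapcount() would report */
	int head_mapcount;	/* what a head-page-only estimate reads */
};

int main(void)
{
	struct toy_page pages[HPAGE_PMD_NR];
	int i, shared_exact = 0, shared_estimate = 0;

	/* One folio covers the range; only its head page is mapped twice. */
	for (i = 0; i < HPAGE_PMD_NR; i++) {
		pages[i].mapcount = 1;
		pages[i].head_mapcount = 2;
	}
	pages[0].mapcount = 2;

	for (i = 0; i < HPAGE_PMD_NR; i++) {
		if (pages[i].mapcount > 1)	/* old per-PTE check */
			shared_exact++;
		if (pages[i].head_mapcount > 1)	/* new head-page estimate */
			shared_estimate++;
	}

	printf("exact: %d, estimate: %d\n", shared_exact, shared_estimate);
	return 0;
}

In this model the exact check counts 1 shared PTE while the head-page
estimate counts 512, so a max_ptes_shared of 256 would refuse the
collapse even though only one PTE is actually shared.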

>                         ++shared;
>                         if (cc->is_khugepaged &&
>                             shared > khugepaged_max_ptes_shared) {
> @@ -1332,29 +1332,27 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
>                         }
>                 }
>
> -               page = compound_head(page);
> -
>                 /*
>                  * Record which node the original page is from and save this
>                  * information to cc->node_load[].
>                  * Khugepaged will allocate hugepage from the node has the max
>                  * hit record.
>                  */
> -               node = page_to_nid(page);
> +               node = folio_nid(folio);
>                 if (hpage_collapse_scan_abort(node, cc)) {
>                         result = SCAN_SCAN_ABORT;
>                         goto out_unmap;
>                 }
>                 cc->node_load[node]++;
> -               if (!PageLRU(page)) {
> +               if (!folio_test_lru(folio)) {
>                         result = SCAN_PAGE_LRU;
>                         goto out_unmap;
>                 }
> -               if (PageLocked(page)) {
> +               if (folio_test_locked(folio)) {
>                         result = SCAN_PAGE_LOCK;
>                         goto out_unmap;
>                 }
> -               if (!PageAnon(page)) {
> +               if (!folio_test_anon(folio)) {
>                         result = SCAN_PAGE_ANON;
>                         goto out_unmap;
>                 }
> @@ -1369,7 +1367,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
>                  * has excessive GUP pins (i.e. 512).  Anyway the same check
>                  * will be done again later the risk seems low.
>                  */
> -               if (!is_refcount_suitable(page)) {
> +               if (!is_refcount_suitable(&folio->page)) {
>                         result = SCAN_PAGE_COUNT;
>                         goto out_unmap;
>                 }
> @@ -1379,8 +1377,8 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
>                  * enough young pte to justify collapsing the page
>                  */
>                 if (cc->is_khugepaged &&
> -                   (pte_young(pteval) || page_is_young(page) ||
> -                    PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm,
> +                   (pte_young(pteval) || folio_test_young(folio) ||
> +                    folio_test_referenced(folio) || mmu_notifier_test_young(vma->vm_mm,
>                                                                      address)))
>                         referenced++;
>         }
> @@ -1402,7 +1400,7 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
>                 *mmap_locked = false;
>         }
>  out:
> -       trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
> +       trace_mm_khugepaged_scan_pmd(mm, &folio->page, writable, referenced,
>                                      none_or_zero, result, unmapped);
>         return result;
>  }
> --
> 2.40.1
>
