lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAGsJ_4wKCEHcSMz0Zu+dFH4wR2memTLtf9Cv5EsaxwbA5+jA-A@mail.gmail.com>
Date: Thu, 13 Jun 2024 20:39:26 +1200
From: Barry Song <21cnbao@...il.com>
To: david@...hat.com, akpm@...ux-foundation.org, linux-mm@...ck.org
Cc: chrisl@...nel.org, linux-kernel@...r.kernel.org, mhocko@...e.com, 
	ryan.roberts@....com, baolin.wang@...ux.alibaba.com, yosryahmed@...gle.com, 
	shy828301@...il.com, surenb@...gle.com, v-songbaohua@...o.com, 
	willy@...radead.org, ying.huang@...el.com, yuzhao@...gle.com
Subject: Re: [PATCH RFC 1/3] mm: extend rmap flags arguments for folio_add_new_anon_rmap

On Thu, Jun 13, 2024 at 12:07 PM Barry Song <21cnbao@...il.com> wrote:
>
> From: Barry Song <v-songbaohua@...o.com>
>
> In the case of do_swap_page(), a new anonymous folio isn’t necessarily
> exclusive. This patch extends the rmap flags to allow treating a new
> anon folio as either exclusive or non-exclusive. To maintain the current
> behavior, all existing callers pass RMAP_EXCLUSIVE as the flags argument.
>
> Suggested-by: David Hildenbrand <david@...hat.com>
> Signed-off-by: Barry Song <v-songbaohua@...o.com>
> ---
>  include/linux/rmap.h    |  2 +-
>  kernel/events/uprobes.c |  2 +-
>  mm/huge_memory.c        |  2 +-
>  mm/khugepaged.c         |  2 +-
>  mm/memory.c             | 10 +++++-----
>  mm/migrate_device.c     |  2 +-
>  mm/rmap.c               | 15 +++++++++------
>  mm/swapfile.c           |  2 +-
>  mm/userfaultfd.c        |  2 +-
>  9 files changed, 21 insertions(+), 18 deletions(-)
>
> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> index cae38a2a643d..01a64e7e72b9 100644
> --- a/include/linux/rmap.h
> +++ b/include/linux/rmap.h
> @@ -244,7 +244,7 @@ void folio_add_anon_rmap_ptes(struct folio *, struct page *, int nr_pages,
>  void folio_add_anon_rmap_pmd(struct folio *, struct page *,
>                 struct vm_area_struct *, unsigned long address, rmap_t flags);
>  void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
> -               unsigned long address);
> +               unsigned long address, rmap_t flags);
>  void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages,
>                 struct vm_area_struct *);
>  #define folio_add_file_rmap_pte(folio, page, vma) \
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 2c83ba776fc7..c20368aa33dd 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -181,7 +181,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
>
>         if (new_page) {
>                 folio_get(new_folio);
> -               folio_add_new_anon_rmap(new_folio, vma, addr);
> +               folio_add_new_anon_rmap(new_folio, vma, addr, RMAP_EXCLUSIVE);
>                 folio_add_lru_vma(new_folio, vma);
>         } else
>                 /* no new page, just dec_mm_counter for old_page */
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index f409ea9fcc18..09a83e43c71a 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -973,7 +973,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
>
>                 entry = mk_huge_pmd(page, vma->vm_page_prot);
>                 entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
> -               folio_add_new_anon_rmap(folio, vma, haddr);
> +               folio_add_new_anon_rmap(folio, vma, haddr, RMAP_EXCLUSIVE);
>                 folio_add_lru_vma(folio, vma);
>                 pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
>                 set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 774a97e6e2da..4d759a7487d0 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1213,7 +1213,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
>
>         spin_lock(pmd_ptl);
>         BUG_ON(!pmd_none(*pmd));
> -       folio_add_new_anon_rmap(folio, vma, address);
> +       folio_add_new_anon_rmap(folio, vma, address, RMAP_EXCLUSIVE);
>         folio_add_lru_vma(folio, vma);
>         pgtable_trans_huge_deposit(mm, pmd, pgtable);
>         set_pmd_at(mm, address, pmd, _pmd);
> diff --git a/mm/memory.c b/mm/memory.c
> index 54d7d2acdf39..2f94921091fb 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -930,7 +930,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
>         *prealloc = NULL;
>         copy_user_highpage(&new_folio->page, page, addr, src_vma);
>         __folio_mark_uptodate(new_folio);
> -       folio_add_new_anon_rmap(new_folio, dst_vma, addr);
> +       folio_add_new_anon_rmap(new_folio, dst_vma, addr, RMAP_EXCLUSIVE);
>         folio_add_lru_vma(new_folio, dst_vma);
>         rss[MM_ANONPAGES]++;
>
> @@ -3400,7 +3400,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
>                  * some TLBs while the old PTE remains in others.
>                  */
>                 ptep_clear_flush(vma, vmf->address, vmf->pte);
> -               folio_add_new_anon_rmap(new_folio, vma, vmf->address);
> +               folio_add_new_anon_rmap(new_folio, vma, vmf->address, RMAP_EXCLUSIVE);
>                 folio_add_lru_vma(new_folio, vma);
>                 BUG_ON(unshare && pte_write(entry));
>                 set_pte_at(mm, vmf->address, vmf->pte, entry);
> @@ -4337,7 +4337,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>
>         /* ksm created a completely new copy */
>         if (unlikely(folio != swapcache && swapcache)) {
> -               folio_add_new_anon_rmap(folio, vma, address);
> +               folio_add_new_anon_rmap(folio, vma, address, RMAP_EXCLUSIVE);
>                 folio_add_lru_vma(folio, vma);
>         } else {
>                 folio_add_anon_rmap_ptes(folio, page, nr_pages, vma, address,
> @@ -4592,7 +4592,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>         count_mthp_stat(folio_order(folio), MTHP_STAT_ANON_FAULT_ALLOC);
>  #endif
> -       folio_add_new_anon_rmap(folio, vma, addr);
> +       folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
>         folio_add_lru_vma(folio, vma);
>  setpte:
>         if (vmf_orig_pte_uffd_wp(vmf))
> @@ -4790,7 +4790,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
>         /* copy-on-write page */
>         if (write && !(vma->vm_flags & VM_SHARED)) {
>                 VM_BUG_ON_FOLIO(nr != 1, folio);
> -               folio_add_new_anon_rmap(folio, vma, addr);
> +               folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
>                 folio_add_lru_vma(folio, vma);
>         } else {
>                 folio_add_file_rmap_ptes(folio, page, nr, vma);
> diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> index 051d0a3ccbee..6d66dc1c6ffa 100644
> --- a/mm/migrate_device.c
> +++ b/mm/migrate_device.c
> @@ -658,7 +658,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
>                 goto unlock_abort;
>
>         inc_mm_counter(mm, MM_ANONPAGES);
> -       folio_add_new_anon_rmap(folio, vma, addr);
> +       folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
>         if (!folio_is_zone_device(folio))
>                 folio_add_lru_vma(folio, vma);
>         folio_get(folio);
> diff --git a/mm/rmap.c b/mm/rmap.c
> index b9e5943c8349..e612d999811a 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1406,14 +1406,14 @@ void folio_add_anon_rmap_pmd(struct folio *folio, struct page *page,
>   * This means the inc-and-test can be bypassed.
>   * The folio does not have to be locked.
>   *
> - * If the folio is pmd-mappable, it is accounted as a THP.  As the folio
> - * is new, it's assumed to be mapped exclusively by a single process.
> + * If the folio is pmd-mappable, it is accounted as a THP.
>   */
>  void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
> -               unsigned long address)
> +               unsigned long address, rmap_t flags)
>  {
>         int nr = folio_nr_pages(folio);
>         int nr_pmdmapped = 0;
> +       bool exclusive = flags & RMAP_EXCLUSIVE;
>
>         VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
>         VM_BUG_ON_VMA(address < vma->vm_start ||
> @@ -1424,7 +1424,8 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
>         if (likely(!folio_test_large(folio))) {
>                 /* increment count (starts at -1) */
>                 atomic_set(&folio->_mapcount, 0);
> -               SetPageAnonExclusive(&folio->page);
> +               if (exclusive)
> +                       SetPageAnonExclusive(&folio->page);
>         } else if (!folio_test_pmd_mappable(folio)) {
>                 int i;
>
> @@ -1433,7 +1434,8 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
>
>                         /* increment count (starts at -1) */
>                         atomic_set(&page->_mapcount, 0);
> -                       SetPageAnonExclusive(page);
> +                       if (exclusive)
> +                               SetPageAnonExclusive(page);
>                 }
>
>                 /* increment count (starts at -1) */
> @@ -1445,7 +1447,8 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
>                 /* increment count (starts at -1) */
>                 atomic_set(&folio->_large_mapcount, 0);
>                 atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED);
> -               SetPageAnonExclusive(&folio->page);
> +               if (exclusive)
> +                       SetPageAnonExclusive(&folio->page);
>                 nr_pmdmapped = nr;
>         }
>

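The patch above is missing the following change, which passes the
exclusive flag on to __folio_set_anon() instead of hard-coding true: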

--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1408,7 +1408,7 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
        VM_BUG_ON_VMA(address < vma->vm_start ||
                        address + (nr << PAGE_SHIFT) > vma->vm_end, vma);
        __folio_set_swapbacked(folio);
-       __folio_set_anon(folio, vma, address, true);
+       __folio_set_anon(folio, vma, address, exclusive);

        if (likely(!folio_test_large(folio))) {
                /* increment count (starts at -1) */


> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 9c6d8e557c0f..ae1d2700f6a3 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -1911,7 +1911,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
>
>                 folio_add_anon_rmap_pte(folio, page, vma, addr, rmap_flags);
>         } else { /* ksm created a completely new copy */
> -               folio_add_new_anon_rmap(folio, vma, addr);
> +               folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
>                 folio_add_lru_vma(folio, vma);
>         }
>         new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot));
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 5e7f2801698a..8dedaec00486 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -216,7 +216,7 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
>                         folio_add_lru(folio);
>                 folio_add_file_rmap_pte(folio, page, dst_vma);
>         } else {
> -               folio_add_new_anon_rmap(folio, dst_vma, dst_addr);
> +               folio_add_new_anon_rmap(folio, dst_vma, dst_addr, RMAP_EXCLUSIVE);
>                 folio_add_lru_vma(folio, dst_vma);
>         }
>
> --
> 2.34.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ