[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAGsJ_4wKCEHcSMz0Zu+dFH4wR2memTLtf9Cv5EsaxwbA5+jA-A@mail.gmail.com>
Date: Thu, 13 Jun 2024 20:39:26 +1200
From: Barry Song <21cnbao@...il.com>
To: david@...hat.com, akpm@...ux-foundation.org, linux-mm@...ck.org
Cc: chrisl@...nel.org, linux-kernel@...r.kernel.org, mhocko@...e.com,
ryan.roberts@....com, baolin.wang@...ux.alibaba.com, yosryahmed@...gle.com,
shy828301@...il.com, surenb@...gle.com, v-songbaohua@...o.com,
willy@...radead.org, ying.huang@...el.com, yuzhao@...gle.com
Subject: Re: [PATCH RFC 1/3] mm: extend rmap flags arguments for folio_add_new_anon_rmap
On Thu, Jun 13, 2024 at 12:07 PM Barry Song <21cnbao@...il.com> wrote:
>
> From: Barry Song <v-songbaohua@...o.com>
>
> In the case of do_swap_page(), a new anonymous folio isn’t necessarily
> exclusive. This patch extends the rmap flags to allow treating a new
> anon folio as either exclusive or non-exclusive. To maintain the current
> behavior, all existing callers pass RMAP_EXCLUSIVE as the flags argument.
>
> Suggested-by: David Hildenbrand <david@...hat.com>
> Signed-off-by: Barry Song <v-songbaohua@...o.com>
> ---
> include/linux/rmap.h | 2 +-
> kernel/events/uprobes.c | 2 +-
> mm/huge_memory.c | 2 +-
> mm/khugepaged.c | 2 +-
> mm/memory.c | 10 +++++-----
> mm/migrate_device.c | 2 +-
> mm/rmap.c | 15 +++++++++------
> mm/swapfile.c | 2 +-
> mm/userfaultfd.c | 2 +-
> 9 files changed, 21 insertions(+), 18 deletions(-)
>
> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> index cae38a2a643d..01a64e7e72b9 100644
> --- a/include/linux/rmap.h
> +++ b/include/linux/rmap.h
> @@ -244,7 +244,7 @@ void folio_add_anon_rmap_ptes(struct folio *, struct page *, int nr_pages,
> void folio_add_anon_rmap_pmd(struct folio *, struct page *,
> struct vm_area_struct *, unsigned long address, rmap_t flags);
> void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
> - unsigned long address);
> + unsigned long address, rmap_t flags);
> void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages,
> struct vm_area_struct *);
> #define folio_add_file_rmap_pte(folio, page, vma) \
> diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> index 2c83ba776fc7..c20368aa33dd 100644
> --- a/kernel/events/uprobes.c
> +++ b/kernel/events/uprobes.c
> @@ -181,7 +181,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
>
> if (new_page) {
> folio_get(new_folio);
> - folio_add_new_anon_rmap(new_folio, vma, addr);
> + folio_add_new_anon_rmap(new_folio, vma, addr, RMAP_EXCLUSIVE);
> folio_add_lru_vma(new_folio, vma);
> } else
> /* no new page, just dec_mm_counter for old_page */
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index f409ea9fcc18..09a83e43c71a 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -973,7 +973,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
>
> entry = mk_huge_pmd(page, vma->vm_page_prot);
> entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
> - folio_add_new_anon_rmap(folio, vma, haddr);
> + folio_add_new_anon_rmap(folio, vma, haddr, RMAP_EXCLUSIVE);
> folio_add_lru_vma(folio, vma);
> pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
> set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 774a97e6e2da..4d759a7487d0 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1213,7 +1213,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address,
>
> spin_lock(pmd_ptl);
> BUG_ON(!pmd_none(*pmd));
> - folio_add_new_anon_rmap(folio, vma, address);
> + folio_add_new_anon_rmap(folio, vma, address, RMAP_EXCLUSIVE);
> folio_add_lru_vma(folio, vma);
> pgtable_trans_huge_deposit(mm, pmd, pgtable);
> set_pmd_at(mm, address, pmd, _pmd);
> diff --git a/mm/memory.c b/mm/memory.c
> index 54d7d2acdf39..2f94921091fb 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -930,7 +930,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
> *prealloc = NULL;
> copy_user_highpage(&new_folio->page, page, addr, src_vma);
> __folio_mark_uptodate(new_folio);
> - folio_add_new_anon_rmap(new_folio, dst_vma, addr);
> + folio_add_new_anon_rmap(new_folio, dst_vma, addr, RMAP_EXCLUSIVE);
> folio_add_lru_vma(new_folio, dst_vma);
> rss[MM_ANONPAGES]++;
>
> @@ -3400,7 +3400,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
> * some TLBs while the old PTE remains in others.
> */
> ptep_clear_flush(vma, vmf->address, vmf->pte);
> - folio_add_new_anon_rmap(new_folio, vma, vmf->address);
> + folio_add_new_anon_rmap(new_folio, vma, vmf->address, RMAP_EXCLUSIVE);
> folio_add_lru_vma(new_folio, vma);
> BUG_ON(unshare && pte_write(entry));
> set_pte_at(mm, vmf->address, vmf->pte, entry);
> @@ -4337,7 +4337,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
>
> /* ksm created a completely new copy */
> if (unlikely(folio != swapcache && swapcache)) {
> - folio_add_new_anon_rmap(folio, vma, address);
> + folio_add_new_anon_rmap(folio, vma, address, RMAP_EXCLUSIVE);
> folio_add_lru_vma(folio, vma);
> } else {
> folio_add_anon_rmap_ptes(folio, page, nr_pages, vma, address,
> @@ -4592,7 +4592,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> count_mthp_stat(folio_order(folio), MTHP_STAT_ANON_FAULT_ALLOC);
> #endif
> - folio_add_new_anon_rmap(folio, vma, addr);
> + folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
> folio_add_lru_vma(folio, vma);
> setpte:
> if (vmf_orig_pte_uffd_wp(vmf))
> @@ -4790,7 +4790,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
> /* copy-on-write page */
> if (write && !(vma->vm_flags & VM_SHARED)) {
> VM_BUG_ON_FOLIO(nr != 1, folio);
> - folio_add_new_anon_rmap(folio, vma, addr);
> + folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
> folio_add_lru_vma(folio, vma);
> } else {
> folio_add_file_rmap_ptes(folio, page, nr, vma);
> diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> index 051d0a3ccbee..6d66dc1c6ffa 100644
> --- a/mm/migrate_device.c
> +++ b/mm/migrate_device.c
> @@ -658,7 +658,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
> goto unlock_abort;
>
> inc_mm_counter(mm, MM_ANONPAGES);
> - folio_add_new_anon_rmap(folio, vma, addr);
> + folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
> if (!folio_is_zone_device(folio))
> folio_add_lru_vma(folio, vma);
> folio_get(folio);
> diff --git a/mm/rmap.c b/mm/rmap.c
> index b9e5943c8349..e612d999811a 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1406,14 +1406,14 @@ void folio_add_anon_rmap_pmd(struct folio *folio, struct page *page,
> * This means the inc-and-test can be bypassed.
> * The folio does not have to be locked.
> *
> - * If the folio is pmd-mappable, it is accounted as a THP. As the folio
> - * is new, it's assumed to be mapped exclusively by a single process.
> + * If the folio is pmd-mappable, it is accounted as a THP.
> */
> void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
> - unsigned long address)
> + unsigned long address, rmap_t flags)
> {
> int nr = folio_nr_pages(folio);
> int nr_pmdmapped = 0;
> + bool exclusive = flags & RMAP_EXCLUSIVE;
>
> VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);
> VM_BUG_ON_VMA(address < vma->vm_start ||
> @@ -1424,7 +1424,8 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
> if (likely(!folio_test_large(folio))) {
> /* increment count (starts at -1) */
> atomic_set(&folio->_mapcount, 0);
> - SetPageAnonExclusive(&folio->page);
> + if (exclusive)
> + SetPageAnonExclusive(&folio->page);
> } else if (!folio_test_pmd_mappable(folio)) {
> int i;
>
> @@ -1433,7 +1434,8 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
>
> /* increment count (starts at -1) */
> atomic_set(&page->_mapcount, 0);
> - SetPageAnonExclusive(page);
> + if (exclusive)
> + SetPageAnonExclusive(page);
> }
>
> /* increment count (starts at -1) */
> @@ -1445,7 +1447,8 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
> /* increment count (starts at -1) */
> atomic_set(&folio->_large_mapcount, 0);
> atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED);
> - SetPageAnonExclusive(&folio->page);
> + if (exclusive)
> + SetPageAnonExclusive(&folio->page);
> nr_pmdmapped = nr;
> }
>
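The patch above is missing the following hunk, which passes the new
"exclusive" value to __folio_set_anon() instead of the hard-coded "true":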
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1408,7 +1408,7 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
VM_BUG_ON_VMA(address < vma->vm_start ||
address + (nr << PAGE_SHIFT) > vma->vm_end, vma);
__folio_set_swapbacked(folio);
- __folio_set_anon(folio, vma, address, true);
+ __folio_set_anon(folio, vma, address, exclusive);
if (likely(!folio_test_large(folio))) {
/* increment count (starts at -1) */
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 9c6d8e557c0f..ae1d2700f6a3 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -1911,7 +1911,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
>
> folio_add_anon_rmap_pte(folio, page, vma, addr, rmap_flags);
> } else { /* ksm created a completely new copy */
> - folio_add_new_anon_rmap(folio, vma, addr);
> + folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
> folio_add_lru_vma(folio, vma);
> }
> new_pte = pte_mkold(mk_pte(page, vma->vm_page_prot));
> diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
> index 5e7f2801698a..8dedaec00486 100644
> --- a/mm/userfaultfd.c
> +++ b/mm/userfaultfd.c
> @@ -216,7 +216,7 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd,
> folio_add_lru(folio);
> folio_add_file_rmap_pte(folio, page, dst_vma);
> } else {
> - folio_add_new_anon_rmap(folio, dst_vma, dst_addr);
> + folio_add_new_anon_rmap(folio, dst_vma, dst_addr, RMAP_EXCLUSIVE);
> folio_add_lru_vma(folio, dst_vma);
> }
>
> --
> 2.34.1
>
Powered by blists - more mailing lists