[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a7944523fcc3634607691c35311a5d59d1a3f8d4.camel@mediatek.com>
Date: Thu, 18 Sep 2025 08:53:09 +0000
From: Qun-wei Lin (林群崴) <Qun-wei.Lin@...iatek.com>
To: "catalin.marinas@....com" <catalin.marinas@....com>,
"usamaarif642@...il.com" <usamaarif642@...il.com>, "linux-mm@...ck.org"
<linux-mm@...ck.org>, "yuzhao@...gle.com" <yuzhao@...gle.com>,
"akpm@...ux-foundation.org" <akpm@...ux-foundation.org>
CC: "corbet@....net" <corbet@....net>,
Andrew Yang (楊智強) <Andrew.Yang@...iatek.com>,
"npache@...hat.com" <npache@...hat.com>, "rppt@...nel.org" <rppt@...nel.org>,
"willy@...radead.org" <willy@...radead.org>, "kernel-team@...a.com"
<kernel-team@...a.com>, "david@...hat.com" <david@...hat.com>,
"roman.gushchin@...ux.dev" <roman.gushchin@...ux.dev>, "hannes@...xchg.org"
<hannes@...xchg.org>, "cerasuolodomenico@...il.com"
<cerasuolodomenico@...il.com>, "linux-kernel@...r.kernel.org"
<linux-kernel@...r.kernel.org>, "ryncsn@...il.com" <ryncsn@...il.com>,
"surenb@...gle.com" <surenb@...gle.com>, "riel@...riel.com"
<riel@...riel.com>, "shakeel.butt@...ux.dev" <shakeel.butt@...ux.dev>,
Chinwen Chang (張錦文)
<chinwen.chang@...iatek.com>, "linux-doc@...r.kernel.org"
<linux-doc@...r.kernel.org>, Casper Li (李中榮)
<casper.li@...iatek.com>, "ryan.roberts@....com" <ryan.roberts@....com>,
"linux-mediatek@...ts.infradead.org" <linux-mediatek@...ts.infradead.org>,
"baohua@...nel.org" <baohua@...nel.org>, "kaleshsingh@...gle.com"
<kaleshsingh@...gle.com>, "zhais@...gle.com" <zhais@...gle.com>,
"linux-arm-kernel@...ts.infradead.org" <linux-arm-kernel@...ts.infradead.org>
Subject: Re: [PATCH v5 2/6] mm: remap unused subpages to shared zeropage when
splitting isolated thp
On Fri, 2024-08-30 at 11:03 +0100, Usama Arif wrote:
> From: Yu Zhao <yuzhao@...gle.com>
>
> Here being unused means containing only zeros and inaccessible to
> userspace. When splitting an isolated thp under reclaim or migration,
> the unused subpages can be mapped to the shared zeropage, hence
> saving
> memory. This is particularly helpful when the internal
> fragmentation of a thp is high, i.e. it has many untouched subpages.
>
> This is also a prerequisite for THP low utilization shrinker which
> will
> be introduced in later patches, where underutilized THPs are split,
> and
> the zero-filled pages are freed saving memory.
>
> Signed-off-by: Yu Zhao <yuzhao@...gle.com>
> Tested-by: Shuang Zhai <zhais@...gle.com>
> Signed-off-by: Usama Arif <usamaarif642@...il.com>
> ---
> include/linux/rmap.h | 7 ++++-
> mm/huge_memory.c | 8 ++---
> mm/migrate.c | 72 ++++++++++++++++++++++++++++++++++++++----
> --
> mm/migrate_device.c | 4 +--
> 4 files changed, 75 insertions(+), 16 deletions(-)
>
> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> index 91b5935e8485..d5e93e44322e 100644
> --- a/include/linux/rmap.h
> +++ b/include/linux/rmap.h
> @@ -745,7 +745,12 @@ int folio_mkclean(struct folio *);
> int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages,
> pgoff_t pgoff,
> struct vm_area_struct *vma);
>
> -void remove_migration_ptes(struct folio *src, struct folio *dst,
> bool locked);
> +enum rmp_flags {
> + RMP_LOCKED = 1 << 0,
> + RMP_USE_SHARED_ZEROPAGE = 1 << 1,
> +};
> +
> +void remove_migration_ptes(struct folio *src, struct folio *dst, int
> flags);
>
> /*
> * rmap_walk_control: To control rmap traversing for specific needs
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 0c48806ccb9a..af60684e7c70 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -3020,7 +3020,7 @@ bool unmap_huge_pmd_locked(struct
> vm_area_struct *vma, unsigned long addr,
> return false;
> }
>
> -static void remap_page(struct folio *folio, unsigned long nr)
> +static void remap_page(struct folio *folio, unsigned long nr, int
> flags)
> {
> int i = 0;
>
> @@ -3028,7 +3028,7 @@ static void remap_page(struct folio *folio,
> unsigned long nr)
> if (!folio_test_anon(folio))
> return;
> for (;;) {
> - remove_migration_ptes(folio, folio, true);
> + remove_migration_ptes(folio, folio, RMP_LOCKED |
> flags);
> i += folio_nr_pages(folio);
> if (i >= nr)
> break;
> @@ -3240,7 +3240,7 @@ static void __split_huge_page(struct page
> *page, struct list_head *list,
>
> if (nr_dropped)
> shmem_uncharge(folio->mapping->host, nr_dropped);
> - remap_page(folio, nr);
> + remap_page(folio, nr, PageAnon(head) ?
> RMP_USE_SHARED_ZEROPAGE : 0);
>
> /*
> * set page to its compound_head when split to non order-0
> pages, so
> @@ -3542,7 +3542,7 @@ int split_huge_page_to_list_to_order(struct
> page *page, struct list_head *list,
> if (mapping)
> xas_unlock(&xas);
> local_irq_enable();
> - remap_page(folio, folio_nr_pages(folio));
> + remap_page(folio, folio_nr_pages(folio), 0);
> ret = -EAGAIN;
> }
>
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 6f9c62c746be..d039863e014b 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -204,13 +204,57 @@ bool isolate_folio_to_list(struct folio *folio,
> struct list_head *list)
> return true;
> }
>
> +static bool try_to_map_unused_to_zeropage(struct
> page_vma_mapped_walk *pvmw,
> + struct folio *folio,
> + unsigned long idx)
> +{
> + struct page *page = folio_page(folio, idx);
> + bool contains_data;
> + pte_t newpte;
> + void *addr;
> +
> + VM_BUG_ON_PAGE(PageCompound(page), page);
> + VM_BUG_ON_PAGE(!PageAnon(page), page);
> + VM_BUG_ON_PAGE(!PageLocked(page), page);
> + VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
> +
> + if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags &
> VM_LOCKED) ||
> + mm_forbids_zeropage(pvmw->vma->vm_mm))
> + return false;
> +
> + /*
> + * The pmd entry mapping the old thp was flushed and the pte
> mapping
> + * this subpage has been non present. If the subpage is only
> zero-filled
> + * then map it to the shared zeropage.
> + */
> + addr = kmap_local_page(page);
> + contains_data = memchr_inv(addr, 0, PAGE_SIZE);
> + kunmap_local(addr);
> +
> + if (contains_data)
> + return false;
> +
> + newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address),
> + pvmw->vma->vm_page_prot));
> + set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte,
> newpte);
> +
> + dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio));
> + return true;
> +}
> +
> +struct rmap_walk_arg {
> + struct folio *folio;
> + bool map_unused_to_zeropage;
> +};
> +
> /*
> * Restore a potential migration pte to a working pte entry
> */
> static bool remove_migration_pte(struct folio *folio,
> - struct vm_area_struct *vma, unsigned long addr, void
> *old)
> + struct vm_area_struct *vma, unsigned long addr, void
> *arg)
> {
> - DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC |
> PVMW_MIGRATION);
> + struct rmap_walk_arg *rmap_walk_arg = arg;
> + DEFINE_FOLIO_VMA_WALK(pvmw, rmap_walk_arg->folio, vma, addr,
> PVMW_SYNC | PVMW_MIGRATION);
>
> while (page_vma_mapped_walk(&pvmw)) {
> rmap_t rmap_flags = RMAP_NONE;
> @@ -234,6 +278,9 @@ static bool remove_migration_pte(struct folio
> *folio,
> continue;
> }
> #endif
> + if (rmap_walk_arg->map_unused_to_zeropage &&
> + try_to_map_unused_to_zeropage(&pvmw, folio,
> idx))
> + continue;
>
> folio_get(folio);
> pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
> @@ -312,14 +359,21 @@ static bool remove_migration_pte(struct folio
> *folio,
> * Get rid of all migration entries and replace them by
> * references to the indicated page.
> */
> -void remove_migration_ptes(struct folio *src, struct folio *dst,
> bool locked)
> +void remove_migration_ptes(struct folio *src, struct folio *dst, int
> flags)
> {
> + struct rmap_walk_arg rmap_walk_arg = {
> + .folio = src,
> + .map_unused_to_zeropage = flags &
> RMP_USE_SHARED_ZEROPAGE,
> + };
> +
> struct rmap_walk_control rwc = {
> .rmap_one = remove_migration_pte,
> - .arg = src,
> + .arg = &rmap_walk_arg,
> };
>
> - if (locked)
> + VM_BUG_ON_FOLIO((flags & RMP_USE_SHARED_ZEROPAGE) && (src !=
> dst), src);
> +
> + if (flags & RMP_LOCKED)
> rmap_walk_locked(dst, &rwc);
> else
> rmap_walk(dst, &rwc);
> @@ -934,7 +988,7 @@ static int writeout(struct address_space
> *mapping, struct folio *folio)
> * At this point we know that the migration attempt cannot
> * be successful.
> */
> - remove_migration_ptes(folio, folio, false);
> + remove_migration_ptes(folio, folio, 0);
>
> rc = mapping->a_ops->writepage(&folio->page, &wbc);
>
> @@ -1098,7 +1152,7 @@ static void migrate_folio_undo_src(struct folio
> *src,
> struct list_head *ret)
> {
> if (page_was_mapped)
> - remove_migration_ptes(src, src, false);
> + remove_migration_ptes(src, src, 0);
> /* Drop an anon_vma reference if we took one */
> if (anon_vma)
> put_anon_vma(anon_vma);
> @@ -1336,7 +1390,7 @@ static int migrate_folio_move(free_folio_t
> put_new_folio, unsigned long private,
> lru_add_drain();
>
> if (old_page_state & PAGE_WAS_MAPPED)
> - remove_migration_ptes(src, dst, false);
> + remove_migration_ptes(src, dst, 0);
>
> out_unlock_both:
> folio_unlock(dst);
> @@ -1474,7 +1528,7 @@ static int unmap_and_move_huge_page(new_folio_t
> get_new_folio,
>
> if (page_was_mapped)
> remove_migration_ptes(src,
> - rc == MIGRATEPAGE_SUCCESS ? dst : src,
> false);
> + rc == MIGRATEPAGE_SUCCESS ? dst : src, 0);
>
> unlock_put_anon:
> folio_unlock(dst);
> diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> index 8d687de88a03..9cf26592ac93 100644
> --- a/mm/migrate_device.c
> +++ b/mm/migrate_device.c
> @@ -424,7 +424,7 @@ static unsigned long
> migrate_device_unmap(unsigned long *src_pfns,
> continue;
>
> folio = page_folio(page);
> - remove_migration_ptes(folio, folio, false);
> + remove_migration_ptes(folio, folio, 0);
>
> src_pfns[i] = 0;
> folio_unlock(folio);
> @@ -840,7 +840,7 @@ void migrate_device_finalize(unsigned long
> *src_pfns,
> dst = src;
> }
>
> - remove_migration_ptes(src, dst, false);
> + remove_migration_ptes(src, dst, 0);
> folio_unlock(src);
>
> if (folio_is_zone_device(src))
Hi,
This patch has been in the mainline for some time, but we recently
discovered an issue when both mTHP and MTE (Memory Tagging Extension)
are enabled.
It seems that remapping to the same zeropage might cause MTE tag
mismatches, since MTE tags are associated with physical addresses.
In Android, the tombstone is as follows:
---
Build fingerprint:
'alps/vext_k6993v1_64/k6993v1_64:16/BP2A.250605.031.A3/mp1cs1ofp41:user
debug/dev-keys'
Revision: '0'
ABI: 'arm64'
Timestamp: 2025-08-12 04:58:28.507086720+0800
Process uptime: 0s
Cmdline: /system/bin/audioserver
pid: 8217, tid: 8882, name: binder:8217_4 >>> /system/bin/audioserver
<<<
uid: 1041
tagged_addr_ctrl: 000000000007fff3 (PR_TAGGED_ADDR_ENABLE,
PR_MTE_TCF_SYNC, mask 0xfffe)
signal 11 (SIGSEGV), code 9 (SEGV_MTESERR), fault addr
0x0a00007055220000
Cause: [MTE]: Buffer Overflow, 14016 bytes into a 23070-byte allocation
at 0x705521c940
x0 0a0000705521c940 x1 0300006f75210ab0 x2 00000000000022a5
x3 0a0000705521ffc0
x4 0300006f75212de5 x5 0a000070552222f5 x6 0000000000005a1e
x7 0000000000000000
x8 339c000005a1e11e x9 00000000f041339c x10 000000009dd48904
x11 000000000000ffff
x12 0000000022b70889 x13 000000004cc0b2ff x14 0000000000000000
x15 0000000000000010
x16 00000071cc5d8fc0 x17 00000071cc54e040 x18 0000006ef7bd4000
x19 0300006f7520d430
x20 00000071cc5e0340 x21 0000000000005a1e x22 0a0000705521c940
x23 00000000000059b5
x24 00000000000000b1 x25 0300006f75212e4e x26 caa20000059b511d
x27 000000000000001d
x28 0300006f75212e30 x29 0000006f1fe385f0
lr 00000071cc54200c sp 0000006f1fe385c0 pc 00000071cc54e158
pst 0000000020001000
26 total frames
backtrace:
#00 pc 000000000006c158
/apex/com.android.runtime/lib64/bionic/libc.so
(__memcpy_aarch64_simd+280) (BuildId: 1e819f3e369d59be98bee38a8fbd0322)
#01 pc 0000000000060008
/apex/com.android.runtime/lib64/bionic/libc.so
(scudo::Allocator<scudo::AndroidNormalConfig,
&scudo_malloc_postinit>::reallocate(void*, unsigned long, unsigned
long)+696) (BuildId: 1e819f3e369d59be98bee38a8fbd0322)
#02 pc 000000000005fccc
/apex/com.android.runtime/lib64/bionic/libc.so (scudo_realloc+44)
(BuildId: 1e819f3e369d59be98bee38a8fbd0322)
#03 pc 000000000005c2cc
/apex/com.android.runtime/lib64/bionic/libc.so (LimitRealloc(void*,
unsigned long)+124) (BuildId: 1e819f3e369d59be98bee38a8fbd0322)
#04 pc 0000000000059a90
/apex/com.android.runtime/lib64/bionic/libc.so (realloc+160) (BuildId:
1e819f3e369d59be98bee38a8fbd0322)
#05 pc 0000000000011a74 /system/lib64/libutils.so
(android::SharedBuffer::editResize(unsigned long) const+68) (BuildId:
7aa2d71e030a290c8dd28236ba0a838f)
#06 pc 0000000000011ba8 /system/lib64/libutils.so
(android::String8::real_append(char const*, unsigned long)+88)
(BuildId: 7aa2d71e030a290c8dd28236ba0a838f)
#07 pc 000000000007b880
/system/lib64/libaudiopolicycomponents.so
(android::DeviceDescriptor::dump(android::String8*, int, bool)
const+208) (BuildId: 553fefffdca2f3a5dde634e123bd2c81)
#08 pc 000000000008094c
/system/lib64/libaudiopolicycomponents.so
(android::DeviceVector::dump(android::String8*, android::String8
const&, int, bool) const+636) (BuildId:
553fefffdca2f3a5dde634e123bd2c81)
#09 pc 0000000000092ed4
/system/lib64/libaudiopolicycomponents.so
(android::IOProfile::dump(android::String8*, int) const+980) (BuildId:
553fefffdca2f3a5dde634e123bd2c81)
#10 pc 000000000008bd7c
/system/lib64/libaudiopolicycomponents.so
(android::HwModule::dump(android::String8*, int) const+1148) (BuildId:
553fefffdca2f3a5dde634e123bd2c81)
#11 pc 000000000009044c
/system/lib64/libaudiopolicycomponents.so
(android::HwModuleCollection::dump(android::String8*) const+508)
(BuildId: 553fefffdca2f3a5dde634e123bd2c81)
#12 pc 0000000000090134
/system/lib64/libaudiopolicymanagerdefault.so
(android::AudioPolicyManager::dump(android::String8*) const+3908)
(BuildId: fdba879fc1a0c470759bfeb3d594ab81)
#13 pc 0000000000092e40
/system/lib64/libaudiopolicymanagerdefault.so
(android::AudioPolicyManager::dump(int)+80) (BuildId:
fdba879fc1a0c470759bfeb3d594ab81)
#14 pc 000000000022b218 /system/bin/audioserver
(android::AudioPolicyService::dump(int,
android::Vector<android::String16> const&)+392) (BuildId:
1988c27ce74b125f598a07a93367cfdd)
#15 pc 000000000022c8cc /system/bin/audioserver (non-virtual
thunk to android::AudioPolicyService::dump(int,
android::Vector<android::String16> const&)+12) (BuildId:
1988c27ce74b125f598a07a93367cfdd)
#16 pc 00000000000883f4 /system/lib64/libbinder.so
(android::BBinder::onTransact(unsigned int, android::Parcel const&,
android::Parcel*, unsigned int)+340) (BuildId:
4ace0dcb0135b71ba70b7aaee457d26f)
#17 pc 000000000003fadc /system/lib64/audiopolicy-aidl-cpp.so
(android::media::BnAudioPolicyService::onTransact(unsigned int,
android::Parcel const&, android::Parcel*, unsigned int)+19884)
(BuildId: ae185d80e4e54668275f262317dc2d7d)
#18 pc 000000000022adc4 /system/bin/audioserver
(android::AudioPolicyService::onTransact(unsigned int, android::Parcel
const&, android::Parcel*, unsigned int)+1076) (BuildId:
1988c27ce74b125f598a07a93367cfdd)
#19 pc 0000000000048adc /system/lib64/libbinder.so
(android::IPCThreadState::executeCommand(int)+748) (BuildId:
4ace0dcb0135b71ba70b7aaee457d26f)
#20 pc 0000000000051788 /system/lib64/libbinder.so
(android::IPCThreadState::joinThreadPool(bool)+296) (BuildId:
4ace0dcb0135b71ba70b7aaee457d26f)
#21 pc 000000000007e528 /system/lib64/libbinder.so
(android::PoolThread::threadLoop()+24) (BuildId:
4ace0dcb0135b71ba70b7aaee457d26f)
#22 pc 0000000000019268 /system/lib64/libutils.so
(android::Thread::_threadLoop(void*)+248) (BuildId:
7aa2d71e030a290c8dd28236ba0a838f)
#23 pc 000000000001b994 /system/lib64/libutils.so
(libutil_thread_trampoline(void*)
(.__uniq.226528677032898775202282855395389835431)+20) (BuildId:
7aa2d71e030a290c8dd28236ba0a838f)
#24 pc 0000000000083c8c
/apex/com.android.runtime/lib64/bionic/libc.so
(__pthread_start(void*)+236) (BuildId:
1e819f3e369d59be98bee38a8fbd0322)
#25 pc 00000000000761a0
/apex/com.android.runtime/lib64/bionic/libc.so (__start_thread+64)
(BuildId: 1e819f3e369d59be98bee38a8fbd0322)
Memory tags around the fault address (0xa00007055220000), one tag per
16 bytes:
0x705521f800: a a a a a a a a a a a a a a a a
0x705521f900: a a a a a a a a a a a a a a a a
0x705521fa00: a a a a a a a a a a a a a a a a
0x705521fb00: a a a a a a a a a a a a a a a a
0x705521fc00: a a a a a a a a a a a a a a a a
0x705521fd00: a a a a a a a a a a a a a a a a
0x705521fe00: a a a a a a a a a a a a a a a a
0x705521ff00: a a a a a a a a a a a a a a a a
=>0x7055220000:[0] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0x7055220100: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0x7055220200: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0x7055220300: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0x7055220400: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0x7055220500: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0x7055220600: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
0x7055220700: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
---
Whenever memory pressure is high, this crash will happen to any process
with MTE enabled.
Any suggestions are appreciated.
Thanks,
Qun-wei
Powered by blists - more mailing lists