lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a7944523fcc3634607691c35311a5d59d1a3f8d4.camel@mediatek.com>
Date: Thu, 18 Sep 2025 08:53:09 +0000
From: Qun-wei Lin (林群崴) <Qun-wei.Lin@...iatek.com>
To: "catalin.marinas@....com" <catalin.marinas@....com>,
	"usamaarif642@...il.com" <usamaarif642@...il.com>, "linux-mm@...ck.org"
	<linux-mm@...ck.org>, "yuzhao@...gle.com" <yuzhao@...gle.com>,
	"akpm@...ux-foundation.org" <akpm@...ux-foundation.org>
CC: "corbet@....net" <corbet@....net>,
	Andrew Yang (楊智強) <Andrew.Yang@...iatek.com>,
	"npache@...hat.com" <npache@...hat.com>, "rppt@...nel.org" <rppt@...nel.org>,
	"willy@...radead.org" <willy@...radead.org>, "kernel-team@...a.com"
	<kernel-team@...a.com>, "david@...hat.com" <david@...hat.com>,
	"roman.gushchin@...ux.dev" <roman.gushchin@...ux.dev>, "hannes@...xchg.org"
	<hannes@...xchg.org>, "cerasuolodomenico@...il.com"
	<cerasuolodomenico@...il.com>, "linux-kernel@...r.kernel.org"
	<linux-kernel@...r.kernel.org>, "ryncsn@...il.com" <ryncsn@...il.com>,
	"surenb@...gle.com" <surenb@...gle.com>, "riel@...riel.com"
	<riel@...riel.com>, "shakeel.butt@...ux.dev" <shakeel.butt@...ux.dev>,
	Chinwen Chang (張錦文)
	<chinwen.chang@...iatek.com>, "linux-doc@...r.kernel.org"
	<linux-doc@...r.kernel.org>, Casper Li (李中榮)
	<casper.li@...iatek.com>, "ryan.roberts@....com" <ryan.roberts@....com>,
	"linux-mediatek@...ts.infradead.org" <linux-mediatek@...ts.infradead.org>,
	"baohua@...nel.org" <baohua@...nel.org>, "kaleshsingh@...gle.com"
	<kaleshsingh@...gle.com>, "zhais@...gle.com" <zhais@...gle.com>,
	"linux-arm-kernel@...ts.infradead.org" <linux-arm-kernel@...ts.infradead.org>
Subject: Re: [PATCH v5 2/6] mm: remap unused subpages to shared zeropage when
 splitting isolated thp

On Fri, 2024-08-30 at 11:03 +0100, Usama Arif wrote:
> From: Yu Zhao <yuzhao@...gle.com>
> 
> Here being unused means containing only zeros and inaccessible to
> userspace. When splitting an isolated thp under reclaim or migration,
> the unused subpages can be mapped to the shared zeropage, hence
> saving
> memory. This is particularly helpful when the internal
> fragmentation of a thp is high, i.e. it has many untouched subpages.
> 
> This is also a prerequisite for THP low utilization shrinker which
> will
> be introduced in later patches, where underutilized THPs are split,
> and
> the zero-filled pages are freed saving memory.
> 
> Signed-off-by: Yu Zhao <yuzhao@...gle.com>
> Tested-by: Shuang Zhai <zhais@...gle.com>
> Signed-off-by: Usama Arif <usamaarif642@...il.com>
> ---
>  include/linux/rmap.h |  7 ++++-
>  mm/huge_memory.c     |  8 ++---
>  mm/migrate.c         | 72 ++++++++++++++++++++++++++++++++++++++----
> --
>  mm/migrate_device.c  |  4 +--
>  4 files changed, 75 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> index 91b5935e8485..d5e93e44322e 100644
> --- a/include/linux/rmap.h
> +++ b/include/linux/rmap.h
> @@ -745,7 +745,12 @@ int folio_mkclean(struct folio *);
>  int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages,
> pgoff_t pgoff,
>  		      struct vm_area_struct *vma);
>  
> -void remove_migration_ptes(struct folio *src, struct folio *dst,
> bool locked);
> +enum rmp_flags {
> +	RMP_LOCKED		= 1 << 0,
> +	RMP_USE_SHARED_ZEROPAGE	= 1 << 1,
> +};
> +
> +void remove_migration_ptes(struct folio *src, struct folio *dst, int
> flags);
>  
>  /*
>   * rmap_walk_control: To control rmap traversing for specific needs
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 0c48806ccb9a..af60684e7c70 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -3020,7 +3020,7 @@ bool unmap_huge_pmd_locked(struct
> vm_area_struct *vma, unsigned long addr,
>  	return false;
>  }
>  
> -static void remap_page(struct folio *folio, unsigned long nr)
> +static void remap_page(struct folio *folio, unsigned long nr, int
> flags)
>  {
>  	int i = 0;
>  
> @@ -3028,7 +3028,7 @@ static void remap_page(struct folio *folio,
> unsigned long nr)
>  	if (!folio_test_anon(folio))
>  		return;
>  	for (;;) {
> -		remove_migration_ptes(folio, folio, true);
> +		remove_migration_ptes(folio, folio, RMP_LOCKED |
> flags);
>  		i += folio_nr_pages(folio);
>  		if (i >= nr)
>  			break;
> @@ -3240,7 +3240,7 @@ static void __split_huge_page(struct page
> *page, struct list_head *list,
>  
>  	if (nr_dropped)
>  		shmem_uncharge(folio->mapping->host, nr_dropped);
> -	remap_page(folio, nr);
> +	remap_page(folio, nr, PageAnon(head) ?
> RMP_USE_SHARED_ZEROPAGE : 0);
>  
>  	/*
>  	 * set page to its compound_head when split to non order-0
> pages, so
> @@ -3542,7 +3542,7 @@ int split_huge_page_to_list_to_order(struct
> page *page, struct list_head *list,
>  		if (mapping)
>  			xas_unlock(&xas);
>  		local_irq_enable();
> -		remap_page(folio, folio_nr_pages(folio));
> +		remap_page(folio, folio_nr_pages(folio), 0);
>  		ret = -EAGAIN;
>  	}
>  
> diff --git a/mm/migrate.c b/mm/migrate.c
> index 6f9c62c746be..d039863e014b 100644
> --- a/mm/migrate.c
> +++ b/mm/migrate.c
> @@ -204,13 +204,57 @@ bool isolate_folio_to_list(struct folio *folio,
> struct list_head *list)
>  	return true;
>  }
>  
> +static bool try_to_map_unused_to_zeropage(struct
> page_vma_mapped_walk *pvmw,
> +					  struct folio *folio,
> +					  unsigned long idx)
> +{
> +	struct page *page = folio_page(folio, idx);
> +	bool contains_data;
> +	pte_t newpte;
> +	void *addr;
> +
> +	VM_BUG_ON_PAGE(PageCompound(page), page);
> +	VM_BUG_ON_PAGE(!PageAnon(page), page);
> +	VM_BUG_ON_PAGE(!PageLocked(page), page);
> +	VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page);
> +
> +	if (folio_test_mlocked(folio) || (pvmw->vma->vm_flags &
> VM_LOCKED) ||
> +	    mm_forbids_zeropage(pvmw->vma->vm_mm))
> +		return false;
> +
> +	/*
> +	 * The pmd entry mapping the old thp was flushed and the pte
> mapping
> +	 * this subpage has been non present. If the subpage is only
> zero-filled
> +	 * then map it to the shared zeropage.
> +	 */
> +	addr = kmap_local_page(page);
> +	contains_data = memchr_inv(addr, 0, PAGE_SIZE);
> +	kunmap_local(addr);
> +
> +	if (contains_data)
> +		return false;
> +
> +	newpte = pte_mkspecial(pfn_pte(my_zero_pfn(pvmw->address),
> +					pvmw->vma->vm_page_prot));
> +	set_pte_at(pvmw->vma->vm_mm, pvmw->address, pvmw->pte,
> newpte);
> +
> +	dec_mm_counter(pvmw->vma->vm_mm, mm_counter(folio));
> +	return true;
> +}
> +
> +struct rmap_walk_arg {
> +	struct folio *folio;
> +	bool map_unused_to_zeropage;
> +};
> +
>  /*
>   * Restore a potential migration pte to a working pte entry
>   */
>  static bool remove_migration_pte(struct folio *folio,
> -		struct vm_area_struct *vma, unsigned long addr, void
> *old)
> +		struct vm_area_struct *vma, unsigned long addr, void
> *arg)
>  {
> -	DEFINE_FOLIO_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC |
> PVMW_MIGRATION);
> +	struct rmap_walk_arg *rmap_walk_arg = arg;
> +	DEFINE_FOLIO_VMA_WALK(pvmw, rmap_walk_arg->folio, vma, addr,
> PVMW_SYNC | PVMW_MIGRATION);
>  
>  	while (page_vma_mapped_walk(&pvmw)) {
>  		rmap_t rmap_flags = RMAP_NONE;
> @@ -234,6 +278,9 @@ static bool remove_migration_pte(struct folio
> *folio,
>  			continue;
>  		}
>  #endif
> +		if (rmap_walk_arg->map_unused_to_zeropage &&
> +		    try_to_map_unused_to_zeropage(&pvmw, folio,
> idx))
> +			continue;
>  
>  		folio_get(folio);
>  		pte = mk_pte(new, READ_ONCE(vma->vm_page_prot));
> @@ -312,14 +359,21 @@ static bool remove_migration_pte(struct folio
> *folio,
>   * Get rid of all migration entries and replace them by
>   * references to the indicated page.
>   */
> -void remove_migration_ptes(struct folio *src, struct folio *dst,
> bool locked)
> +void remove_migration_ptes(struct folio *src, struct folio *dst, int
> flags)
>  {
> +	struct rmap_walk_arg rmap_walk_arg = {
> +		.folio = src,
> +		.map_unused_to_zeropage = flags &
> RMP_USE_SHARED_ZEROPAGE,
> +	};
> +
>  	struct rmap_walk_control rwc = {
>  		.rmap_one = remove_migration_pte,
> -		.arg = src,
> +		.arg = &rmap_walk_arg,
>  	};
>  
> -	if (locked)
> +	VM_BUG_ON_FOLIO((flags & RMP_USE_SHARED_ZEROPAGE) && (src !=
> dst), src);
> +
> +	if (flags & RMP_LOCKED)
>  		rmap_walk_locked(dst, &rwc);
>  	else
>  		rmap_walk(dst, &rwc);
> @@ -934,7 +988,7 @@ static int writeout(struct address_space
> *mapping, struct folio *folio)
>  	 * At this point we know that the migration attempt cannot
>  	 * be successful.
>  	 */
> -	remove_migration_ptes(folio, folio, false);
> +	remove_migration_ptes(folio, folio, 0);
>  
>  	rc = mapping->a_ops->writepage(&folio->page, &wbc);
>  
> @@ -1098,7 +1152,7 @@ static void migrate_folio_undo_src(struct folio
> *src,
>  				   struct list_head *ret)
>  {
>  	if (page_was_mapped)
> -		remove_migration_ptes(src, src, false);
> +		remove_migration_ptes(src, src, 0);
>  	/* Drop an anon_vma reference if we took one */
>  	if (anon_vma)
>  		put_anon_vma(anon_vma);
> @@ -1336,7 +1390,7 @@ static int migrate_folio_move(free_folio_t
> put_new_folio, unsigned long private,
>  		lru_add_drain();
>  
>  	if (old_page_state & PAGE_WAS_MAPPED)
> -		remove_migration_ptes(src, dst, false);
> +		remove_migration_ptes(src, dst, 0);
>  
>  out_unlock_both:
>  	folio_unlock(dst);
> @@ -1474,7 +1528,7 @@ static int unmap_and_move_huge_page(new_folio_t
> get_new_folio,
>  
>  	if (page_was_mapped)
>  		remove_migration_ptes(src,
> -			rc == MIGRATEPAGE_SUCCESS ? dst : src,
> false);
> +			rc == MIGRATEPAGE_SUCCESS ? dst : src, 0);
>  
>  unlock_put_anon:
>  	folio_unlock(dst);
> diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> index 8d687de88a03..9cf26592ac93 100644
> --- a/mm/migrate_device.c
> +++ b/mm/migrate_device.c
> @@ -424,7 +424,7 @@ static unsigned long
> migrate_device_unmap(unsigned long *src_pfns,
>  			continue;
>  
>  		folio = page_folio(page);
> -		remove_migration_ptes(folio, folio, false);
> +		remove_migration_ptes(folio, folio, 0);
>  
>  		src_pfns[i] = 0;
>  		folio_unlock(folio);
> @@ -840,7 +840,7 @@ void migrate_device_finalize(unsigned long
> *src_pfns,
>  			dst = src;
>  		}
>  
> -		remove_migration_ptes(src, dst, false);
> +		remove_migration_ptes(src, dst, 0);
>  		folio_unlock(src);
>  
>  		if (folio_is_zone_device(src))

Hi,

This patch has been in the mainline for some time, but we recently
discovered an issue when both mTHP and MTE (Memory Tagging Extension)
are enabled.

It seems that remapping to the same zeropage might cause MTE tag
mismatches, since MTE tags are associated with physical addresses.

In Android, the tombstone is as follows:

---
Build fingerprint:
'alps/vext_k6993v1_64/k6993v1_64:16/BP2A.250605.031.A3/mp1cs1ofp41:user
debug/dev-keys'
Revision: '0'
ABI: 'arm64'
Timestamp: 2025-08-12 04:58:28.507086720+0800
Process uptime: 0s
Cmdline: /system/bin/audioserver
pid: 8217, tid: 8882, name: binder:8217_4  >>> /system/bin/audioserver
<<<
uid: 1041
tagged_addr_ctrl: 000000000007fff3 (PR_TAGGED_ADDR_ENABLE,
PR_MTE_TCF_SYNC, mask 0xfffe)
signal 11 (SIGSEGV), code 9 (SEGV_MTESERR), fault addr
0x0a00007055220000
Cause: [MTE]: Buffer Overflow, 14016 bytes into a 23070-byte allocation
at 0x705521c940
    x0  0a0000705521c940  x1  0300006f75210ab0  x2  00000000000022a5 
x3  0a0000705521ffc0
    x4  0300006f75212de5  x5  0a000070552222f5  x6  0000000000005a1e 
x7  0000000000000000
    x8  339c000005a1e11e  x9  00000000f041339c  x10 000000009dd48904 
x11 000000000000ffff
    x12 0000000022b70889  x13 000000004cc0b2ff  x14 0000000000000000 
x15 0000000000000010
    x16 00000071cc5d8fc0  x17 00000071cc54e040  x18 0000006ef7bd4000 
x19 0300006f7520d430
    x20 00000071cc5e0340  x21 0000000000005a1e  x22 0a0000705521c940 
x23 00000000000059b5
    x24 00000000000000b1  x25 0300006f75212e4e  x26 caa20000059b511d 
x27 000000000000001d
    x28 0300006f75212e30  x29 0000006f1fe385f0
    lr  00000071cc54200c  sp  0000006f1fe385c0  pc  00000071cc54e158 
pst 0000000020001000

26 total frames
backtrace:
      #00 pc 000000000006c158 
/apex/com.android.runtime/lib64/bionic/libc.so
(__memcpy_aarch64_simd+280) (BuildId: 1e819f3e369d59be98bee38a8fbd0322)
      #01 pc 0000000000060008 
/apex/com.android.runtime/lib64/bionic/libc.so
(scudo::Allocator<scudo::AndroidNormalConfig,
&scudo_malloc_postinit>::reallocate(void*, unsigned long, unsigned
long)+696) (BuildId: 1e819f3e369d59be98bee38a8fbd0322)
      #02 pc 000000000005fccc 
/apex/com.android.runtime/lib64/bionic/libc.so (scudo_realloc+44)
(BuildId: 1e819f3e369d59be98bee38a8fbd0322)
      #03 pc 000000000005c2cc 
/apex/com.android.runtime/lib64/bionic/libc.so (LimitRealloc(void*,
unsigned long)+124) (BuildId: 1e819f3e369d59be98bee38a8fbd0322)
      #04 pc 0000000000059a90 
/apex/com.android.runtime/lib64/bionic/libc.so (realloc+160) (BuildId:
1e819f3e369d59be98bee38a8fbd0322)
      #05 pc 0000000000011a74  /system/lib64/libutils.so
(android::SharedBuffer::editResize(unsigned long) const+68) (BuildId:
7aa2d71e030a290c8dd28236ba0a838f)
      #06 pc 0000000000011ba8  /system/lib64/libutils.so
(android::String8::real_append(char const*, unsigned long)+88)
(BuildId: 7aa2d71e030a290c8dd28236ba0a838f)
      #07 pc 000000000007b880 
/system/lib64/libaudiopolicycomponents.so
(android::DeviceDescriptor::dump(android::String8*, int, bool)
const+208) (BuildId: 553fefffdca2f3a5dde634e123bd2c81)
      #08 pc 000000000008094c 
/system/lib64/libaudiopolicycomponents.so
(android::DeviceVector::dump(android::String8*, android::String8
const&, int, bool) const+636) (BuildId:
553fefffdca2f3a5dde634e123bd2c81)
      #09 pc 0000000000092ed4 
/system/lib64/libaudiopolicycomponents.so
(android::IOProfile::dump(android::String8*, int) const+980) (BuildId:
553fefffdca2f3a5dde634e123bd2c81)
      #10 pc 000000000008bd7c 
/system/lib64/libaudiopolicycomponents.so
(android::HwModule::dump(android::String8*, int) const+1148) (BuildId:
553fefffdca2f3a5dde634e123bd2c81)
      #11 pc 000000000009044c 
/system/lib64/libaudiopolicycomponents.so
(android::HwModuleCollection::dump(android::String8*) const+508)
(BuildId: 553fefffdca2f3a5dde634e123bd2c81)
      #12 pc 0000000000090134 
/system/lib64/libaudiopolicymanagerdefault.so
(android::AudioPolicyManager::dump(android::String8*) const+3908)
(BuildId: fdba879fc1a0c470759bfeb3d594ab81)
      #13 pc 0000000000092e40 
/system/lib64/libaudiopolicymanagerdefault.so
(android::AudioPolicyManager::dump(int)+80) (BuildId:
fdba879fc1a0c470759bfeb3d594ab81)
      #14 pc 000000000022b218  /system/bin/audioserver
(android::AudioPolicyService::dump(int,
android::Vector<android::String16> const&)+392) (BuildId:
1988c27ce74b125f598a07a93367cfdd)
      #15 pc 000000000022c8cc  /system/bin/audioserver (non-virtual
thunk to android::AudioPolicyService::dump(int,
android::Vector<android::String16> const&)+12) (BuildId:
1988c27ce74b125f598a07a93367cfdd)
      #16 pc 00000000000883f4  /system/lib64/libbinder.so
(android::BBinder::onTransact(unsigned int, android::Parcel const&,
android::Parcel*, unsigned int)+340) (BuildId:
4ace0dcb0135b71ba70b7aaee457d26f)
      #17 pc 000000000003fadc  /system/lib64/audiopolicy-aidl-cpp.so
(android::media::BnAudioPolicyService::onTransact(unsigned int,
android::Parcel const&, android::Parcel*, unsigned int)+19884)
(BuildId: ae185d80e4e54668275f262317dc2d7d)
      #18 pc 000000000022adc4  /system/bin/audioserver
(android::AudioPolicyService::onTransact(unsigned int, android::Parcel
const&, android::Parcel*, unsigned int)+1076) (BuildId:
1988c27ce74b125f598a07a93367cfdd)
      #19 pc 0000000000048adc  /system/lib64/libbinder.so
(android::IPCThreadState::executeCommand(int)+748) (BuildId:
4ace0dcb0135b71ba70b7aaee457d26f)
      #20 pc 0000000000051788  /system/lib64/libbinder.so
(android::IPCThreadState::joinThreadPool(bool)+296) (BuildId:
4ace0dcb0135b71ba70b7aaee457d26f)
      #21 pc 000000000007e528  /system/lib64/libbinder.so
(android::PoolThread::threadLoop()+24) (BuildId:
4ace0dcb0135b71ba70b7aaee457d26f)
      #22 pc 0000000000019268  /system/lib64/libutils.so
(android::Thread::_threadLoop(void*)+248) (BuildId:
7aa2d71e030a290c8dd28236ba0a838f)
      #23 pc 000000000001b994  /system/lib64/libutils.so
(libutil_thread_trampoline(void*)
(.__uniq.226528677032898775202282855395389835431)+20) (BuildId:
7aa2d71e030a290c8dd28236ba0a838f)
      #24 pc 0000000000083c8c 
/apex/com.android.runtime/lib64/bionic/libc.so
(__pthread_start(void*)+236) (BuildId:
1e819f3e369d59be98bee38a8fbd0322)
      #25 pc 00000000000761a0 
/apex/com.android.runtime/lib64/bionic/libc.so (__start_thread+64)
(BuildId: 1e819f3e369d59be98bee38a8fbd0322)

Memory tags around the fault address (0xa00007055220000), one tag per
16 bytes:
      0x705521f800: a  a  a  a  a  a  a  a  a  a  a  a  a  a  a  a
      0x705521f900: a  a  a  a  a  a  a  a  a  a  a  a  a  a  a  a
      0x705521fa00: a  a  a  a  a  a  a  a  a  a  a  a  a  a  a  a
      0x705521fb00: a  a  a  a  a  a  a  a  a  a  a  a  a  a  a  a
      0x705521fc00: a  a  a  a  a  a  a  a  a  a  a  a  a  a  a  a
      0x705521fd00: a  a  a  a  a  a  a  a  a  a  a  a  a  a  a  a
      0x705521fe00: a  a  a  a  a  a  a  a  a  a  a  a  a  a  a  a
      0x705521ff00: a  a  a  a  a  a  a  a  a  a  a  a  a  a  a  a
    =>0x7055220000:[0] 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
      0x7055220100: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
      0x7055220200: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
      0x7055220300: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
      0x7055220400: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
      0x7055220500: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
      0x7055220600: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
      0x7055220700: 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
---

Whenever memory pressure is high, this crash will happen to any process
with MTE enabled.

Any suggestions are appreciated.

Thanks,
Qun-wei

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ