Message-Id: <20250724084441.380404-10-link@vivo.com>
Date: Thu, 24 Jul 2025 16:44:37 +0800
From: Huan Yang <link@...o.com>
To: Andrew Morton <akpm@...ux-foundation.org>,
	David Hildenbrand <david@...hat.com>,
	Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
	Rik van Riel <riel@...riel.com>,
	"Liam R. Howlett" <Liam.Howlett@...cle.com>,
	Vlastimil Babka <vbabka@...e.cz>,
	Harry Yoo <harry.yoo@...cle.com>,
	Xu Xin <xu.xin16@....com.cn>,
	Chengming Zhou <chengming.zhou@...ux.dev>,
	Mike Rapoport <rppt@...nel.org>,
	Suren Baghdasaryan <surenb@...gle.com>,
	Michal Hocko <mhocko@...e.com>,
	Zi Yan <ziy@...dia.com>,
	Matthew Brost <matthew.brost@...el.com>,
	Joshua Hahn <joshua.hahnjy@...il.com>,
	Rakie Kim <rakie.kim@...com>,
	Byungchul Park <byungchul@...com>,
	Gregory Price <gourry@...rry.net>,
	Ying Huang <ying.huang@...ux.alibaba.com>,
	Alistair Popple <apopple@...dia.com>,
	"Matthew Wilcox (Oracle)" <willy@...radead.org>,
	Huan Yang <link@...o.com>,
	Christian Brauner <brauner@...nel.org>,
	Usama Arif <usamaarif642@...il.com>,
	Yu Zhao <yuzhao@...gle.com>,
	Baolin Wang <baolin.wang@...ux.alibaba.com>,
	linux-mm@...ck.org,
	linux-kernel@...r.kernel.org,
	"Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>
Cc: Qianfeng Rong <rongqianfeng@...o.com>
Subject: [RFC PATCH 9/9] mm/migrate: apply migrate entry page_type

When a single-page folio is already unmapped, we can reuse its page_type
field by setting it to PGTY_mgt_entry. This marks the folio as being in a
critical state in which no page table entry in the system still maps its
PFN; migrate entries are installed in its place.

The lower 24 bits of this field are used to store the number of migrate
entries installed during try_to_migrate().
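
The mgte helpers used by the diff below (folio_init_mgte(),
folio_get_mgte_count(), folio_inc_mgte_count(), folio_dec_mgte_count(),
folio_remove_mgte(), folio_test_mgt_entry()) are introduced earlier in this
series and are not part of this patch. Purely for illustration, a minimal
sketch of what they could look like, assuming the usual page_type layout
(PGTY_* value in the top byte, payload in the low 24 bits) and that the
field is only touched while the folio is unmapped:

        #define MGTE_COUNT_MASK         0x00ffffffU

        /* Sketch only: the real helpers live in an earlier patch. */
        static inline bool folio_test_mgt_entry(const struct folio *folio)
        {
                return data_race(folio->page.page_type >> 24) == PGTY_mgt_entry;
        }

        static inline void folio_init_mgte(struct folio *folio, unsigned int nr)
        {
                /* Only legal for an unmapped single-page folio. */
                folio->page.page_type = ((unsigned int)PGTY_mgt_entry << 24) |
                                        (nr & MGTE_COUNT_MASK);
        }

        static inline unsigned int folio_get_mgte_count(struct folio *folio)
        {
                return folio->page.page_type & MGTE_COUNT_MASK;
        }

        static inline void folio_inc_mgte_count(struct folio *folio)
        {
                if (folio_test_mgt_entry(folio))
                        folio->page.page_type++;
        }

        static inline void folio_dec_mgte_count(struct folio *folio)
        {
                if (folio_test_mgt_entry(folio))
                        folio->page.page_type--;
        }

        static inline void folio_remove_mgte(struct folio *folio)
        {
                /* No type again: the field is reusable as _mapcount (-1). */
                if (folio_test_mgt_entry(folio))
                        folio->page.page_type = UINT_MAX;
        }

The real helpers may need atomic or otherwise serialized updates; plain
writes are used above only to keep the sketch short.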

Note that the folio's page_type must be initialized to PGTY_mgt_entry, and
the migrate entry count set, only while the rmap walk lock is held. While
the lock is held, no new VMA can be forked (which would add migrate
entries) and no VMA can be unmapped (which would remove them).
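
In other words, the new ->exit hook wired up below
(folio_set_migrate_entry_type) is assumed to be invoked by the rmap walk
after the per-VMA loop but before the anon_vma lock is released; roughly
(sketch, not part of this patch):

        /* ... per-VMA loop calling rwc->rmap_one() / rwc->done() ... */

        if (rwc->exit)
                rwc->exit(folio, rwc);  /* still under the anon_vma lock */

        if (!locked)
                anon_vma_unlock_read(anon_vma);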

After a folio leaves try_to_migrate() and before remove_migration_ptes()
acquires the rmap lock, the system can perform normal fork and unmap
operations. Therefore, copy_nonpresent_pte() and zap_nonpresent_ptes() must
increment or decrement the migrate entry count recorded in the folio (if
its page_type is PGTY_mgt_entry).

When remove_migration_ptes() runs during migration and starts removing
migrate entries, the recorded migrate entry count is decremented as each
entry is restored. Once the count reaches zero, no migrate entries remain
in the associated VMAs that still need to be cleared and replaced with the
destination PFN, so the VMA traversal can safely be terminated early.
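
Assuming the series also extends the ->done callback to take the walk
control (as the callback added below requires), the early exit inside the
per-VMA loop of the rmap walk would look roughly like this sketch:

        if (!rwc->rmap_one(folio, vma, address, rwc->arg))
                break;
        if (rwc->done && rwc->done(folio, rwc))
                break;  /* migrate entry count hit zero: stop walking VMAs */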

Note, however, that if something goes wrong during migration and the
operation has to be undone, PGTY_mgt_entry can no longer be used: the
migrate entries must be pointed back at the src folio, and a set
PGTY_mgt_entry would interfere with the normal use of mapcount when the
rmap information is re-established.

Signed-off-by: Huan Yang <link@...o.com>
Signed-off-by: Qianfeng Rong <rongqianfeng@...o.com>
---
 mm/memory.c  |  2 ++
 mm/migrate.c | 28 +++++++++++++++++++++++++++-
 mm/rmap.c    | 17 +++++++++++++++++
 3 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/mm/memory.c b/mm/memory.c
index b4a7695b1e31..f9d71b118c11 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -861,6 +861,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 				pte = pte_swp_mkuffd_wp(pte);
 			set_pte_at(src_mm, addr, src_pte, pte);
 		}
+		folio_inc_mgte_count(folio);
 	} else if (is_device_private_entry(entry)) {
 		page = pfn_swap_entry_to_page(entry);
 		folio = page_folio(page);
@@ -1651,6 +1652,7 @@ static inline int zap_nonpresent_ptes(struct mmu_gather *tlb,
 		if (!should_zap_folio(details, folio))
 			return 1;
 		rss[mm_counter(folio)]--;
+		folio_dec_mgte_count(folio);
 	} else if (pte_marker_entry_uffd_wp(entry)) {
 		/*
 		 * For anon: always drop the marker; for file: only
diff --git a/mm/migrate.c b/mm/migrate.c
index a5ea8fba2997..fc2fac1559bd 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -241,7 +241,8 @@ static bool remove_migration_pte(struct folio *folio,
 		struct vm_area_struct *vma, unsigned long addr, void *arg)
 {
 	struct rmap_walk_arg *rmap_walk_arg = arg;
-	DEFINE_FOLIO_VMA_WALK(pvmw, rmap_walk_arg->src, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
+	struct folio *src = rmap_walk_arg->src;
+	DEFINE_FOLIO_VMA_WALK(pvmw, src, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
 
 	while (page_vma_mapped_walk(&pvmw)) {
 		rmap_t rmap_flags = RMAP_NONE;
@@ -334,6 +335,7 @@ static bool remove_migration_pte(struct folio *folio,
 
 		trace_remove_migration_pte(pvmw.address, pte_val(pte),
 					   compound_order(new));
+		folio_dec_mgte_count(src);
 
 		/* No need to invalidate - it was non-present before */
 		update_mmu_cache(vma, pvmw.address, pvmw.pte);
@@ -342,12 +344,27 @@ static bool remove_migration_pte(struct folio *folio,
 	return true;
 }
 
+static int folio_removed_all_migrate_entry(struct folio *folio,
+					   struct rmap_walk_control *rwc)
+{
+	struct rmap_walk_arg *arg = (struct rmap_walk_arg *)rwc->arg;
+	struct folio *src = arg->src;
+
+	VM_BUG_ON(!folio_test_mgt_entry(src));
+
+	if (!folio_get_mgte_count(src))
+		return true;
+	return false;
+}
+
 /*
  * Get rid of all migration entries and replace them by
  * references to the indicated page.
  */
 void remove_migration_ptes(struct folio *src, struct folio *dst, int flags)
 {
+	bool undo = src == dst;
+
 	struct rmap_walk_arg rmap_walk_arg = {
 		.src = src,
 		.map_unused_to_zeropage = flags & RMP_USE_SHARED_ZEROPAGE,
@@ -356,12 +373,21 @@ void remove_migration_ptes(struct folio *src, struct folio *dst, int flags)
 	struct rmap_walk_control rwc = {
 		.rmap_one = remove_migration_pte,
 		.locked = flags & RMP_LOCKED,
+		.done = !undo && folio_test_mgt_entry(src) ?
+				folio_removed_all_migrate_entry :
+				NULL,
 		.arg = &rmap_walk_arg,
 	};
 
 	VM_BUG_ON_FOLIO((flags & RMP_USE_SHARED_ZEROPAGE) && (src != dst), src);
 
+	if (undo)
+		folio_remove_mgte(src);
+
 	rmap_walk(dst, &rwc);
+
+	if (!undo)
+		folio_remove_mgte(src);
 }
 
 /*
diff --git a/mm/rmap.c b/mm/rmap.c
index 2433e12c131d..f3911491b2d9 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -2263,6 +2263,7 @@ void try_to_unmap(struct folio *folio, enum ttu_flags flags)
 
 struct migrate_walk_arg {
 	enum ttu_flags flags;
+	unsigned int nr_migrate_entry;
 };
 
 /*
@@ -2282,6 +2283,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 	struct mmu_notifier_range range;
 	struct migrate_walk_arg *mwa = (struct migrate_walk_arg *)arg;
 	enum ttu_flags flags = mwa->flags;
+	unsigned int nr_migrate_entry = 0;
 	unsigned long pfn;
 	unsigned long hsz = 0;
 
@@ -2548,6 +2550,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 						hsz);
 			else
 				set_pte_at(mm, address, pvmw.pte, swp_pte);
+			nr_migrate_entry++;
 			trace_set_migration_pte(address, pte_val(swp_pte),
 						folio_order(folio));
 			/*
@@ -2565,11 +2568,24 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 		folio_put(folio);
 	}
 
+	mwa->nr_migrate_entry += nr_migrate_entry;
+
 	mmu_notifier_invalidate_range_end(&range);
 
 	return ret;
 }
 
+static void folio_set_migrate_entry_type(struct folio *folio,
+					 struct rmap_walk_control *rwc)
+{
+	struct migrate_walk_arg *mwa = (struct migrate_walk_arg *)rwc->arg;
+	unsigned int nr_migrate_entry = mwa->nr_migrate_entry;
+
+	if (nr_migrate_entry && !folio_test_large(folio) &&
+	    !folio_mapped(folio))
+		folio_init_mgte(folio, nr_migrate_entry);
+}
+
 /**
  * try_to_migrate - try to replace all page table mappings with swap entries
  * @folio: the folio to replace page table entries for
@@ -2588,6 +2604,7 @@ void try_to_migrate(struct folio *folio, enum ttu_flags flags)
 		.rmap_one = try_to_migrate_one,
 		.arg = (void *)&arg,
 		.done = folio_not_mapped,
+		.exit = folio_set_migrate_entry_type,
 		.locked = flags & TTU_RMAP_LOCKED,
 		.anon_lock = folio_lock_anon_vma_read,
 	};
-- 
2.34.1

