Message-Id: <20251128044146.80050-5-jniethe@nvidia.com>
Date: Fri, 28 Nov 2025 15:41:44 +1100
From: Jordan Niethe <jniethe@...dia.com>
To: linux-mm@...ck.org
Cc: balbirs@...dia.com,
	matthew.brost@...el.com,
	akpm@...ux-foundation.org,
	linux-kernel@...r.kernel.org,
	dri-devel@...ts.freedesktop.org,
	david@...hat.com,
	ziy@...dia.com,
	apopple@...dia.com,
	lorenzo.stoakes@...cle.com,
	lyude@...hat.com,
	dakr@...nel.org,
	airlied@...il.com,
	simona@...ll.ch,
	rcampbell@...dia.com,
	mpenttil@...hat.com,
	jgg@...dia.com,
	willy@...radead.org
Subject: [RFC PATCH 4/6] mm: Add a new swap type for migration entries with device private PFNs

A future change will remove device private pages from the physical
address space. This will mean that device private pages no longer have
normal PFNs and must be handled separately.

When migrating a device private page, a migration entry is created for
that page; this entry includes the page's PFN. Once device private PFNs
exist in a different address space from regular PFNs, we need to be
able to determine which kind of PFN is in the entry so we can associate
it with the correct page.

Introduce new swap types:

  - SWP_MIGRATION_DEVICE_READ
  - SWP_MIGRATION_DEVICE_WRITE
  - SWP_MIGRATION_DEVICE_READ_EXCLUSIVE

These correspond to

  - SWP_MIGRATION_READ
  - SWP_MIGRATION_WRITE
  - SWP_MIGRATION_READ_EXCLUSIVE

except the swap entry contains a device private PFN.
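
For illustration only, a minimal sketch (not part of this patch) of how
a caller chooses between the two families of entries when setting up a
migration PTE; "page" and "writable" are placeholders, and the actual
selection logic is in the migrate_vma_collect_pmd() and
try_to_migrate_one() hunks below:

  /* Sketch only: pick the migration entry type for a page being migrated. */
  swp_entry_t entry;

  if (is_device_private_page(page))
          entry = writable ?
                  make_writable_migration_device_private_entry(page_to_pfn(page)) :
                  make_readable_migration_device_private_entry(page_to_pfn(page));
  else
          entry = writable ?
                  make_writable_migration_entry(page_to_pfn(page)) :
                  make_readable_migration_entry(page_to_pfn(page));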

The existing helpers such as is_writable_migration_entry() will still
return true for a SWP_MIGRATION_DEVICE_WRITE entry.

Introduce new helpers such as
is_writable_device_migration_private_entry() to disambiguate between a
SWP_MIGRATION_WRITE and a SWP_MIGRATION_DEVICE_WRITE entry.
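
As an illustration only, a reader of a migration entry could use the new
helpers as sketched below. device_private_pfn_to_page() is hypothetical;
the actual conversion from a device private PFN back to a page is
introduced by a later patch in this series.

  /* Sketch only: resolve a migration entry to its page. */
  static struct page *resolve_migration_entry_page(swp_entry_t entry)
  {
          if (is_device_private_migration_entry(entry))
                  /* hypothetical lookup in the separate device private space */
                  return device_private_pfn_to_page(swp_offset_pfn(entry));
          return pfn_to_page(swp_offset_pfn(entry));
  }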

Signed-off-by: Jordan Niethe <jniethe@...dia.com>
Signed-off-by: Alistair Popple <apopple@...dia.com>
---
 include/linux/swap.h    |  8 +++-
 include/linux/swapops.h | 87 ++++++++++++++++++++++++++++++++++++++---
 mm/memory.c             |  9 ++++-
 mm/migrate.c            |  2 +-
 mm/migrate_device.c     | 31 ++++++++++-----
 mm/mprotect.c           | 21 +++++++---
 mm/page_vma_mapped.c    |  2 +-
 mm/pagewalk.c           |  3 +-
 mm/rmap.c               | 32 ++++++++++-----
 9 files changed, 161 insertions(+), 34 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index e818fbade1e2..87f14d673979 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -74,12 +74,18 @@ static inline int current_is_kswapd(void)
  *
  * When a page is mapped by the device for exclusive access we set the CPU page
  * table entries to a special SWP_DEVICE_EXCLUSIVE entry.
+ *
+ * Because device private pages do not use regular PFNs, special migration
+ * entries are also needed.
  */
 #ifdef CONFIG_DEVICE_PRIVATE
-#define SWP_DEVICE_NUM 3
+#define SWP_DEVICE_NUM 6
 #define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM)
 #define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1)
 #define SWP_DEVICE_EXCLUSIVE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2)
+#define SWP_MIGRATION_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3)
+#define SWP_MIGRATION_DEVICE_READ_EXCLUSIVE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+4)
+#define SWP_MIGRATION_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+5)
 #else
 #define SWP_DEVICE_NUM 0
 #endif
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 64ea151a7ae3..7aa3f00e304a 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -196,6 +196,43 @@ static inline bool is_device_exclusive_entry(swp_entry_t entry)
 	return swp_type(entry) == SWP_DEVICE_EXCLUSIVE;
 }
 
+static inline swp_entry_t make_readable_migration_device_private_entry(pgoff_t offset)
+{
+	return swp_entry(SWP_MIGRATION_DEVICE_READ, offset);
+}
+
+static inline swp_entry_t make_writable_migration_device_private_entry(pgoff_t offset)
+{
+	return swp_entry(SWP_MIGRATION_DEVICE_WRITE, offset);
+}
+
+static inline bool is_device_private_migration_entry(swp_entry_t entry)
+{
+	return unlikely(swp_type(entry) == SWP_MIGRATION_DEVICE_READ ||
+			swp_type(entry) == SWP_MIGRATION_DEVICE_READ_EXCLUSIVE ||
+			swp_type(entry) == SWP_MIGRATION_DEVICE_WRITE);
+}
+
+static inline bool is_readable_device_migration_private_entry(swp_entry_t entry)
+{
+	return unlikely(swp_type(entry) == SWP_MIGRATION_DEVICE_READ);
+}
+
+static inline bool is_writable_device_migration_private_entry(swp_entry_t entry)
+{
+	return unlikely(swp_type(entry) == SWP_MIGRATION_DEVICE_WRITE);
+}
+
+static inline swp_entry_t make_device_migration_readable_exclusive_migration_entry(pgoff_t offset)
+{
+	return swp_entry(SWP_MIGRATION_DEVICE_READ_EXCLUSIVE, offset);
+}
+
+static inline bool is_device_migration_readable_exclusive_entry(swp_entry_t entry)
+{
+	return swp_type(entry) == SWP_MIGRATION_DEVICE_READ_EXCLUSIVE;
+}
+
 #else /* CONFIG_DEVICE_PRIVATE */
 static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset)
 {
@@ -217,6 +254,11 @@ static inline bool is_writable_device_private_entry(swp_entry_t entry)
 	return false;
 }
 
+static inline bool is_readable_device_migration_private_entry(swp_entry_t entry)
+{
+	return false;
+}
+
 static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset)
 {
 	return swp_entry(0, 0);
@@ -227,6 +269,36 @@ static inline bool is_device_exclusive_entry(swp_entry_t entry)
 	return false;
 }
 
+static inline swp_entry_t make_readable_migration_device_private_entry(pgoff_t offset)
+{
+	return swp_entry(0, 0);
+}
+
+static inline swp_entry_t make_writable_migration_device_private_entry(pgoff_t offset)
+{
+	return swp_entry(0, 0);
+}
+
+static inline bool is_device_private_migration_entry(swp_entry_t entry)
+{
+	return false;
+}
+
+static inline bool is_writable_device_migration_private_entry(swp_entry_t entry)
+{
+	return false;
+}
+
+static inline swp_entry_t make_device_migration_readable_exclusive_migration_entry(pgoff_t offset)
+{
+	return swp_entry(0, 0);
+}
+
+static inline bool is_device_migration_readable_exclusive_entry(swp_entry_t entry)
+{
+	return false;
+}
+
 #endif /* CONFIG_DEVICE_PRIVATE */
 
 #ifdef CONFIG_MIGRATION
@@ -234,22 +306,26 @@ static inline int is_migration_entry(swp_entry_t entry)
 {
 	return unlikely(swp_type(entry) == SWP_MIGRATION_READ ||
 			swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE ||
-			swp_type(entry) == SWP_MIGRATION_WRITE);
+			swp_type(entry) == SWP_MIGRATION_WRITE ||
+			is_device_private_migration_entry(entry));
 }
 
 static inline int is_writable_migration_entry(swp_entry_t entry)
 {
-	return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE);
+	return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE ||
+			is_writable_device_migration_private_entry(entry));
 }
 
 static inline int is_readable_migration_entry(swp_entry_t entry)
 {
-	return unlikely(swp_type(entry) == SWP_MIGRATION_READ);
+	return unlikely(swp_type(entry) == SWP_MIGRATION_READ ||
+			is_readable_device_migration_private_entry(entry));
 }
 
 static inline int is_readable_exclusive_migration_entry(swp_entry_t entry)
 {
-	return unlikely(swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE);
+	return unlikely(swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE ||
+			is_device_migration_readable_exclusive_entry(entry));
 }
 
 static inline swp_entry_t make_readable_migration_entry(pgoff_t offset)
@@ -525,7 +601,8 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry)
 	BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
 
 	return is_migration_entry(entry) || is_device_private_entry(entry) ||
-	       is_device_exclusive_entry(entry) || is_hwpoison_entry(entry);
+	       is_device_exclusive_entry(entry) || is_hwpoison_entry(entry) ||
+	       is_device_private_migration_entry(entry);
 }
 
 struct page_vma_mapped_walk;
diff --git a/mm/memory.c b/mm/memory.c
index b59ae7ce42eb..f1ed361434ff 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -962,8 +962,13 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 			 * to be set to read. A previously exclusive entry is
 			 * now shared.
 			 */
-			entry = make_readable_migration_entry(
-							swp_offset(entry));
+			if (is_device_private_migration_entry(entry))
+				entry = make_readable_migration_device_private_entry(
+								swp_offset(entry));
+			else
+				entry = make_readable_migration_entry(
+								swp_offset(entry));
+
 			pte = swp_entry_to_pte(entry);
 			if (pte_swp_soft_dirty(orig_pte))
 				pte = pte_swp_mksoft_dirty(pte);
diff --git a/mm/migrate.c b/mm/migrate.c
index c0e9f15be2a2..3c561d61afba 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -495,7 +495,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 		goto out;
 
 	entry = pte_to_swp_entry(pte);
-	if (!is_migration_entry(entry))
+	if (!(is_migration_entry(entry)))
 		goto out;
 
 	migration_entry_wait_on_locked(entry, ptl);
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 82f09b24d913..458b5114bb2b 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -235,15 +235,28 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 				folio_mark_dirty(folio);
 
 			/* Setup special migration page table entry */
-			if (mpfn & MIGRATE_PFN_WRITE)
-				entry = make_writable_migration_entry(
-							page_to_pfn(page));
-			else if (anon_exclusive)
-				entry = make_readable_exclusive_migration_entry(
-							page_to_pfn(page));
-			else
-				entry = make_readable_migration_entry(
-							page_to_pfn(page));
+			if (mpfn & MIGRATE_PFN_WRITE) {
+				if (is_device_private_page(page))
+					entry = make_writable_migration_device_private_entry(
+								page_to_pfn(page));
+				else
+					entry = make_writable_migration_entry(
+								page_to_pfn(page));
+			} else if (anon_exclusive) {
+				if (is_device_private_page(page))
+					entry = make_device_migration_readable_exclusive_migration_entry(
+								page_to_pfn(page));
+				else
+					entry = make_readable_exclusive_migration_entry(
+								page_to_pfn(page));
+			} else {
+				if (is_device_private_page(page))
+					entry = make_readable_migration_device_private_entry(
+								page_to_pfn(page));
+				else
+					entry = make_readable_migration_entry(
+								page_to_pfn(page));
+			}
 			if (pte_present(pte)) {
 				if (pte_young(pte))
 					entry = make_migration_entry_young(entry);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 113b48985834..7d79a0f53bf5 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -365,11 +365,22 @@ static long change_pte_range(struct mmu_gather *tlb,
 				 * A protection check is difficult so
 				 * just be safe and disable write
 				 */
-				if (folio_test_anon(folio))
-					entry = make_readable_exclusive_migration_entry(
-							     swp_offset(entry));
-				else
-					entry = make_readable_migration_entry(swp_offset(entry));
+				if (!is_writable_device_migration_private_entry(entry)) {
+					if (folio_test_anon(folio))
+						entry = make_readable_exclusive_migration_entry(
+								swp_offset(entry));
+					else
+						entry = make_readable_migration_entry(
+								swp_offset(entry));
+				} else {
+					if (folio_test_anon(folio))
+						entry = make_device_migration_readable_exclusive_migration_entry(
+								swp_offset(entry));
+					else
+						entry = make_readable_migration_device_private_entry(
+								swp_offset(entry));
+				}
+
 				newpte = swp_entry_to_pte(entry);
 				if (pte_swp_soft_dirty(oldpte))
 					newpte = pte_swp_mksoft_dirty(newpte);
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 9146bd084435..e9fe747d3df3 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -112,7 +112,7 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr)
 			return false;
 		entry = pte_to_swp_entry(ptent);
 
-		if (!is_migration_entry(entry))
+		if (!(is_migration_entry(entry)))
 			return false;
 
 		pfn = swp_offset_pfn(entry);
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 9f91cf85a5be..f5c77dda3359 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -1003,7 +1003,8 @@ struct folio *folio_walk_start(struct folio_walk *fw,
 		swp_entry_t entry = pte_to_swp_entry(pte);
 
 		if ((flags & FW_MIGRATION) &&
-		    is_migration_entry(entry)) {
+		    (is_migration_entry(entry) ||
+		     is_device_private_migration_entry(entry))) {
 			page = pfn_swap_entry_to_page(entry);
 			expose_page = false;
 			goto found;
diff --git a/mm/rmap.c b/mm/rmap.c
index e94500318f92..9642a79cbdb4 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -2535,15 +2535,29 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 			 * pte. do_swap_page() will wait until the migration
 			 * pte is removed and then restart fault handling.
 			 */
-			if (writable)
-				entry = make_writable_migration_entry(
-							page_to_pfn(subpage));
-			else if (anon_exclusive)
-				entry = make_readable_exclusive_migration_entry(
-							page_to_pfn(subpage));
-			else
-				entry = make_readable_migration_entry(
-							page_to_pfn(subpage));
+			if (writable) {
+				if (is_device_private_page(subpage))
+					entry = make_writable_migration_device_private_entry(
+								page_to_pfn(subpage));
+				else
+					entry = make_writable_migration_entry(
+								page_to_pfn(subpage));
+			} else if (anon_exclusive) {
+				if (is_device_private_page(subpage))
+					entry = make_device_migration_readable_exclusive_migration_entry(
+								page_to_pfn(subpage));
+				else
+					entry = make_readable_exclusive_migration_entry(
+								page_to_pfn(subpage));
+			} else {
+				if (is_device_private_page(subpage))
+					entry = make_readable_migration_device_private_entry(
+								page_to_pfn(subpage));
+				else
+					entry = make_readable_migration_entry(
+								page_to_pfn(subpage));
+			}
+
 			if (likely(pte_present(pteval))) {
 				if (pte_young(pteval))
 					entry = make_migration_entry_young(entry);
-- 
2.34.1

