Message-ID: <1bfac47d0477ad5f13a8daac673d4d4c415645ca.1742099301.git-series.apopple@nvidia.com>
Date: Sun, 16 Mar 2025 15:29:27 +1100
From: Alistair Popple <apopple@...dia.com>
To: linux-mm@...ck.org
Cc: linux-fsdevel@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	Alistair Popple <apopple@...dia.com>
Subject: [PATCH RFC 4/6] mm: Implement writeback for shared device private pages

Currently devices can't write to shared file-backed device private
pages and any such writes will be lost. This is because when a device
private pagecache page is migrated back to the CPU its contents are
always reloaded from backing storage.

To allow data written by the device to be migrated back, add a new
pgmap callback, migrate_to_pagecache(), which will be called when a
device private entry is found in the page cache so that the driver can
copy the data back from the device to the new pagecache page.
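
For illustration only, a driver adopting this would wire the new
callback up next to its existing migrate_to_ram() handler; the
driver-side names below are hypothetical:

  static const struct dev_pagemap_ops my_pagemap_ops = {
  	/* Existing path: fault device private data back on CPU access. */
  	.migrate_to_ram		= my_migrate_to_ram,
  	/* New path: copy device data back into a pagecache page. */
  	.migrate_to_pagecache	= my_migrate_to_pagecache,
  };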

Because the page was clean when it was migrated to the device, the
filesystem needs to be informed when the device has written to it so
that the new data gets written back. Drivers are expected to do this by
calling set_page_dirty() on the new page if it was written to in the
migrate_to_pagecache() callback.
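
A minimal sketch of such a callback, assuming hypothetical
my_copy_from_device() and my_device_wrote_to() helpers, could look
like this:

  static int my_migrate_to_pagecache(struct page *page, struct page *newpage)
  {
  	/* Copy the device private page's contents into the new pagecache page. */
  	if (my_copy_from_device(page, newpage))
  		return -EIO;

  	/*
  	 * The page was clean when it was migrated to the device, so if
  	 * the device wrote to it the filesystem must be told so the new
  	 * data gets written back.
  	 */
  	if (my_device_wrote_to(page))
  		set_page_dirty(newpage);

  	return 0;
  }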

Signed-off-by: Alistair Popple <apopple@...dia.com>
---
 include/linux/memremap.h |  2 ++
 mm/migrate.c             |  2 +-
 mm/migrate_device.c      | 54 ++++++++++++++++++++++++++++-------------
 3 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 3f7143a..d921db2 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -89,6 +89,8 @@ struct dev_pagemap_ops {
 	 */
 	vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf);
 
+	int (*migrate_to_pagecache)(struct page *page, struct page *newpage);
+
 	/*
 	 * Handle the memory failure happens on a range of pfns.  Notify the
 	 * processes who are using these pfns, and try to recover the data on
diff --git a/mm/migrate.c b/mm/migrate.c
index 21f92eb..c660151 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1006,7 +1006,7 @@ int fallback_migrate_folio(struct address_space *mapping,
 		struct folio *dst, struct folio *src, enum migrate_mode mode,
 		int extra_count)
 {
-	if (folio_test_dirty(src)) {
+	if (!folio_is_device_private(src) && folio_test_dirty(src)) {
 		/* Only writeback folios in full synchronous migration */
 		switch (mode) {
 		case MIGRATE_SYNC:
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 946e9fd..9aeba66 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -160,6 +160,17 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 				goto next;
 			mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
 			mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
+
+			/*
+			 * Tell the driver it may write to the PTE. Normally
+			 * page_mkwrite() would need to be called to upgrade a
+			 * read-only PTE to a writable one for a folio with
+			 * mappings, but that doesn't happen here, so the driver
+			 * is responsible for marking the page dirty with
+			 * set_page_dirty() if it does actually write to the page.
+			 */
+			mpfn |= vma->vm_flags & VM_WRITE && page->mapping ?
+				MIGRATE_PFN_WRITE : 0;
 		}
 
 		/* FIXME support THP */
@@ -240,6 +251,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 					entry = make_migration_entry_dirty(entry);
 				}
 			}
+			entry = make_migration_entry_dirty(entry);
 			swp_pte = swp_entry_to_pte(entry);
 			if (pte_present(pte)) {
 				if (pte_soft_dirty(pte))
@@ -898,14 +910,15 @@ void migrate_device_page(struct page *page)
 	int ret;
 	struct page *newpage;
 
-	WARN_ON(!is_device_private_page(page));
+	if (WARN_ON_ONCE(!is_device_private_page(page)))
+		return;
+
+	lock_page(page);
 
 	/*
-	 * We don't support writeback of dirty pages from the driver yet.
+	 * TODO: It would be nice to have the driver call some version of this
+	 * (migrate_device_range()?) so it can expand the region.
 	 */
-	WARN_ON(PageDirty(page));
-
-	lock_page(page);
 	try_to_migrate(page_folio(page), 0);
 
 	/*
@@ -932,18 +945,27 @@ void migrate_device_page(struct page *page)
 	WARN_ON_ONCE(ret != MIGRATEPAGE_SUCCESS);
 	page->mapping = NULL;
 
-	/*
-	 * We're going to read the newpage back from disk so make it not
-	 * uptodate.
-	 */
-	ClearPageUptodate(newpage);
+	if (page->pgmap->ops->migrate_to_pagecache)
+		ret = page->pgmap->ops->migrate_to_pagecache(page, newpage);
 
-	/*
-	 * IO will unlock newpage asynchronously.
-	 */
-	folio_mapping(page_folio(newpage))->a_ops->read_folio(NULL,
-						page_folio(newpage));
-	lock_page(newpage);
+	/* Fallback to reading page from disk */
+	if (!page->pgmap->ops->migrate_to_pagecache || ret) {
+		if (WARN_ON_ONCE(PageDirty(newpage)))
+			ClearPageDirty(newpage);
+
+		/*
+		 * We're going to read the newpage back from disk so make it not
+		 * uptodate.
+		 */
+		ClearPageUptodate(newpage);
+
+		/*
+		 * IO will unlock newpage asynchronously.
+		 */
+		folio_mapping(page_folio(newpage))->a_ops->read_folio(NULL,
+							page_folio(newpage));
+		lock_page(newpage);
+	}
 
 	remove_migration_ptes(page_folio(page), page_folio(newpage), false);
 
-- 
git-series 0.9.1
