lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <166876785552.222254.4403222906022558715.stgit@warthog.procyon.org.uk>
Date:   Fri, 18 Nov 2022 10:37:35 +0000
From:   David Howells <dhowells@...hat.com>
To:     linux-afs@...ts.infradead.org
Cc:     Marc Dionne <marc.dionne@...istor.com>,
        Christoph Hellwig <hch@....de>,
        Matthew Wilcox <willy@...radead.org>,
        Theodore Ts'o <tytso@....edu>, dhowells@...hat.com,
        linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: [RFC PATCH] afs: Stop implementing ->writepage()

We're trying to get rid of the ->writepage() hook[1].  Stop afs from using
it by unlocking the page and calling filemap_fdatawrite_wbc() rather than
folio_write_one().  We drop the folio lock so that writeback can include
pages on both sides of the target page in the write without risking
deadlock.

A hint flag is added to the writeback_control struct so that a filesystem
can say that the write is triggered by write_begin seeing a conflicting
write.  This causes do_writepages() to do a single pass of the loop only.

This requires ->migrate_folio() to be implemented, so point that at
filemap_migrate_folio() for files and also for symlinks and directories.

A couple of questions:

 (1) afs_write_back_from_locked_folio() could be called directly rather
     than calling filemap_fdatawrite_wbc(), but that would avoid the
     control group stuff that wbc_attach_and_unlock_inode() and co. seem to
     do.  Do I actually need to do this?

 (2) afs_writepages_region() has a loop in it to generate multiple writes.
     do_writepages() also acquired a loop[2] which will also generate
     multiple writes.  Should I remove the loop from
     afs_writepages_region() and leave it to the caller of ->writepages()?

Signed-off-by: David Howells <dhowells@...hat.com>
cc: Marc Dionne <marc.dionne@...istor.com>
cc: Christoph Hellwig <hch@....de>
cc: Matthew Wilcox <willy@...radead.org>
cc: Theodore Ts'o <tytso@....edu>
cc: linux-afs@...ts.infradead.org
Link: https://lore.kernel.org/r/20221113162902.883850-1-hch@lst.de/ [1]
Link: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=80a2ea9f85850f1cdae814be03b4a16c3d3abc00 [2]
---

 fs/afs/dir.c              |    1 +
 fs/afs/file.c             |    3 +-
 fs/afs/write.c            |   63 +++++++++++++++++++++++----------------------
 include/linux/writeback.h |    1 +
 mm/page-writeback.c       |    3 +-
 5 files changed, 38 insertions(+), 33 deletions(-)

diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 230c2d19116d..baed7b095087 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -77,6 +77,7 @@ const struct address_space_operations afs_dir_aops = {
 	.dirty_folio	= afs_dir_dirty_folio,
 	.release_folio	= afs_dir_release_folio,
 	.invalidate_folio = afs_dir_invalidate_folio,
+	.migrate_folio	= filemap_migrate_folio,
 };
 
 const struct dentry_operations afs_fs_dentry_operations = {
diff --git a/fs/afs/file.c b/fs/afs/file.c
index d1cfb235c4b9..a2325e0b9d38 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -58,14 +58,15 @@ const struct address_space_operations afs_file_aops = {
 	.invalidate_folio = afs_invalidate_folio,
 	.write_begin	= afs_write_begin,
 	.write_end	= afs_write_end,
-	.writepage	= afs_writepage,
 	.writepages	= afs_writepages,
+	.migrate_folio	= filemap_migrate_folio,
 };
 
 const struct address_space_operations afs_symlink_aops = {
 	.read_folio	= afs_symlink_read_folio,
 	.release_folio	= afs_release_folio,
 	.invalidate_folio = afs_invalidate_folio,
+	.migrate_folio	= filemap_migrate_folio,
 };
 
 static const struct vm_operations_struct afs_vm_ops = {
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9ebdd36eaf2f..38d02ead3f38 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -38,6 +38,24 @@ static void afs_folio_start_fscache(bool caching, struct folio *folio)
 }
 #endif
 
+/*
+ * Flush out a conflicting write.  This may extend the write to the surrounding
+ * pages if also dirty and contiguous to the conflicting region..
+ */
+static int afs_flush_conflicting_write(struct address_space *mapping,
+				       struct folio *folio)
+{
+	struct writeback_control wbc = {
+		.sync_mode	= WB_SYNC_ALL,
+		.nr_to_write	= LONG_MAX,
+		.range_start	= folio_pos(folio),
+		.range_end	= LLONG_MAX,
+		.for_write_begin = true,
+	};
+
+	return filemap_fdatawrite_wbc(mapping, &wbc);
+}
+
 /*
  * prepare to perform part of a write to a page
  */
@@ -80,7 +98,8 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 
 		if (folio_test_writeback(folio)) {
 			trace_afs_folio_dirty(vnode, tracepoint_string("alrdy"), folio);
-			goto flush_conflicting_write;
+			folio_unlock(folio);
+			goto wait_for_writeback;
 		}
 		/* If the file is being filled locally, allow inter-write
 		 * spaces to be merged into writes.  If it's not, only write
@@ -99,8 +118,15 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
 	 * flush the page out.
 	 */
 flush_conflicting_write:
-	_debug("flush conflict");
-	ret = folio_write_one(folio);
+	trace_afs_folio_dirty(vnode, tracepoint_string("confl"), folio);
+	folio_unlock(folio);
+
+	ret = afs_flush_conflicting_write(mapping, folio);
+	if (ret < 0)
+		goto error;
+
+wait_for_writeback:
+	ret = folio_wait_writeback_killable(folio);
 	if (ret < 0)
 		goto error;
 
@@ -663,34 +689,6 @@ static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
 	return ret;
 }
 
-/*
- * write a page back to the server
- * - the caller locked the page for us
- */
-int afs_writepage(struct page *subpage, struct writeback_control *wbc)
-{
-	struct folio *folio = page_folio(subpage);
-	ssize_t ret;
-	loff_t start;
-
-	_enter("{%lx},", folio_index(folio));
-
-#ifdef CONFIG_AFS_FSCACHE
-	folio_wait_fscache(folio);
-#endif
-
-	start = folio_index(folio) * PAGE_SIZE;
-	ret = afs_write_back_from_locked_folio(folio_mapping(folio), wbc,
-					       folio, start, LLONG_MAX - start);
-	if (ret < 0) {
-		_leave(" = %zd", ret);
-		return ret;
-	}
-
-	_leave(" = 0");
-	return 0;
-}
-
 /*
  * write a region of pages back to the server
  */
@@ -775,6 +773,9 @@ static int afs_writepages_region(struct address_space *mapping,
 
 		start += ret;
 
+		if (wbc->for_write_begin)
+			break;
+
 		cond_resched();
 	} while (wbc->nr_to_write > 0);
 
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 06f9291b6fd5..3832ac3425c8 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -61,6 +61,7 @@ struct writeback_control {
 	unsigned range_cyclic:1;	/* range_start is cyclic */
 	unsigned for_sync:1;		/* sync(2) WB_SYNC_ALL writeback */
 	unsigned unpinned_fscache_wb:1;	/* Cleared I_PINNING_FSCACHE_WB */
+	unsigned for_write_begin:1;	/* Flush conflicting write */
 
 	/*
 	 * When writeback IOs are bounced through async layers, only the
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7e9d8d857ecc..04c65b8b4ded 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2469,7 +2469,8 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 			ret = mapping->a_ops->writepages(mapping, wbc);
 		else
 			ret = generic_writepages(mapping, wbc);
-		if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL))
+		if ((ret != -ENOMEM) || (wbc->sync_mode != WB_SYNC_ALL) ||
+		    wbc->for_write_begin)
 			break;
 
 		/*


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ