Message-ID: <2214157.1677250083@warthog.procyon.org.uk>
Date:   Fri, 24 Feb 2023 14:48:03 +0000
From:   David Howells <dhowells@...hat.com>
To:     Linus Torvalds <torvalds@...ux-foundation.org>,
        Steve French <stfrench@...rosoft.com>
Cc:     dhowells@...hat.com, Vishal Moola <vishal.moola@...il.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Jan Kara <jack@...e.cz>, Paulo Alcantara <pc@....nz>,
        Matthew Wilcox <willy@...radead.org>,
        Huang Ying <ying.huang@...el.com>,
        Baolin Wang <baolin.wang@...ux.alibaba.com>,
        Xin Hao <xhao@...ux.alibaba.com>, linux-mm@...ck.org,
        mm-commits@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: [RFC][PATCH] cifs, afs: Revert changes to {cifs,afs}_writepages_region()

Here's a more complex patch that reverts Vishal's patch to afs and your
changes to cifs back to the point where find_get_pages_range_tag() was
being used to get a single folio, and then replaces that with a function,
filemap_get_folio_tag(), that just gets a single folio.  An alternative
way of doing this would be to make filemap_get_folios_tag() take a limit
count.
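To illustrate the loop shape this gives the filesystems (a sketch only,
not part of the patch; locking, error handling and the actual writeback
are elided):

	pgoff_t index = start / PAGE_SIZE;
	struct folio *folio;

	while ((folio = filemap_get_folio_tag(mapping, &index, end / PAGE_SIZE,
					      PAGECACHE_TAG_DIRTY))) {
		/* @index has already been advanced past this folio, so
		 * the next lookup resumes from there.
		 */
		... lock, write back and unlock the folio ...
		folio_put(folio);
	}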

This is likely to be more efficient for the common case as
*_extend_writeback() will deal with the pages that are contiguous to the
starting page before we go on to process the rest of the batch.

For filemap_get_folios_tag() to be of use, the batch has to be passed down,
and if it contains scattered, non-contiguous pages, these are likely to end
up being pinned by the batch for significant periods of time whilst I/O is
undertaken on earlier pages.
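For contrast, a sketch of the batched pattern being reverted (again
illustrative only): every folio returned in the batch keeps its
reference until folio_batch_release(), so later folios in the batch stay
pinned whilst I/O is issued on the earlier ones:

	struct folio_batch fbatch;
	unsigned int i, n;

	folio_batch_init(&fbatch);
	n = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
				   PAGECACHE_TAG_DIRTY, &fbatch);
	for (i = 0; i < n; i++) {
		/* fbatch.folios[i+1..n-1] remain pinned here whilst we
		 * do I/O on fbatch.folios[i].
		 */
		... write back fbatch.folios[i] ...
	}
	folio_batch_release(&fbatch);	/* References only dropped here */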
    
Fixes: 3822a7c40997 ("Merge tag 'mm-stable-2023-02-20-13-37' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm")
Fixes: acc8d8588cb7 ("afs: convert afs_writepages_region() to use filemap_get_folios_tag()")
Signed-off-by: David Howells <dhowells@...hat.com>
---
 fs/afs/write.c          |  116 +++++++++++++++++++++++-------------------------
 fs/cifs/file.c          |  114 ++++++++++++++++++++---------------------------
 include/linux/pagemap.h |    2 +
 mm/filemap.c            |   58 ++++++++++++++++++++++++
 4 files changed, 165 insertions(+), 125 deletions(-)

diff --git a/fs/afs/write.c b/fs/afs/write.c
index 571f3b9a417e..b04a95262c4f 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -704,87 +704,83 @@ static int afs_writepages_region(struct address_space *mapping,
 				 bool max_one_loop)
 {
 	struct folio *folio;
-	struct folio_batch fbatch;
 	ssize_t ret;
-	unsigned int i;
 	int n, skips = 0;
 
 	_enter("%llx,%llx,", start, end);
-	folio_batch_init(&fbatch);
 
 	do {
 		pgoff_t index = start / PAGE_SIZE;
 
-		n = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
-					PAGECACHE_TAG_DIRTY, &fbatch);
-
-		if (!n)
+		folio = filemap_get_folio_tag(mapping, &index, end / PAGE_SIZE,
+					      PAGECACHE_TAG_DIRTY);
+		if (!folio)
 			break;
-		for (i = 0; i < n; i++) {
-			folio = fbatch.folios[i];
-			start = folio_pos(folio); /* May regress with THPs */
 
-			_debug("wback %lx", folio_index(folio));
+		start = folio_pos(folio); /* May regress with THPs */
 
-			/* At this point we hold neither the i_pages lock nor the
-			 * page lock: the page may be truncated or invalidated
-			 * (changing page->mapping to NULL), or even swizzled
-			 * back from swapper_space to tmpfs file mapping
-			 */
-			if (wbc->sync_mode != WB_SYNC_NONE) {
-				ret = folio_lock_killable(folio);
-				if (ret < 0) {
-					folio_batch_release(&fbatch);
-					return ret;
-				}
-			} else {
-				if (!folio_trylock(folio))
-					continue;
-			}
+		_debug("wback %lx", folio_index(folio));
 
-			if (folio->mapping != mapping ||
-			    !folio_test_dirty(folio)) {
-				start += folio_size(folio);
-				folio_unlock(folio);
-				continue;
+		/* At this point we hold neither the i_pages lock nor the
+		 * page lock: the page may be truncated or invalidated
+		 * (changing page->mapping to NULL), or even swizzled
+		 * back from swapper_space to tmpfs file mapping
+		 */
+		if (wbc->sync_mode != WB_SYNC_NONE) {
+			ret = folio_lock_killable(folio);
+			if (ret < 0) {
+				folio_put(folio);
+				return ret;
+			}
+		} else {
+			if (!folio_trylock(folio)) {
+				folio_put(folio);
+				return 0;
 			}
+		}
 
-			if (folio_test_writeback(folio) ||
-			    folio_test_fscache(folio)) {
-				folio_unlock(folio);
-				if (wbc->sync_mode != WB_SYNC_NONE) {
-					folio_wait_writeback(folio);
+		if (folio_mapping(folio) != mapping ||
+		    !folio_test_dirty(folio)) {
+			start += folio_size(folio);
+			folio_unlock(folio);
+			folio_put(folio);
+			continue;
+		}
+
+		if (folio_test_writeback(folio) ||
+		    folio_test_fscache(folio)) {
+			folio_unlock(folio);
+			if (wbc->sync_mode != WB_SYNC_NONE) {
+				folio_wait_writeback(folio);
 #ifdef CONFIG_AFS_FSCACHE
-					folio_wait_fscache(folio);
+				folio_wait_fscache(folio);
 #endif
-				} else {
-					start += folio_size(folio);
-				}
-				if (wbc->sync_mode == WB_SYNC_NONE) {
-					if (skips >= 5 || need_resched()) {
-						*_next = start;
-						_leave(" = 0 [%llx]", *_next);
-						return 0;
-					}
-					skips++;
-				}
-				continue;
+			} else {
+				start += folio_size(folio);
 			}
-
-			if (!folio_clear_dirty_for_io(folio))
-				BUG();
-			ret = afs_write_back_from_locked_folio(mapping, wbc,
-					folio, start, end);
-			if (ret < 0) {
-				_leave(" = %zd", ret);
-				folio_batch_release(&fbatch);
-				return ret;
+			folio_put(folio);
+			if (wbc->sync_mode == WB_SYNC_NONE) {
+				if (skips >= 5 || need_resched())
+					break;
+				skips++;
 			}
+			continue;
+		}
 
-			start += ret;
+		if (!folio_clear_dirty_for_io(folio))
+			BUG();
+		ret = afs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
+		folio_put(folio);
+		if (ret < 0) {
+			_leave(" = %zd", ret);
+			return ret;
 		}
 
-		folio_batch_release(&fbatch);
+		start += ret;
+
+		if (max_one_loop)
+			break;
+
 		cond_resched();
 	} while (wbc->nr_to_write > 0);
 
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 5365a3299088..121254086e30 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2857,92 +2857,76 @@ static int cifs_writepages_region(struct address_space *mapping,
 				  struct writeback_control *wbc,
 				  loff_t start, loff_t end, loff_t *_next)
 {
-	struct folio_batch fbatch;
+	struct folio *folio;
+	ssize_t ret;
 	int skips = 0;
 
-	folio_batch_init(&fbatch);
 	do {
-		int nr;
 		pgoff_t index = start / PAGE_SIZE;
 
-		nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
-					    PAGECACHE_TAG_DIRTY, &fbatch);
-		if (!nr)
+		folio = filemap_get_folio_tag(mapping, &index, end / PAGE_SIZE,
+					      PAGECACHE_TAG_DIRTY);
+		if (!folio)
 			break;
 
-		for (int i = 0; i < nr; i++) {
-			ssize_t ret;
-			struct folio *folio = fbatch.folios[i];
-
-redo_folio:
-			start = folio_pos(folio); /* May regress with THPs */
+		start = folio_pos(folio); /* May regress with THPs */
 
-			/* At this point we hold neither the i_pages lock nor the
-			 * page lock: the page may be truncated or invalidated
-			 * (changing page->mapping to NULL), or even swizzled
-			 * back from swapper_space to tmpfs file mapping
-			 */
-			if (wbc->sync_mode != WB_SYNC_NONE) {
-				ret = folio_lock_killable(folio);
-				if (ret < 0)
-					goto write_error;
-			} else {
-				if (!folio_trylock(folio))
-					goto skip_write;
+		/* At this point we hold neither the i_pages lock nor the
+		 * page lock: the page may be truncated or invalidated
+		 * (changing page->mapping to NULL), or even swizzled
+		 * back from swapper_space to tmpfs file mapping
+		 */
+		if (wbc->sync_mode != WB_SYNC_NONE) {
+			ret = folio_lock_killable(folio);
+			if (ret < 0) {
+				folio_put(folio);
+				return ret;
 			}
-
-			if (folio_mapping(folio) != mapping ||
-			    !folio_test_dirty(folio)) {
-				folio_unlock(folio);
-				goto skip_write;
+		} else {
+			if (!folio_trylock(folio)) {
+				folio_put(folio);
+				return 0;
 			}
+		}
 
-			if (folio_test_writeback(folio) ||
-			    folio_test_fscache(folio)) {
-				folio_unlock(folio);
-				if (wbc->sync_mode == WB_SYNC_NONE)
-					goto skip_write;
+		if (folio_mapping(folio) != mapping ||
+		    !folio_test_dirty(folio)) {
+			start += folio_size(folio);
+			folio_unlock(folio);
+			folio_put(folio);
+			continue;
+		}
 
+		if (folio_test_writeback(folio) ||
+		    folio_test_fscache(folio)) {
+			folio_unlock(folio);
+			if (wbc->sync_mode != WB_SYNC_NONE) {
 				folio_wait_writeback(folio);
 #ifdef CONFIG_CIFS_FSCACHE
 				folio_wait_fscache(folio);
 #endif
-				goto redo_folio;
+			} else {
+				start += folio_size(folio);
 			}
-
-			if (!folio_clear_dirty_for_io(folio))
-				/* We hold the page lock - it should've been dirty. */
-				WARN_ON(1);
-
-			ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
-			if (ret < 0)
-				goto write_error;
-
-			start += ret;
-			continue;
-
-write_error:
-			folio_batch_release(&fbatch);
-			*_next = start;
-			return ret;
-
-skip_write:
-			/*
-			 * Too many skipped writes, or need to reschedule?
-			 * Treat it as a write error without an error code.
-			 */
-			if (skips >= 5 || need_resched()) {
-				ret = 0;
-				goto write_error;
+			folio_put(folio);
+			if (wbc->sync_mode == WB_SYNC_NONE) {
+				if (skips >= 5 || need_resched())
+					break;
+				skips++;
 			}
-
-			/* Otherwise, just skip that folio and go on to the next */
-			skips++;
-			start += folio_size(folio);
 			continue;
 		}
 
-		folio_batch_release(&fbatch);		
+		if (!folio_clear_dirty_for_io(folio))
+			/* We hold the page lock - it should've been dirty. */
+			WARN_ON(1);
+
+		ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
+		folio_put(folio);
+		if (ret < 0)
+			return ret;
+
+		start += ret;
 		cond_resched();
 	} while (wbc->nr_to_write > 0);
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0acb8e1fb7af..577535633006 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -741,6 +741,8 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
 		pgoff_t *start, pgoff_t end, struct folio_batch *fbatch);
 unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
 		pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch);
+struct folio *filemap_get_folio_tag(struct address_space *mapping, pgoff_t *start,
+				    pgoff_t end, xa_mark_t tag);
 
 struct page *grab_cache_page_write_begin(struct address_space *mapping,
 			pgoff_t index);
diff --git a/mm/filemap.c b/mm/filemap.c
index 2723104cc06a..1b1e9c661018 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2339,6 +2339,64 @@ unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
 }
 EXPORT_SYMBOL(filemap_get_folios_tag);
 
+/**
+ * filemap_get_folio_tag - Get the first folio matching @tag
+ * @mapping:    The address_space to search
+ * @start:      The starting page index
+ * @end:        The final page index (inclusive)
+ * @tag:        The tag index
+ *
+ * Search for and return the first folio in the mapping starting at index
+ * @start and up to index @end (inclusive).  The folio is returned with an
+ * elevated reference count.
+ *
+ * If a folio is returned, it may start before @start; if it does, it will
+ * contain @start.  The folio may also extend beyond @end; if it does, it will
+ * contain @end.  If folios are added to or removed from the page cache while
+ * this is running, they may or may not be found by this call.
+ *
+ * Return: The folio that was found or NULL.  @start is also updated to index
+ * the next folio for the traversal or will be left pointing after @end.
+ */
+struct folio *filemap_get_folio_tag(struct address_space *mapping, pgoff_t *start,
+				    pgoff_t end, xa_mark_t tag)
+{
+	XA_STATE(xas, &mapping->i_pages, *start);
+	struct folio *folio;
+
+	rcu_read_lock();
+	while ((folio = find_get_entry(&xas, end, tag)) != NULL) {
+		/*
+		 * Shadow entries should never be tagged, but this iteration
+		 * is lockless so there is a window for page reclaim to evict
+		 * a page we saw tagged. Skip over it.
+		 */
+		if (xa_is_value(folio))
+			continue;
+
+		if (folio_test_hugetlb(folio))
+			*start = folio->index + 1;
+		else
+			*start = folio_next_index(folio);
+		goto out;
+	}
+
+	/*
+	 * We come here when there is no page beyond @end. We take care to not
+	 * overflow the index @start as it confuses some of the callers. This
+	 * breaks the iteration when there is a page at index -1 but that is
+	 * already broke anyway.
+	 */
+	if (end == (pgoff_t)-1)
+		*start = (pgoff_t)-1;
+	else
+		*start = end + 1;
+out:
+	rcu_read_unlock();
+	return folio;
+}
+EXPORT_SYMBOL(filemap_get_folio_tag);
+
 /*
  * CD/DVDs are error prone. When a medium error occurs, the driver may fail
  * a _large_ part of the i/o request. Imagine the worst scenario:
