Message-ID: <20250313233341.1675324-29-dhowells@redhat.com>
Date: Thu, 13 Mar 2025 23:33:20 +0000
From: David Howells <dhowells@...hat.com>
To: Viacheslav Dubeyko <slava@...eyko.com>,
Alex Markuze <amarkuze@...hat.com>
Cc: David Howells <dhowells@...hat.com>,
Ilya Dryomov <idryomov@...il.com>,
Jeff Layton <jlayton@...nel.org>,
Dongsheng Yang <dongsheng.yang@...ystack.cn>,
ceph-devel@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
linux-block@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [RFC PATCH 28/35] netfs: Adjust group handling
Make some adjustments to the handling of netfs groups so that ceph can use
them for snap contexts:
- Move netfs_get_group(), netfs_put_group() and netfs_put_group_many() to
linux/netfs.h so that ceph can build its snap context on netfs groups.
- Move netfs_set_group() and __netfs_set_group() to linux/netfs.h so that
ceph_dirty_folio() can call them from inside the locked section in which
it finds the snap context to attach.
- Provide a netfs_writepages_group() that takes a group as a parameter and
attaches it to the request, and make netfs_free_request() drop the ref on
it. netfs_writepages() then becomes a wrapper that passes in a NULL
group (an illustrative usage sketch follows this list).
- In netfs_perform_write(), only consider a folio to have a conflicting
group if the folio's group pointer isn't NULL or the folio is dirty (a
clean folio with no group attached doesn't need flushing first).
- In netfs_perform_write(), interject a small 10ms sleep after every 16
attempts to flush a folio within a single call.
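For illustration only, a ceph-like filesystem might use the relocated
helpers and the new netfs_writepages_group() roughly as below.  The
struct my_snap_context and my_*() names are hypothetical and do not
appear in this series:

	struct my_snap_context {
		struct netfs_group group;	/* refcounted via ->ref; ->free() called on last put */
		u64 seq;			/* hypothetical fs-private data */
	};

	/* Called from the fs's dirty_folio handler, inside the locked
	 * section in which it picks the context to attach.
	 * netfs_set_group() takes a ref via netfs_get_group() when it
	 * attaches the group to the folio.
	 */
	static void my_attach_context(struct folio *folio,
				      struct my_snap_context *ctx)
	{
		netfs_set_group(folio, &ctx->group);
	}

	/* Flush only the dirty folios that belong to one group.  The
	 * write request takes its own ref on the group and
	 * netfs_free_request() drops it when the request is destroyed.
	 */
	static int my_flush_context(struct address_space *mapping,
				    struct writeback_control *wbc,
				    struct my_snap_context *ctx)
	{
		return netfs_writepages_group(mapping, wbc, &ctx->group);
	}
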
Signed-off-by: David Howells <dhowells@...hat.com>
cc: Jeff Layton <jlayton@...nel.org>
cc: Viacheslav Dubeyko <slava@...eyko.com>
cc: Alex Markuze <amarkuze@...hat.com>
cc: Ilya Dryomov <idryomov@...il.com>
cc: ceph-devel@...r.kernel.org
cc: linux-fsdevel@...r.kernel.org
---
fs/netfs/buffered_write.c | 25 ++++-------------
fs/netfs/internal.h | 32 ---------------------
fs/netfs/objects.c | 1 +
fs/netfs/write_issue.c | 38 +++++++++++++++++++++----
include/linux/netfs.h | 59 +++++++++++++++++++++++++++++++++++++++
5 files changed, 98 insertions(+), 57 deletions(-)
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 0245449b93e3..12ddbe9bc78b 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -11,26 +11,9 @@
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/pagevec.h>
+#include <linux/delay.h>
#include "internal.h"
-static void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
-{
- if (netfs_group)
- folio_attach_private(folio, netfs_get_group(netfs_group));
-}
-
-static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
-{
- void *priv = folio_get_private(folio);
-
- if (unlikely(priv != netfs_group)) {
- if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
- folio_attach_private(folio, netfs_get_group(netfs_group));
- else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
- folio_detach_private(folio);
- }
-}
-
/*
* Grab a folio for writing and lock it. Attempt to allocate as large a folio
* as possible to hold as much of the remaining length as possible in one go.
@@ -113,6 +96,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
};
struct netfs_io_request *wreq = NULL;
struct folio *folio = NULL, *writethrough = NULL;
+ unsigned int flush_counter = 0;
unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
ssize_t written = 0, ret, ret2;
loff_t i_size, pos = iocb->ki_pos;
@@ -208,7 +192,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
group = netfs_folio_group(folio);
if (unlikely(group != netfs_group) &&
- group != NETFS_FOLIO_COPY_TO_CACHE)
+ group != NETFS_FOLIO_COPY_TO_CACHE &&
+ (group || folio_test_dirty(folio)))
goto flush_content;
if (folio_test_uptodate(folio)) {
@@ -341,6 +326,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
trace_netfs_folio(folio, netfs_flush_content);
folio_unlock(folio);
folio_put(folio);
+ if ((++flush_counter & 0xf) == 0xf)
+ msleep(10);
ret = filemap_write_and_wait_range(mapping, fpos, fpos + flen - 1);
if (ret < 0)
goto error_folio_unlock;
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index eebb4f0f660e..2a6123c4da35 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -261,38 +261,6 @@ static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx)
#endif
}
-/*
- * Get a ref on a netfs group attached to a dirty page (e.g. a ceph snap).
- */
-static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group)
-{
- if (netfs_group && netfs_group != NETFS_FOLIO_COPY_TO_CACHE)
- refcount_inc(&netfs_group->ref);
- return netfs_group;
-}
-
-/*
- * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
- */
-static inline void netfs_put_group(struct netfs_group *netfs_group)
-{
- if (netfs_group &&
- netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
- refcount_dec_and_test(&netfs_group->ref))
- netfs_group->free(netfs_group);
-}
-
-/*
- * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
- */
-static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
-{
- if (netfs_group &&
- netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
- refcount_sub_and_test(nr, &netfs_group->ref))
- netfs_group->free(netfs_group);
-}
-
/*
* Check to see if a buffer aligns with the crypto block size. If it doesn't
* the crypto layer is going to copy all the data - in which case relying on
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index 52d6fce70837..7fdbaa5c5cab 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -153,6 +153,7 @@ static void netfs_free_request(struct work_struct *work)
kvfree(rreq->direct_bv);
}
+ netfs_put_group(rreq->group);
rolling_buffer_clear(&rreq->buffer);
rolling_buffer_clear(&rreq->bounce);
if (test_bit(NETFS_RREQ_PUT_RMW_TAIL, &rreq->flags))
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 93601033ba08..3921fcf4f859 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -418,7 +418,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
netfs_issue_write(wreq, upload);
} else if (fgroup != wreq->group) {
/* We can't write this page to the server yet. */
- kdebug("wrong group");
+ kdebug("wrong group %px != %px", fgroup, wreq->group);
folio_redirty_for_writepage(wbc, folio);
folio_unlock(folio);
netfs_issue_write(wreq, upload);
@@ -593,11 +593,19 @@ static void netfs_end_issue_write(struct netfs_io_request *wreq)
netfs_wake_write_collector(wreq, false);
}
-/*
- * Write some of the pending data back to the server
+/**
+ * netfs_writepages_group - Flush data from the pagecache for a file
+ * @mapping: The file to flush from
+ * @wbc: Details of what should be flushed
+ * @group: The write grouping to flush (or NULL)
+ *
+ * Start asynchronous write back operations to flush dirty data belonging to a
+ * particular group in a file's pagecache back to the server and to the local
+ * cache.
*/
-int netfs_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
+int netfs_writepages_group(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct netfs_group *group)
{
struct netfs_inode *ictx = netfs_inode(mapping->host);
struct netfs_io_request *wreq = NULL;
@@ -618,12 +626,15 @@ int netfs_writepages(struct address_space *mapping,
if (!folio)
goto out;
- wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK);
+ wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio),
+ NETFS_WRITEBACK);
if (IS_ERR(wreq)) {
error = PTR_ERR(wreq);
goto couldnt_start;
}
+ wreq->group = netfs_get_group(group);
+
trace_netfs_write(wreq, netfs_write_trace_writeback);
netfs_stat(&netfs_n_wh_writepages);
@@ -659,6 +670,21 @@ int netfs_writepages(struct address_space *mapping,
_leave(" = %d", error);
return error;
}
+EXPORT_SYMBOL(netfs_writepages_group);
+
+/**
+ * netfs_writepages - Flush data from the pagecache for a file
+ * @mapping: The file to flush from
+ * @wbc: Details of what should be flushed
+ *
+ * Start asynchronous write back operations to flush dirty data in a file's
+ * pagecache back to the server and to the local cache.
+ */
+int netfs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ return netfs_writepages_group(mapping, wbc, NULL);
+}
EXPORT_SYMBOL(netfs_writepages);
/*
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index a67297de8a20..69052ac47ab1 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -457,6 +457,9 @@ int netfs_read_folio(struct file *, struct folio *);
int netfs_write_begin(struct netfs_inode *, struct file *,
struct address_space *, loff_t pos, unsigned int len,
struct folio **, void **fsdata);
+int netfs_writepages_group(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct netfs_group *group);
int netfs_writepages(struct address_space *mapping,
struct writeback_control *wbc);
bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio);
@@ -597,4 +600,60 @@ static inline void netfs_wait_for_outstanding_io(struct inode *inode)
wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0);
}
+/*
+ * Get a ref on a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group)
+{
+ if (netfs_group && netfs_group != NETFS_FOLIO_COPY_TO_CACHE)
+ refcount_inc(&netfs_group->ref);
+ return netfs_group;
+}
+
+/*
+ * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline void netfs_put_group(struct netfs_group *netfs_group)
+{
+ if (netfs_group &&
+ netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
+ refcount_dec_and_test(&netfs_group->ref))
+ netfs_group->free(netfs_group);
+}
+
+/*
+ * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
+{
+ if (netfs_group &&
+ netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
+ refcount_sub_and_test(nr, &netfs_group->ref))
+ netfs_group->free(netfs_group);
+}
+
+/*
+ * Set the group pointer directly on a folio.
+ */
+static inline void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
+{
+ if (netfs_group)
+ folio_attach_private(folio, netfs_get_group(netfs_group));
+}
+
+/*
+ * Set the group pointer on a folio or the folio info record.
+ */
+static inline void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
+{
+ void *priv = folio_get_private(folio);
+
+ if (unlikely(priv != netfs_group)) {
+ if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
+ folio_attach_private(folio, netfs_get_group(netfs_group));
+ else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
+ folio_detach_private(folio);
+ }
+}
+
#endif /* _LINUX_NETFS_H */