Message-ID: <20250313233341.1675324-29-dhowells@redhat.com>
Date: Thu, 13 Mar 2025 23:33:20 +0000
From: David Howells <dhowells@...hat.com>
To: Viacheslav Dubeyko <slava@...eyko.com>,
Alex Markuze <amarkuze@...hat.com>
Cc: David Howells <dhowells@...hat.com>,
Ilya Dryomov <idryomov@...il.com>,
Jeff Layton <jlayton@...nel.org>,
Dongsheng Yang <dongsheng.yang@...ystack.cn>,
ceph-devel@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
linux-block@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [RFC PATCH 28/35] netfs: Adjust group handling
Make some adjustments to the handling of netfs groups so that ceph can use
them for snap contexts:
- Move netfs_get_group(), netfs_put_group() and netfs_put_group_many() to
linux/netfs.h so that ceph can build its snap context on netfs groups.
- Move netfs_set_group() and __netfs_set_group() to linux/netfs.h so that
ceph_dirty_folio() can call them from inside the locked section in which
it finds the snap context to attach.
- Provide a netfs_writepages_group() that takes a group as a parameter and
attaches it to the request, and make netfs_free_request() drop the ref on
it. netfs_writepages() then becomes a wrapper that passes in a NULL
group (an illustrative usage sketch follows this list).
- In netfs_perform_write(), only consider a folio to have a conflicting
group if the folio's group pointer isn't NULL or the folio is dirty (a
clean folio with no group attached doesn't need flushing first).
- In netfs_perform_write(), interject a small 10ms sleep after every 16
attempts to flush a folio within a single call.
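For illustration only, a ceph-like filesystem might use the relocated
helpers and the new netfs_writepages_group() roughly as below.  The
struct my_snap_context and my_*() names are hypothetical and do not
appear in this series:

	struct my_snap_context {
		struct netfs_group group;	/* refcounted via ->ref; ->free() called on last put */
		u64 seq;			/* hypothetical fs-private data */
	};

	/* Called from the fs's dirty_folio handler, inside the locked
	 * section in which it picks the context to attach.
	 * netfs_set_group() takes a ref via netfs_get_group() when it
	 * attaches the group to the folio.
	 */
	static void my_attach_context(struct folio *folio,
				      struct my_snap_context *ctx)
	{
		netfs_set_group(folio, &ctx->group);
	}

	/* Flush only the dirty folios that belong to one group.  The
	 * write request takes its own ref on the group and
	 * netfs_free_request() drops it when the request is destroyed.
	 */
	static int my_flush_context(struct address_space *mapping,
				    struct writeback_control *wbc,
				    struct my_snap_context *ctx)
	{
		return netfs_writepages_group(mapping, wbc, &ctx->group);
	}
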
Signed-off-by: David Howells <dhowells@...hat.com>
cc: Jeff Layton <jlayton@...nel.org>
cc: Viacheslav Dubeyko <slava@...eyko.com>
cc: Alex Markuze <amarkuze@...hat.com>
cc: Ilya Dryomov <idryomov@...il.com>
cc: ceph-devel@...r.kernel.org
cc: linux-fsdevel@...r.kernel.org
---
fs/netfs/buffered_write.c | 25 ++++-------------
fs/netfs/internal.h | 32 ---------------------
fs/netfs/objects.c | 1 +
fs/netfs/write_issue.c | 38 +++++++++++++++++++++----
include/linux/netfs.h | 59 +++++++++++++++++++++++++++++++++++++++
5 files changed, 98 insertions(+), 57 deletions(-)
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 0245449b93e3..12ddbe9bc78b 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -11,26 +11,9 @@
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/pagevec.h>
+#include <linux/delay.h>
#include "internal.h"
-static void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
-{
- if (netfs_group)
- folio_attach_private(folio, netfs_get_group(netfs_group));
-}
-
-static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
-{
- void *priv = folio_get_private(folio);
-
- if (unlikely(priv != netfs_group)) {
- if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
- folio_attach_private(folio, netfs_get_group(netfs_group));
- else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
- folio_detach_private(folio);
- }
-}
-
/*
* Grab a folio for writing and lock it. Attempt to allocate as large a folio
* as possible to hold as much of the remaining length as possible in one go.
@@ -113,6 +96,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
};
struct netfs_io_request *wreq = NULL;
struct folio *folio = NULL, *writethrough = NULL;
+ unsigned int flush_counter = 0;
unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
ssize_t written = 0, ret, ret2;
loff_t i_size, pos = iocb->ki_pos;
@@ -208,7 +192,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
group = netfs_folio_group(folio);
if (unlikely(group != netfs_group) &&
- group != NETFS_FOLIO_COPY_TO_CACHE)
+ group != NETFS_FOLIO_COPY_TO_CACHE &&
+ (group || folio_test_dirty(folio)))
goto flush_content;
if (folio_test_uptodate(folio)) {
@@ -341,6 +326,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
trace_netfs_folio(folio, netfs_flush_content);
folio_unlock(folio);
folio_put(folio);
+ if ((++flush_counter & 0xf) == 0xf)
+ msleep(10);
ret = filemap_write_and_wait_range(mapping, fpos, fpos + flen - 1);
if (ret < 0)
goto error_folio_unlock;
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index eebb4f0f660e..2a6123c4da35 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -261,38 +261,6 @@ static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx)
#endif
}
-/*
- * Get a ref on a netfs group attached to a dirty page (e.g. a ceph snap).
- */
-static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group)
-{
- if (netfs_group && netfs_group != NETFS_FOLIO_COPY_TO_CACHE)
- refcount_inc(&netfs_group->ref);
- return netfs_group;
-}
-
-/*
- * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
- */
-static inline void netfs_put_group(struct netfs_group *netfs_group)
-{
- if (netfs_group &&
- netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
- refcount_dec_and_test(&netfs_group->ref))
- netfs_group->free(netfs_group);
-}
-
-/*
- * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
- */
-static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
-{
- if (netfs_group &&
- netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
- refcount_sub_and_test(nr, &netfs_group->ref))
- netfs_group->free(netfs_group);
-}
-
/*
* Check to see if a buffer aligns with the crypto block size. If it doesn't
* the crypto layer is going to copy all the data - in which case relying on
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index 52d6fce70837..7fdbaa5c5cab 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -153,6 +153,7 @@ static void netfs_free_request(struct work_struct *work)
kvfree(rreq->direct_bv);
}
+ netfs_put_group(rreq->group);
rolling_buffer_clear(&rreq->buffer);
rolling_buffer_clear(&rreq->bounce);
if (test_bit(NETFS_RREQ_PUT_RMW_TAIL, &rreq->flags))
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 93601033ba08..3921fcf4f859 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -418,7 +418,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
netfs_issue_write(wreq, upload);
} else if (fgroup != wreq->group) {
/* We can't write this page to the server yet. */
- kdebug("wrong group");
+ kdebug("wrong group %px != %px", fgroup, wreq->group);
folio_redirty_for_writepage(wbc, folio);
folio_unlock(folio);
netfs_issue_write(wreq, upload);
@@ -593,11 +593,19 @@ static void netfs_end_issue_write(struct netfs_io_request *wreq)
netfs_wake_write_collector(wreq, false);
}
-/*
- * Write some of the pending data back to the server
+/**
+ * netfs_writepages_group - Flush data from the pagecache for a file
+ * @mapping: The file to flush from
+ * @wbc: Details of what should be flushed
+ * @group: The write grouping to flush (or NULL)
+ *
+ * Start asynchronous write back operations to flush dirty data belonging to a
+ * particular group in a file's pagecache back to the server and to the local
+ * cache.
*/
-int netfs_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
+int netfs_writepages_group(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct netfs_group *group)
{
struct netfs_inode *ictx = netfs_inode(mapping->host);
struct netfs_io_request *wreq = NULL;
@@ -618,12 +626,15 @@ int netfs_writepages(struct address_space *mapping,
if (!folio)
goto out;
- wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK);
+ wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio),
+ NETFS_WRITEBACK);
if (IS_ERR(wreq)) {
error = PTR_ERR(wreq);
goto couldnt_start;
}
+ wreq->group = netfs_get_group(group);
+
trace_netfs_write(wreq, netfs_write_trace_writeback);
netfs_stat(&netfs_n_wh_writepages);
@@ -659,6 +670,21 @@ int netfs_writepages(struct address_space *mapping,
_leave(" = %d", error);
return error;
}
+EXPORT_SYMBOL(netfs_writepages_group);
+
+/**
+ * netfs_writepages - Flush data from the pagecache for a file
+ * @mapping: The file to flush from
+ * @wbc: Details of what should be flushed
+ *
+ * Start asynchronous write back operations to flush dirty data in a file's
+ * pagecache back to the server and to the local cache.
+ */
+int netfs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ return netfs_writepages_group(mapping, wbc, NULL);
+}
EXPORT_SYMBOL(netfs_writepages);
/*
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index a67297de8a20..69052ac47ab1 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -457,6 +457,9 @@ int netfs_read_folio(struct file *, struct folio *);
int netfs_write_begin(struct netfs_inode *, struct file *,
struct address_space *, loff_t pos, unsigned int len,
struct folio **, void **fsdata);
+int netfs_writepages_group(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct netfs_group *group);
int netfs_writepages(struct address_space *mapping,
struct writeback_control *wbc);
bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio);
@@ -597,4 +600,60 @@ static inline void netfs_wait_for_outstanding_io(struct inode *inode)
wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0);
}
+/*
+ * Get a ref on a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group)
+{
+ if (netfs_group && netfs_group != NETFS_FOLIO_COPY_TO_CACHE)
+ refcount_inc(&netfs_group->ref);
+ return netfs_group;
+}
+
+/*
+ * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline void netfs_put_group(struct netfs_group *netfs_group)
+{
+ if (netfs_group &&
+ netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
+ refcount_dec_and_test(&netfs_group->ref))
+ netfs_group->free(netfs_group);
+}
+
+/*
+ * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
+{
+ if (netfs_group &&
+ netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
+ refcount_sub_and_test(nr, &netfs_group->ref))
+ netfs_group->free(netfs_group);
+}
+
+/*
+ * Set the group pointer directly on a folio.
+ */
+static inline void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
+{
+ if (netfs_group)
+ folio_attach_private(folio, netfs_get_group(netfs_group));
+}
+
+/*
+ * Set the group pointer on a folio or the folio info record.
+ */
+static inline void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
+{
+ void *priv = folio_get_private(folio);
+
+ if (unlikely(priv != netfs_group)) {
+ if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
+ folio_attach_private(folio, netfs_get_group(netfs_group));
+ else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
+ folio_detach_private(folio);
+ }
+}
+
#endif /* _LINUX_NETFS_H */