[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250313233341.1675324-33-dhowells@redhat.com>
Date: Thu, 13 Mar 2025 23:33:24 +0000
From: David Howells <dhowells@...hat.com>
To: Viacheslav Dubeyko <slava@...eyko.com>,
Alex Markuze <amarkuze@...hat.com>
Cc: David Howells <dhowells@...hat.com>,
Ilya Dryomov <idryomov@...il.com>,
Jeff Layton <jlayton@...nel.org>,
Dongsheng Yang <dongsheng.yang@...ystack.cn>,
ceph-devel@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
linux-block@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [RFC PATCH 32/35] netfs: Add some more RMW support for ceph
Add some support for read-modify-write (RMW) cycles to netfslib for use by ceph:
(1) Add netfs_unbuffered_read_from_inode() to allow reading from an inode
without having a file pointer so that truncate can modify a
now-partial tail block of a content-encrypted file.
This takes an additional argument, nohole, which causes the read to stop
short if a hole is encountered, or to fail with -ENODATA if the read begins
with a hole. This is noted on the request with NETFS_RREQ_NO_READ_HOLE for
the filesystem to pick up.
(2) Set NETFS_RREQ_RMW when doing an RMW as part of a request.
(3) Provide a ->rmw_read_done() op for netfslib to tell the filesystem
that it has completed the read required for RMW.
Signed-off-by: David Howells <dhowells@...hat.com>
cc: Jeff Layton <jlayton@...nel.org>
cc: Viacheslav Dubeyko <slava@...eyko.com>
cc: Alex Markuze <amarkuze@...hat.com>
cc: Ilya Dryomov <idryomov@...il.com>
cc: ceph-devel@...r.kernel.org
cc: linux-fsdevel@...r.kernel.org
---
fs/netfs/direct_read.c | 75 ++++++++++++++++++++++++++++++++++++
fs/netfs/direct_write.c | 1 +
fs/netfs/main.c | 1 +
fs/netfs/objects.c | 1 +
fs/netfs/read_collect.c | 2 +
fs/netfs/write_retry.c | 3 ++
include/linux/netfs.h | 7 ++++
include/trace/events/netfs.h | 3 ++
8 files changed, 93 insertions(+)
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index 5e4bd1e5a378..4061f934dfe6 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -373,3 +373,78 @@ ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
return ret;
}
EXPORT_SYMBOL(netfs_unbuffered_read_iter);
+
+/**
+ * netfs_unbuffered_read_from_inode - Perform an unbuffered sync I/O read
+ * @inode: The inode being accessed
+ * @pos: The file position to read from
+ * @iter: The output buffer (also specifies read length)
+ * @nohole: True to return short/ENODATA if hole encountered
+ *
+ * Perform a synchronous unbuffered I/O from the inode to the output buffer.
+ * No use is made of the pagecache.  The output buffer must be suitably aligned
+ * if content encryption is to be used.  If @nohole is true then the read will
+ * stop short if a hole is encountered and return -ENODATA if the read begins
+ * with a hole.  The caller must hold any appropriate locks.
+ *
+ * Return: Bytes read, 0 if @iter is empty, or a negative error code.
+ */
+ssize_t netfs_unbuffered_read_from_inode(struct inode *inode, loff_t pos,
+					 struct iov_iter *iter, bool nohole)
+{
+	struct netfs_io_request *rreq;
+	ssize_t ret;
+	size_t orig_count = iov_iter_count(iter);
+
+	_enter("");
+
+	if (WARN_ON(user_backed_iter(iter)))
+		return -EIO;
+
+	if (!orig_count)
+		return 0; /* Don't update atime */
+
+	/* Flush dirty pagecache first; the range end is inclusive, not a length. */
+	ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + orig_count - 1);
+	if (ret < 0)
+		return ret;
+	inode_update_time(inode, S_ATIME);
+
+	rreq = netfs_alloc_request(inode->i_mapping, NULL, pos, orig_count,
+				   NULL, NETFS_UNBUFFERED_READ);
+	if (IS_ERR(rreq))
+		return PTR_ERR(rreq);
+
+	ret = -EIO;
+	if (test_bit(NETFS_RREQ_CONTENT_ENCRYPTION, &rreq->flags) &&
+	    WARN_ON(!netfs_is_crypto_aligned(rreq, iter)))
+		goto out;
+
+	netfs_stat(&netfs_n_rh_dio_read);
+	trace_netfs_read(rreq, rreq->start, rreq->len,
+			 netfs_read_trace_unbuffered_read_from_inode);
+
+	rreq->buffer.iter = *iter;
+	rreq->len = orig_count;
+	rreq->direct_bv_unpin = false;
+	iov_iter_advance(iter, orig_count);
+
+	if (nohole)
+		__set_bit(NETFS_RREQ_NO_READ_HOLE, &rreq->flags);
+
+	/* We're going to do the crypto in place in the destination buffer. */
+	if (test_bit(NETFS_RREQ_CONTENT_ENCRYPTION, &rreq->flags))
+		__set_bit(NETFS_RREQ_CRYPT_IN_PLACE, &rreq->flags);
+
+	ret = netfs_dispatch_unbuffered_reads(rreq);
+	if (!rreq->submitted) {
+		netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
+		goto out;
+	}
+
+	ret = netfs_wait_for_read(rreq);
+out:
+	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+	return ret;
+}
+EXPORT_SYMBOL(netfs_unbuffered_read_from_inode);
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index 83c5c06c4710..a99722f90c71 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -145,6 +145,7 @@ static ssize_t netfs_write_through_bounce_buffer(struct netfs_io_request *wreq,
wreq->start = gstart;
wreq->len = gend - gstart;
+ __set_bit(NETFS_RREQ_RMW, &ictx->flags);
if (gstart >= end) {
/* At or after EOF, nothing to read. */
} else {
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 07f8cffbda8c..0900dea53e4a 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -39,6 +39,7 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
[NETFS_READ_GAPS] = "RG",
[NETFS_READ_SINGLE] = "R1",
[NETFS_READ_FOR_WRITE] = "RW",
+ [NETFS_UNBUFFERED_READ] = "UR",
[NETFS_DIO_READ] = "DR",
[NETFS_WRITEBACK] = "WB",
[NETFS_WRITEBACK_SINGLE] = "W1",
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index 4606e830c116..958c4d460d07 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -60,6 +60,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
origin == NETFS_READ_GAPS ||
origin == NETFS_READ_SINGLE ||
origin == NETFS_READ_FOR_WRITE ||
+ origin == NETFS_UNBUFFERED_READ ||
origin == NETFS_DIO_READ) {
INIT_WORK(&rreq->work, netfs_read_collection_worker);
rreq->io_streams[0].avail = true;
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 0a0bff90ca9e..013a90738dcd 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -462,6 +462,7 @@ static void netfs_read_collection(struct netfs_io_request *rreq)
//netfs_rreq_is_still_valid(rreq);
switch (rreq->origin) {
+ case NETFS_UNBUFFERED_READ:
case NETFS_DIO_READ:
case NETFS_READ_GAPS:
case NETFS_RMW_READ:
@@ -681,6 +682,7 @@ ssize_t netfs_wait_for_read(struct netfs_io_request *rreq)
if (ret == 0) {
ret = rreq->transferred;
switch (rreq->origin) {
+ case NETFS_UNBUFFERED_READ:
case NETFS_DIO_READ:
case NETFS_READ_SINGLE:
ret = rreq->transferred;
diff --git a/fs/netfs/write_retry.c b/fs/netfs/write_retry.c
index f727b48e2bfe..9e4e79d5a403 100644
--- a/fs/netfs/write_retry.c
+++ b/fs/netfs/write_retry.c
@@ -386,6 +386,9 @@ ssize_t netfs_rmw_read(struct netfs_io_request *wreq, struct file *file,
ret = 0;
}
+ if (ret == 0 && rreq->netfs_ops->rmw_read_done)
+ rreq->netfs_ops->rmw_read_done(wreq, rreq);
+
error:
netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
return ret;
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 9d17d4bd9753..4049c985b9b4 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -220,6 +220,7 @@ enum netfs_io_origin {
NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */
NETFS_READ_SINGLE, /* This read should be treated as a single object */
NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
+ NETFS_UNBUFFERED_READ, /* This is an unbuffered I/O read */
NETFS_DIO_READ, /* This is a direct I/O read */
NETFS_WRITEBACK, /* This write was triggered by writepages */
NETFS_WRITEBACK_SINGLE, /* This monolithic write was triggered by writepages */
@@ -308,6 +309,9 @@ struct netfs_io_request {
#define NETFS_RREQ_CONTENT_ENCRYPTION 16 /* Content encryption is in use */
#define NETFS_RREQ_CRYPT_IN_PLACE 17 /* Do decryption in place */
#define NETFS_RREQ_PUT_RMW_TAIL 18 /* Need to put ->rmw_tail */
+#define NETFS_RREQ_RMW 19 /* Performing RMW cycle */
+#define NETFS_RREQ_REPEAT_RMW 20 /* Need to perform an RMW cycle */
+#define NETFS_RREQ_NO_READ_HOLE 21 /* Give short read/error if hole encountered */
#define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark
* write to cache on read */
const struct netfs_request_ops *netfs_ops;
@@ -336,6 +340,7 @@ struct netfs_request_ops {
/* Modification handling */
void (*update_i_size)(struct inode *inode, loff_t i_size);
void (*post_modify)(struct inode *inode, void *fs_priv);
+ void (*rmw_read_done)(struct netfs_io_request *wreq, struct netfs_io_request *rreq);
/* Write request handling */
void (*begin_writeback)(struct netfs_io_request *wreq);
@@ -432,6 +437,8 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
+ssize_t netfs_unbuffered_read_from_inode(struct inode *inode, loff_t pos,
+ struct iov_iter *iter, bool nohole);
/* High-level write API */
ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 74af82d773bd..9254c6f0e604 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -23,6 +23,7 @@
EM(netfs_read_trace_read_gaps, "READ-GAPS") \
EM(netfs_read_trace_read_single, "READ-SNGL") \
EM(netfs_read_trace_prefetch_for_write, "PREFETCHW") \
+ EM(netfs_read_trace_unbuffered_read_from_inode, "READ-INOD") \
E_(netfs_read_trace_write_begin, "WRITEBEGN")
#define netfs_write_traces \
@@ -38,6 +39,7 @@
EM(NETFS_READ_GAPS, "RG") \
EM(NETFS_READ_SINGLE, "R1") \
EM(NETFS_READ_FOR_WRITE, "RW") \
+ EM(NETFS_UNBUFFERED_READ, "UR") \
EM(NETFS_DIO_READ, "DR") \
EM(NETFS_WRITEBACK, "WB") \
EM(NETFS_WRITEBACK_SINGLE, "W1") \
@@ -104,6 +106,7 @@
EM(netfs_sreq_trace_io_progress, "IO ") \
EM(netfs_sreq_trace_limited, "LIMIT") \
EM(netfs_sreq_trace_need_clear, "N-CLR") \
+ EM(netfs_sreq_trace_need_rmw, "N-RMW") \
EM(netfs_sreq_trace_partial_read, "PARTR") \
EM(netfs_sreq_trace_need_retry, "ND-RT") \
EM(netfs_sreq_trace_pending, "PEND ") \
Powered by blists - more mailing lists