[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <3064919.1745444289@warthog.procyon.org.uk>
Date: Wed, 23 Apr 2025 22:38:09 +0100
From: David Howells <dhowells@...hat.com>
To: Paulo Alcantara <pc@...guebit.com>
cc: dhowells@...hat.com, Steve French <sfrench@...ba.org>,
Christian Brauner <brauner@...nel.org>, netfs@...ts.linux.dev,
v9fs@...ts.linux.dev, linux-afs@...ts.infradead.org,
linux-cifs@...r.kernel.org, ceph-devel@...r.kernel.org,
linux-nfs@...r.kernel.org, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH] netfs: Fix undifferentiation of DIO reads from unbuffered reads
On cifs, "DIO reads" (specified by O_DIRECT) need to be differentiated from
"unbuffered reads" (specified by cache=none in the mount parameters). The
difference is flagged in the protocol and the server may behave
differently: Windows Server will, for example, mandate that DIO reads are
block aligned.
Fix this by adding a NETFS_UNBUFFERED_READ to differentiate this from
NETFS_DIO_READ, parallelling the write differentiation that already exists.
A further patch will be required to make cifs do something different,
depending on the rreq->origin set.
Fixes: 016dc8516aec ("netfs: Implement unbuffered/DIO read support")
Signed-off-by: David Howells <dhowells@...hat.com>
cc: Paulo Alcantara <pc@...guebit.com>
cc: Steve French <sfrench@...ba.org>
cc: netfs@...ts.linux.dev
cc: v9fs@...ts.linux.dev
cc: linux-afs@...ts.infradead.org
cc: linux-cifs@...r.kernel.org
cc: ceph-devel@...r.kernel.org
cc: linux-nfs@...r.kernel.org
cc: linux-fsdevel@...r.kernel.org
---
fs/9p/vfs_addr.c | 3 ++-
fs/afs/write.c | 1 +
fs/ceph/addr.c | 4 +++-
fs/netfs/direct_read.c | 3 ++-
fs/netfs/main.c | 1 +
fs/netfs/misc.c | 1 +
fs/netfs/objects.c | 1 +
fs/netfs/read_collect.c | 7 +++++--
fs/nfs/fscache.c | 1 +
fs/smb/client/file.c | 3 ++-
include/linux/netfs.h | 1 +
include/trace/events/netfs.h | 1 +
12 files changed, 21 insertions(+), 6 deletions(-)
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index b5a4a28e0fe7..e4420591cf35 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -77,7 +77,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
/* if we just extended the file size, any portion not in
* cache won't be on server and is zeroes */
- if (subreq->rreq->origin != NETFS_DIO_READ)
+ if (subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
+ subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
if (pos + total >= i_size_read(rreq->inode))
__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 7df7b2f5e7b2..2e7526ea883a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -202,6 +202,7 @@ void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *st
case NETFS_READ_GAPS:
case NETFS_READ_SINGLE:
case NETFS_READ_FOR_WRITE:
+ case NETFS_UNBUFFERED_READ:
case NETFS_DIO_READ:
return;
default:
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 557c326561fd..b95c4cb21c13 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -238,6 +238,7 @@ static void finish_netfs_read(struct ceph_osd_request *req)
if (sparse && err > 0)
err = ceph_sparse_ext_map_end(op);
if (err < subreq->len &&
+ subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
if (IS_ENCRYPTED(inode) && err > 0) {
@@ -281,7 +282,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
size_t len;
int mode;
- if (rreq->origin != NETFS_DIO_READ)
+ if (rreq->origin != NETFS_UNBUFFERED_READ &&
+ rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
index a24e63d2c818..9902766195d7 100644
--- a/fs/netfs/direct_read.c
+++ b/fs/netfs/direct_read.c
@@ -188,7 +188,8 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
iocb->ki_pos, orig_count,
- NETFS_DIO_READ);
+ iocb->ki_flags & IOCB_DIRECT ?
+ NETFS_DIO_READ : NETFS_UNBUFFERED_READ);
if (IS_ERR(rreq))
return PTR_ERR(rreq);
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 70ecc8f5f210..3db401d269e7 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -39,6 +39,7 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
[NETFS_READ_GAPS] = "RG",
[NETFS_READ_SINGLE] = "R1",
[NETFS_READ_FOR_WRITE] = "RW",
+ [NETFS_UNBUFFERED_READ] = "UR",
[NETFS_DIO_READ] = "DR",
[NETFS_WRITEBACK] = "WB",
[NETFS_WRITEBACK_SINGLE] = "W1",
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 77e7f7c79d27..43b67a28a8fa 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -461,6 +461,7 @@ static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq,
case NETFS_DIO_READ:
case NETFS_DIO_WRITE:
case NETFS_READ_SINGLE:
+ case NETFS_UNBUFFERED_READ:
case NETFS_UNBUFFERED_WRITE:
break;
default:
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index d3eb9ba3013a..31fa0c81e2a4 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -59,6 +59,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
origin == NETFS_READ_GAPS ||
origin == NETFS_READ_SINGLE ||
origin == NETFS_READ_FOR_WRITE ||
+ origin == NETFS_UNBUFFERED_READ ||
origin == NETFS_DIO_READ) {
INIT_WORK(&rreq->work, netfs_read_collection_worker);
rreq->io_streams[0].avail = true;
diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
index 900dd51c3b94..bad677e58a42 100644
--- a/fs/netfs/read_collect.c
+++ b/fs/netfs/read_collect.c
@@ -342,7 +342,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
{
unsigned int i;
- if (rreq->origin == NETFS_DIO_READ) {
+ if (rreq->origin == NETFS_UNBUFFERED_READ ||
+ rreq->origin == NETFS_DIO_READ) {
for (i = 0; i < rreq->direct_bv_count; i++) {
flush_dcache_page(rreq->direct_bv[i].bv_page);
// TODO: cifs marks pages in the destination buffer
@@ -360,7 +361,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
}
if (rreq->netfs_ops->done)
rreq->netfs_ops->done(rreq);
- if (rreq->origin == NETFS_DIO_READ)
+ if (rreq->origin == NETFS_UNBUFFERED_READ ||
+ rreq->origin == NETFS_DIO_READ)
inode_dio_end(rreq->inode);
}
@@ -416,6 +418,7 @@ bool netfs_read_collection(struct netfs_io_request *rreq)
//netfs_rreq_is_still_valid(rreq);
switch (rreq->origin) {
+ case NETFS_UNBUFFERED_READ:
case NETFS_DIO_READ:
case NETFS_READ_GAPS:
netfs_rreq_assess_dio(rreq);
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index e278a1ad1ca3..8b0785178731 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -367,6 +367,7 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr)
sreq = netfs->sreq;
if (test_bit(NFS_IOHDR_EOF, &hdr->flags) &&
+ sreq->rreq->origin != NETFS_UNBUFFERED_READ &&
sreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags);
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 3bd34d2b98d0..76f91ce6b29b 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -219,7 +219,8 @@ static void cifs_issue_read(struct netfs_io_subrequest *subreq)
goto failed;
}
- if (subreq->rreq->origin != NETFS_DIO_READ)
+ if (subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
+ subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index c3f230732f51..1464b3a10498 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -206,6 +206,7 @@ enum netfs_io_origin {
NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */
NETFS_READ_SINGLE, /* This read should be treated as a single object */
NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
+ NETFS_UNBUFFERED_READ, /* This is an unbuffered read */
NETFS_DIO_READ, /* This is a direct I/O read */
NETFS_WRITEBACK, /* This write was triggered by writepages */
NETFS_WRITEBACK_SINGLE, /* This monolithic write was triggered by writepages */
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 402c5e82e7b8..4175eec40048 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -39,6 +39,7 @@
EM(NETFS_READ_GAPS, "RG") \
EM(NETFS_READ_SINGLE, "R1") \
EM(NETFS_READ_FOR_WRITE, "RW") \
+ EM(NETFS_UNBUFFERED_READ, "UR") \
EM(NETFS_DIO_READ, "DR") \
EM(NETFS_WRITEBACK, "WB") \
EM(NETFS_WRITEBACK_SINGLE, "W1") \
Powered by blists - more mailing lists