[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <cd7470e4cff8b16be8e553596a54a9783c6b9306.camel@ibm.com>
Date: Wed, 23 Apr 2025 22:16:01 +0000
From: Viacheslav Dubeyko <Slava.Dubeyko@....com>
To: "pc@...guebit.com" <pc@...guebit.com>, David Howells <dhowells@...hat.com>
CC: "linux-cifs@...r.kernel.org" <linux-cifs@...r.kernel.org>,
"brauner@...nel.org" <brauner@...nel.org>,
"linux-fsdevel@...r.kernel.org"
<linux-fsdevel@...r.kernel.org>,
"ceph-devel@...r.kernel.org"
<ceph-devel@...r.kernel.org>,
"linux-afs@...ts.infradead.org"
<linux-afs@...ts.infradead.org>,
"v9fs@...ts.linux.dev"
<v9fs@...ts.linux.dev>,
"netfs@...ts.linux.dev" <netfs@...ts.linux.dev>,
"sfrench@...ba.org" <sfrench@...ba.org>,
"linux-kernel@...r.kernel.org"
<linux-kernel@...r.kernel.org>,
"linux-nfs@...r.kernel.org"
<linux-nfs@...r.kernel.org>
Subject: Re: [PATCH] netfs: Fix undifferentiation of DIO reads from unbuffered
reads
On Wed, 2025-04-23 at 22:38 +0100, David Howells wrote:
> On cifs, "DIO reads" (specified by O_DIRECT) need to be differentiated from
> "unbuffered reads" (specified by cache=none in the mount parameters). The
> difference is flagged in the protocol and the server may behave
> differently: Windows Server will, for example, mandate that DIO reads are
> block aligned.
>
> Fix this by adding a NETFS_UNBUFFERED_READ origin so that unbuffered reads
> can be told apart from NETFS_DIO_READ, paralleling the differentiation that
> already exists for writes (NETFS_UNBUFFERED_WRITE vs NETFS_DIO_WRITE).
>
> A further patch will be required to make cifs behave differently depending
> on which rreq->origin is set.
>
> Fixes: 016dc8516aec ("netfs: Implement unbuffered/DIO read support")
> Signed-off-by: David Howells <dhowells@...hat.com>
> cc: Paulo Alcantara <pc@...guebit.com>
> cc: Steve French <sfrench@...ba.org>
> cc: netfs@...ts.linux.dev
> cc: v9fs@...ts.linux.dev
> cc: linux-afs@...ts.infradead.org
> cc: linux-cifs@...r.kernel.org
> cc: ceph-devel@...r.kernel.org
> cc: linux-nfs@...r.kernel.org
> cc: linux-fsdevel@...r.kernel.org
> ---
> fs/9p/vfs_addr.c | 3 ++-
> fs/afs/write.c | 1 +
> fs/ceph/addr.c | 4 +++-
> fs/netfs/direct_read.c | 3 ++-
> fs/netfs/main.c | 1 +
> fs/netfs/misc.c | 1 +
> fs/netfs/objects.c | 1 +
> fs/netfs/read_collect.c | 7 +++++--
> fs/nfs/fscache.c | 1 +
> fs/smb/client/file.c | 3 ++-
> include/linux/netfs.h | 1 +
> include/trace/events/netfs.h | 1 +
> 12 files changed, 21 insertions(+), 6 deletions(-)
>
> diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
> index b5a4a28e0fe7..e4420591cf35 100644
> --- a/fs/9p/vfs_addr.c
> +++ b/fs/9p/vfs_addr.c
> @@ -77,7 +77,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
>
> /* if we just extended the file size, any portion not in
> * cache won't be on server and is zeroes */
> - if (subreq->rreq->origin != NETFS_DIO_READ)
> + if (subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
> + subreq->rreq->origin != NETFS_DIO_READ)
> __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
> if (pos + total >= i_size_read(rreq->inode))
> __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
> diff --git a/fs/afs/write.c b/fs/afs/write.c
> index 7df7b2f5e7b2..2e7526ea883a 100644
> --- a/fs/afs/write.c
> +++ b/fs/afs/write.c
> @@ -202,6 +202,7 @@ void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *st
> case NETFS_READ_GAPS:
> case NETFS_READ_SINGLE:
> case NETFS_READ_FOR_WRITE:
> + case NETFS_UNBUFFERED_READ:
> case NETFS_DIO_READ:
> return;
> default:
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index 557c326561fd..b95c4cb21c13 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -238,6 +238,7 @@ static void finish_netfs_read(struct ceph_osd_request *req)
> if (sparse && err > 0)
> err = ceph_sparse_ext_map_end(op);
> if (err < subreq->len &&
> + subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
> subreq->rreq->origin != NETFS_DIO_READ)
> __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
> if (IS_ENCRYPTED(inode) && err > 0) {
> @@ -281,7 +282,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
> size_t len;
> int mode;
>
> - if (rreq->origin != NETFS_DIO_READ)
> + if (rreq->origin != NETFS_UNBUFFERED_READ &&
> + rreq->origin != NETFS_DIO_READ)
> __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
> __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
>
Looks good.
Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@....com>
Thanks,
Slava.
> diff --git a/fs/netfs/direct_read.c b/fs/netfs/direct_read.c
> index a24e63d2c818..9902766195d7 100644
> --- a/fs/netfs/direct_read.c
> +++ b/fs/netfs/direct_read.c
> @@ -188,7 +188,8 @@ ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *i
>
> rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
> iocb->ki_pos, orig_count,
> - NETFS_DIO_READ);
> + iocb->ki_flags & IOCB_DIRECT ?
> + NETFS_DIO_READ : NETFS_UNBUFFERED_READ);
> if (IS_ERR(rreq))
> return PTR_ERR(rreq);
>
> diff --git a/fs/netfs/main.c b/fs/netfs/main.c
> index 70ecc8f5f210..3db401d269e7 100644
> --- a/fs/netfs/main.c
> +++ b/fs/netfs/main.c
> @@ -39,6 +39,7 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
> [NETFS_READ_GAPS] = "RG",
> [NETFS_READ_SINGLE] = "R1",
> [NETFS_READ_FOR_WRITE] = "RW",
> + [NETFS_UNBUFFERED_READ] = "UR",
> [NETFS_DIO_READ] = "DR",
> [NETFS_WRITEBACK] = "WB",
> [NETFS_WRITEBACK_SINGLE] = "W1",
> diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
> index 77e7f7c79d27..43b67a28a8fa 100644
> --- a/fs/netfs/misc.c
> +++ b/fs/netfs/misc.c
> @@ -461,6 +461,7 @@ static ssize_t netfs_wait_for_request(struct netfs_io_request *rreq,
> case NETFS_DIO_READ:
> case NETFS_DIO_WRITE:
> case NETFS_READ_SINGLE:
> + case NETFS_UNBUFFERED_READ:
> case NETFS_UNBUFFERED_WRITE:
> break;
> default:
> diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
> index d3eb9ba3013a..31fa0c81e2a4 100644
> --- a/fs/netfs/objects.c
> +++ b/fs/netfs/objects.c
> @@ -59,6 +59,7 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
> origin == NETFS_READ_GAPS ||
> origin == NETFS_READ_SINGLE ||
> origin == NETFS_READ_FOR_WRITE ||
> + origin == NETFS_UNBUFFERED_READ ||
> origin == NETFS_DIO_READ) {
> INIT_WORK(&rreq->work, netfs_read_collection_worker);
> rreq->io_streams[0].avail = true;
> diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c
> index 900dd51c3b94..bad677e58a42 100644
> --- a/fs/netfs/read_collect.c
> +++ b/fs/netfs/read_collect.c
> @@ -342,7 +342,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
> {
> unsigned int i;
>
> - if (rreq->origin == NETFS_DIO_READ) {
> + if (rreq->origin == NETFS_UNBUFFERED_READ ||
> + rreq->origin == NETFS_DIO_READ) {
> for (i = 0; i < rreq->direct_bv_count; i++) {
> flush_dcache_page(rreq->direct_bv[i].bv_page);
> // TODO: cifs marks pages in the destination buffer
> @@ -360,7 +361,8 @@ static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
> }
> if (rreq->netfs_ops->done)
> rreq->netfs_ops->done(rreq);
> - if (rreq->origin == NETFS_DIO_READ)
> + if (rreq->origin == NETFS_UNBUFFERED_READ ||
> + rreq->origin == NETFS_DIO_READ)
> inode_dio_end(rreq->inode);
> }
>
> @@ -416,6 +418,7 @@ bool netfs_read_collection(struct netfs_io_request *rreq)
> //netfs_rreq_is_still_valid(rreq);
>
> switch (rreq->origin) {
> + case NETFS_UNBUFFERED_READ:
> case NETFS_DIO_READ:
> case NETFS_READ_GAPS:
> netfs_rreq_assess_dio(rreq);
> diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
> index e278a1ad1ca3..8b0785178731 100644
> --- a/fs/nfs/fscache.c
> +++ b/fs/nfs/fscache.c
> @@ -367,6 +367,7 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr)
>
> sreq = netfs->sreq;
> if (test_bit(NFS_IOHDR_EOF, &hdr->flags) &&
> + sreq->rreq->origin != NETFS_UNBUFFERED_READ &&
> sreq->rreq->origin != NETFS_DIO_READ)
> __set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags);
>
> diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
> index 3bd34d2b98d0..76f91ce6b29b 100644
> --- a/fs/smb/client/file.c
> +++ b/fs/smb/client/file.c
> @@ -219,7 +219,8 @@ static void cifs_issue_read(struct netfs_io_subrequest *subreq)
> goto failed;
> }
>
> - if (subreq->rreq->origin != NETFS_DIO_READ)
> + if (subreq->rreq->origin != NETFS_UNBUFFERED_READ &&
> + subreq->rreq->origin != NETFS_DIO_READ)
> __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
>
> trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
> diff --git a/include/linux/netfs.h b/include/linux/netfs.h
> index c3f230732f51..1464b3a10498 100644
> --- a/include/linux/netfs.h
> +++ b/include/linux/netfs.h
> @@ -206,6 +206,7 @@ enum netfs_io_origin {
> NETFS_READ_GAPS, /* This read is a synchronous read to fill gaps */
> NETFS_READ_SINGLE, /* This read should be treated as a single object */
> NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
> + NETFS_UNBUFFERED_READ, /* This is an unbuffered read */
> NETFS_DIO_READ, /* This is a direct I/O read */
> NETFS_WRITEBACK, /* This write was triggered by writepages */
> NETFS_WRITEBACK_SINGLE, /* This monolithic write was triggered by writepages */
> diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
> index 402c5e82e7b8..4175eec40048 100644
> --- a/include/trace/events/netfs.h
> +++ b/include/trace/events/netfs.h
> @@ -39,6 +39,7 @@
> EM(NETFS_READ_GAPS, "RG") \
> EM(NETFS_READ_SINGLE, "R1") \
> EM(NETFS_READ_FOR_WRITE, "RW") \
> + EM(NETFS_UNBUFFERED_READ, "UR") \
> EM(NETFS_DIO_READ, "DR") \
> EM(NETFS_WRITEBACK, "WB") \
> EM(NETFS_WRITEBACK_SINGLE, "W1") \
>
>
Powered by blists - more mailing lists