[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <520bd301-4526-4364-bbfa-5f591ab8f60a@oracle.com>
Date: Thu, 3 Jul 2025 16:07:51 -0400
From: Chuck Lever <chuck.lever@...cle.com>
To: Jeff Layton <jlayton@...nel.org>, Trond Myklebust <trondmy@...nel.org>,
Anna Schumaker <anna@...nel.org>, NeilBrown <neil@...wn.name>,
Olga Kornievskaia <okorniev@...hat.com>, Dai Ngo <Dai.Ngo@...cle.com>,
Tom Talpey <tom@...pey.com>, Mike Snitzer <snitzer@...nel.org>
Cc: linux-nfs@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH RFC 2/2] nfsd: call generic_fadvise after v3 READ, stable
WRITE or COMMIT
On 7/3/25 3:53 PM, Jeff Layton wrote:
> Recent testing has shown that keeping pagecache pages around for too
> long can be detrimental to performance with nfsd. Clients only rarely
> revisit the same data, so the pages tend to just hang around.
>
> This patch changes the pc_release callbacks for NFSv3 READ, WRITE and
> COMMIT to call generic_fadvise(..., POSIX_FADV_DONTNEED) on the accessed
> range.
>
> Signed-off-by: Jeff Layton <jlayton@...nel.org>
> ---
> fs/nfsd/debugfs.c | 2 ++
> fs/nfsd/nfs3proc.c | 59 +++++++++++++++++++++++++++++++++++++++++++++---------
> fs/nfsd/nfsd.h | 1 +
> fs/nfsd/nfsproc.c | 4 ++--
> fs/nfsd/vfs.c | 21 ++++++++++++++-----
> fs/nfsd/vfs.h | 5 +++--
> fs/nfsd/xdr3.h | 3 +++
> 7 files changed, 77 insertions(+), 18 deletions(-)
>
> diff --git a/fs/nfsd/debugfs.c b/fs/nfsd/debugfs.c
> index 84b0c8b559dc90bd5c2d9d5e15c8e0682c0d610c..b007718dd959bc081166ec84e06f577a8fc2b46b 100644
> --- a/fs/nfsd/debugfs.c
> +++ b/fs/nfsd/debugfs.c
> @@ -44,4 +44,6 @@ void nfsd_debugfs_init(void)
>
> debugfs_create_file("disable-splice-read", S_IWUSR | S_IRUGO,
> nfsd_top_dir, NULL, &nfsd_dsr_fops);
> + debugfs_create_bool("enable-fadvise-dontneed", 0644,
> + nfsd_top_dir, &nfsd_enable_fadvise_dontneed);
I prefer that this setting is folded into the new io_cache_read /
io_cache_write tune-ables that Mike's patch adds, rather than adding
a new boolean.
That might make a hybrid "DONTCACHE for READ and fadvise for WRITE"
pretty easy.
> }
> diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
> index b6d03e1ef5f7a5e8dd111b0d56c061f1e91abff7..11261cf67ea817ec566626f08b733e09c9e121de 100644
> --- a/fs/nfsd/nfs3proc.c
> +++ b/fs/nfsd/nfs3proc.c
> @@ -9,6 +9,7 @@
> #include <linux/ext2_fs.h>
> #include <linux/magic.h>
> #include <linux/namei.h>
> +#include <linux/fadvise.h>
>
> #include "cache.h"
> #include "xdr3.h"
> @@ -206,11 +207,25 @@ nfsd3_proc_read(struct svc_rqst *rqstp)
>
> fh_copy(&resp->fh, &argp->fh);
> resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
> - &resp->count, &resp->eof);
> + &resp->count, &resp->eof, &resp->nf);
> resp->status = nfsd3_map_status(resp->status);
> return rpc_success;
> }
>
> +static void
> +nfsd3_release_read(struct svc_rqst *rqstp)
> +{
> + struct nfsd3_readargs *argp = rqstp->rq_argp;
> + struct nfsd3_readres *resp = rqstp->rq_resp;
> +
> + if (nfsd_enable_fadvise_dontneed && resp->status == nfs_ok)
> + generic_fadvise(nfsd_file_file(resp->nf), argp->offset, resp->count,
> + POSIX_FADV_DONTNEED);
> + if (resp->nf)
> + nfsd_file_put(resp->nf);
> + fh_put(&resp->fh);
> +}
> +
> /*
> * Write data to a file
> */
> @@ -236,12 +251,26 @@ nfsd3_proc_write(struct svc_rqst *rqstp)
> resp->committed = argp->stable;
> resp->status = nfsd_write(rqstp, &resp->fh, argp->offset,
> &argp->payload, &cnt,
> - resp->committed, resp->verf);
> + resp->committed, resp->verf, &resp->nf);
> resp->count = cnt;
> resp->status = nfsd3_map_status(resp->status);
> return rpc_success;
> }
>
> +static void
> +nfsd3_release_write(struct svc_rqst *rqstp)
> +{
> + struct nfsd3_writeargs *argp = rqstp->rq_argp;
> + struct nfsd3_writeres *resp = rqstp->rq_resp;
> +
> + if (nfsd_enable_fadvise_dontneed && resp->status == nfs_ok && resp->committed)
> + generic_fadvise(nfsd_file_file(resp->nf), argp->offset, resp->count,
> + POSIX_FADV_DONTNEED);
> + if (resp->nf)
> + nfsd_file_put(resp->nf);
> + fh_put(&resp->fh);
> +}
> +
> /*
> * Implement NFSv3's unchecked, guarded, and exclusive CREATE
> * semantics for regular files. Except for the created file,
> @@ -748,7 +777,6 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
> {
> struct nfsd3_commitargs *argp = rqstp->rq_argp;
> struct nfsd3_commitres *resp = rqstp->rq_resp;
> - struct nfsd_file *nf;
>
> dprintk("nfsd: COMMIT(3) %s %u@%Lu\n",
> SVCFH_fmt(&argp->fh),
> @@ -757,17 +785,30 @@ nfsd3_proc_commit(struct svc_rqst *rqstp)
>
> fh_copy(&resp->fh, &argp->fh);
> resp->status = nfsd_file_acquire_gc(rqstp, &resp->fh, NFSD_MAY_WRITE |
> - NFSD_MAY_NOT_BREAK_LEASE, &nf);
> + NFSD_MAY_NOT_BREAK_LEASE, &resp->nf);
> if (resp->status)
> goto out;
> - resp->status = nfsd_commit(rqstp, &resp->fh, nf, argp->offset,
> + resp->status = nfsd_commit(rqstp, &resp->fh, resp->nf, argp->offset,
> argp->count, resp->verf);
> - nfsd_file_put(nf);
> out:
> resp->status = nfsd3_map_status(resp->status);
> return rpc_success;
> }
>
> +static void
> +nfsd3_release_commit(struct svc_rqst *rqstp)
> +{
> + struct nfsd3_commitargs *argp = rqstp->rq_argp;
> + struct nfsd3_commitres *resp = rqstp->rq_resp;
> +
> + if (nfsd_enable_fadvise_dontneed && resp->status == nfs_ok)
> + generic_fadvise(nfsd_file_file(resp->nf), argp->offset, argp->count,
> + POSIX_FADV_DONTNEED);
> + if (resp->nf)
> + nfsd_file_put(resp->nf);
> + fh_put(&resp->fh);
> +}
> +
>
> /*
> * NFSv3 Server procedures.
> @@ -864,7 +905,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
> .pc_func = nfsd3_proc_read,
> .pc_decode = nfs3svc_decode_readargs,
> .pc_encode = nfs3svc_encode_readres,
> - .pc_release = nfs3svc_release_fhandle,
> + .pc_release = nfsd3_release_read,
> .pc_argsize = sizeof(struct nfsd3_readargs),
> .pc_argzero = sizeof(struct nfsd3_readargs),
> .pc_ressize = sizeof(struct nfsd3_readres),
> @@ -876,7 +917,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
> .pc_func = nfsd3_proc_write,
> .pc_decode = nfs3svc_decode_writeargs,
> .pc_encode = nfs3svc_encode_writeres,
> - .pc_release = nfs3svc_release_fhandle,
> + .pc_release = nfsd3_release_write,
> .pc_argsize = sizeof(struct nfsd3_writeargs),
> .pc_argzero = sizeof(struct nfsd3_writeargs),
> .pc_ressize = sizeof(struct nfsd3_writeres),
> @@ -1039,7 +1080,7 @@ static const struct svc_procedure nfsd_procedures3[22] = {
> .pc_func = nfsd3_proc_commit,
> .pc_decode = nfs3svc_decode_commitargs,
> .pc_encode = nfs3svc_encode_commitres,
> - .pc_release = nfs3svc_release_fhandle,
> + .pc_release = nfsd3_release_commit,
> .pc_argsize = sizeof(struct nfsd3_commitargs),
> .pc_argzero = sizeof(struct nfsd3_commitargs),
> .pc_ressize = sizeof(struct nfsd3_commitres),
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 1cd0bed57bc2f27248fd66a8efef692a5e9a390c..288904d88b9245c03eae0aa347e867037b7c85c5 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -152,6 +152,7 @@ static inline void nfsd_debugfs_exit(void) {}
> #endif
>
> extern bool nfsd_disable_splice_read __read_mostly;
> +extern bool nfsd_enable_fadvise_dontneed __read_mostly;
>
> extern int nfsd_max_blksize;
>
> diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
> index 8f71f5748c75b69f15bae5e63799842e0e8b3139..bb8f98adb3e31b10adc4694987f8f5036726bf7f 100644
> --- a/fs/nfsd/nfsproc.c
> +++ b/fs/nfsd/nfsproc.c
> @@ -225,7 +225,7 @@ nfsd_proc_read(struct svc_rqst *rqstp)
> resp->count = argp->count;
> fh_copy(&resp->fh, &argp->fh);
> resp->status = nfsd_read(rqstp, &resp->fh, argp->offset,
> - &resp->count, &eof);
> + &resp->count, &eof, NULL);
> if (resp->status == nfs_ok)
> resp->status = fh_getattr(&resp->fh, &resp->stat);
> else if (resp->status == nfserr_jukebox)
> @@ -258,7 +258,7 @@ nfsd_proc_write(struct svc_rqst *rqstp)
>
> fh_copy(&resp->fh, &argp->fh);
> resp->status = nfsd_write(rqstp, &resp->fh, argp->offset,
> - &argp->payload, &cnt, NFS_DATA_SYNC, NULL);
> + &argp->payload, &cnt, NFS_DATA_SYNC, NULL, NULL);
> if (resp->status == nfs_ok)
> resp->status = fh_getattr(&resp->fh, &resp->stat);
> else if (resp->status == nfserr_jukebox)
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index ee78b6fb17098b788b07f5cd90598e678244b57e..f23eb3a07bb99dc231be9ea6db4e58b9e328a689 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -49,6 +49,7 @@
> #define NFSDDBG_FACILITY NFSDDBG_FILEOP
>
> bool nfsd_disable_splice_read __read_mostly;
> +bool nfsd_enable_fadvise_dontneed __read_mostly;
>
> /**
> * nfserrno - Map Linux errnos to NFS errnos
> @@ -1280,6 +1281,7 @@ bool nfsd_read_splice_ok(struct svc_rqst *rqstp)
> * @offset: starting byte offset
> * @count: IN: requested number of bytes; OUT: number of bytes read
> * @eof: OUT: set non-zero if operation reached the end of the file
> + * @pnf: optional return pointer to pass back nfsd_file reference
> *
> * The caller must verify that there is enough space in @rqstp.rq_res
> * to perform this operation.
> @@ -1290,7 +1292,8 @@ bool nfsd_read_splice_ok(struct svc_rqst *rqstp)
> * returned.
> */
> __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
> - loff_t offset, unsigned long *count, u32 *eof)
> + loff_t offset, unsigned long *count, u32 *eof,
> + struct nfsd_file **pnf)
> {
> struct nfsd_file *nf;
> struct file *file;
> @@ -1307,7 +1310,10 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
> else
> err = nfsd_iter_read(rqstp, fhp, file, offset, count, 0, eof);
>
> - nfsd_file_put(nf);
> + if (pnf && err == nfs_ok)
> + *pnf = nf;
> + else
> + nfsd_file_put(nf);
> trace_nfsd_read_done(rqstp, fhp, offset, *count);
> return err;
> }
> @@ -1321,8 +1327,10 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
> * @cnt: IN: number of bytes to write, OUT: number of bytes actually written
> * @stable: An NFS stable_how value
> * @verf: NFS WRITE verifier
> + * @pnf: optional return pointer to pass back nfsd_file reference
> *
> - * Upon return, caller must invoke fh_put on @fhp.
> + * Upon return, caller must invoke fh_put() on @fhp. If it sets @pnf,
> + * then it must also call nfsd_file_put() on the returned reference.
> *
> * Return values:
> * An nfsstat value in network byte order.
> @@ -1330,7 +1338,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
> __be32
> nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
> const struct xdr_buf *payload, unsigned long *cnt, int stable,
> - __be32 *verf)
> + __be32 *verf, struct nfsd_file **pnf)
> {
> struct nfsd_file *nf;
> __be32 err;
> @@ -1343,7 +1351,10 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset,
>
> err = nfsd_vfs_write(rqstp, fhp, nf, offset, payload, cnt,
> stable, verf);
> - nfsd_file_put(nf);
> + if (pnf && err == nfs_ok)
> + *pnf = nf;
> + else
> + nfsd_file_put(nf);
> out:
> trace_nfsd_write_done(rqstp, fhp, offset, *cnt);
> return err;
> diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
> index eff04959606fe55c141ab4a2eed97c7e0716a5f5..2d063ee7786f499f34c39cd3ba7e776bb7562d57 100644
> --- a/fs/nfsd/vfs.h
> +++ b/fs/nfsd/vfs.h
> @@ -127,10 +127,11 @@ __be32 nfsd_iter_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
> bool nfsd_read_splice_ok(struct svc_rqst *rqstp);
> __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
> loff_t offset, unsigned long *count,
> - u32 *eof);
> + u32 *eof, struct nfsd_file **pnf);
> __be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
> loff_t offset, const struct xdr_buf *payload,
> - unsigned long *cnt, int stable, __be32 *verf);
> + unsigned long *cnt, int stable, __be32 *verf,
> + struct nfsd_file **pnf);
> __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp,
> struct nfsd_file *nf, loff_t offset,
> const struct xdr_buf *payload,
> diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
> index 522067b7fd755930a1c2e42b826d9132ac2993be..3f4c51983003188be51a0f8c2db2e0acc9a1363b 100644
> --- a/fs/nfsd/xdr3.h
> +++ b/fs/nfsd/xdr3.h
> @@ -146,6 +146,7 @@ struct nfsd3_readres {
> unsigned long count;
> __u32 eof;
> struct page **pages;
> + struct nfsd_file *nf;
> };
>
> struct nfsd3_writeres {
> @@ -154,6 +155,7 @@ struct nfsd3_writeres {
> unsigned long count;
> int committed;
> __be32 verf[2];
> + struct nfsd_file *nf;
> };
>
> struct nfsd3_renameres {
> @@ -217,6 +219,7 @@ struct nfsd3_commitres {
> __be32 status;
> struct svc_fh fh;
> __be32 verf[2];
> + struct nfsd_file *nf;
> };
>
> struct nfsd3_getaclres {
>
--
Chuck Lever
Powered by blists - more mailing lists