[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <b718b3c2-d19e-42c7-a29e-5e07319e2439@oracle.com>
Date: Thu, 20 Nov 2025 15:09:26 -0500
From: Chuck Lever <chuck.lever@...cle.com>
To: Benjamin Coddington <bcodding@...merspace.com>,
Alexander Viro <viro@...iv.linux.org.uk>,
Christian Brauner <brauner@...nel.org>, Jan Kara <jack@...e.cz>,
Jeff Layton <jlayton@...nel.org>, NeilBrown <neil@...wn.name>,
Olga Kornievskaia <okorniev@...hat.com>, Dai Ngo <Dai.Ngo@...cle.com>,
Tom Talpey <tom@...pey.com>, Trond Myklebust <trondmy@...nel.org>,
Mike Snitzer <snitzer@...nel.org>
Cc: linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-nfs@...r.kernel.org
Subject: Re: [PATCH v2 3/3] VFS/knfsd: Teach dentry_create() to use
atomic_open()
On 11/20/25 10:57 AM, Benjamin Coddington wrote:
> While knfsd offers combined exclusive create and open results to clients,
> on some filesystems those results may not be atomic. This behavior can be
> observed. For example, an open O_CREAT with mode 0 will succeed in creating
> the file but unexpectedly return -EACCES from vfs_open().
>
> Additionally, reducing the number of remote RPC calls required for O_CREAT
> on network filesystems provides a performance benefit in the open path.
>
> Teach knfsd's helper dentry_create() to use atomic_open() for filesystems
> that support it. The previously const @path is passed up to atomic_open()
> and may be modified depending on whether an existing entry was found or if
> the atomic_open() returned an error and consumed the passed-in dentry.
>
> Signed-off-by: Benjamin Coddington <bcodding@...merspace.com>
> Reviewed-by: Jeff Layton <jlayton@...nel.org>
> ---
> fs/namei.c | 46 +++++++++++++++++++++++++++++++++++++++-------
> fs/nfsd/nfs4proc.c | 8 +++++---
> include/linux/fs.h | 2 +-
> 3 files changed, 45 insertions(+), 11 deletions(-)
>
> diff --git a/fs/namei.c b/fs/namei.c
> index 9c0aad5bbff7..941b9fcebd1b 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -4200,6 +4200,9 @@ EXPORT_SYMBOL(user_path_create);
> *
> * Caller must hold the parent directory's lock, and have prepared
> * a negative dentry, placed in @path->dentry, for the new file.
> + * If the file was looked up only or didn't need to be created,
> + * FMODE_OPENED will not be set, and @path will be updated with the
> + * new dentry. The dentry may be negative.
> *
> * Caller sets @path->mnt to the vfsmount of the filesystem where
> * the new file is to be created. The parent directory and the
> @@ -4208,21 +4211,50 @@ EXPORT_SYMBOL(user_path_create);
> * On success, returns a "struct file *". Otherwise a ERR_PTR
> * is returned.
> */
> -struct file *dentry_create(const struct path *path, int flags, umode_t mode,
> +struct file *dentry_create(struct path *path, int flags, umode_t mode,
> const struct cred *cred)
> {
> + struct dentry *dentry = path->dentry;
> + struct dentry *dir = dentry->d_parent;
> + struct inode *dir_inode = d_inode(dir);
> + struct mnt_idmap *idmap;
> struct file *file;
> - int error;
> + int error, create_error;
>
> file = alloc_empty_file(flags, cred);
> if (IS_ERR(file))
> return file;
>
> - error = vfs_create(mnt_idmap(path->mnt),
> - d_inode(path->dentry->d_parent),
> - path->dentry, mode, true);
> - if (!error)
> - error = vfs_open(path, file);
> + idmap = mnt_idmap(path->mnt);
> +
> + if (dir_inode->i_op->atomic_open) {
> + path->dentry = dir;
> + mode = vfs_prepare_mode(idmap, dir_inode, mode, S_IALLUGO, S_IFREG);
> +
> + create_error = may_o_create(idmap, path, dentry, mode);
> + if (create_error)
> + flags &= ~O_CREAT;
> +
> + dentry = atomic_open(path, dentry, file, flags, mode);
> + error = PTR_ERR_OR_ZERO(dentry);
> +
> + if (unlikely(create_error) && error == -ENOENT)
> + error = create_error;
> +
> + if (!error) {
> + if (file->f_mode & FMODE_CREATED)
> + fsnotify_create(dir->d_inode, dentry);
> + if (file->f_mode & FMODE_OPENED)
> + fsnotify_open(file);
> + }
> +
> + path->dentry = dentry;
When atomic_open() fails, it returns an ERR_PTR, and path->dentry is then
set to that ERR_PTR unconditionally here. nfsd4_vfs_create() copies
path.dentry back into *child before it checks IS_ERR(filp), so the caller
ends up holding an ERR_PTR in place of its dentry.
Should path->dentry be updated only on success? Or should the original
dentry be preserved in a local variable and restored on error?
> +
> + } else {
> + error = vfs_create(idmap, dir_inode, dentry, mode, true);
> + if (!error)
> + error = vfs_open(path, file);
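Revisiting this, I wonder whether the non-atomic error flow needs specific
code to clean up after creation/open failures. For example, if vfs_create()
succeeds but vfs_open() then fails, the newly created file is left behind.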
> + }
>
> if (unlikely(error)) {
> fput(file);
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 71b428efcbb5..7ff7e5855e58 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -194,7 +194,7 @@ static inline bool nfsd4_create_is_exclusive(int createmode)
> }
>
> static __be32
> -nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child,
> +nfsd4_vfs_create(struct svc_fh *fhp, struct dentry **child,
> struct nfsd4_open *open)
> {
> struct file *filp;
> @@ -214,9 +214,11 @@ nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child,
> }
>
> path.mnt = fhp->fh_export->ex_path.mnt;
> - path.dentry = child;
> + path.dentry = *child;
> filp = dentry_create(&path, oflags, open->op_iattr.ia_mode,
> current_cred());
> + *child = path.dentry;
> +
> if (IS_ERR(filp))
> return nfserrno(PTR_ERR(filp));
>
> @@ -353,7 +355,7 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
> status = fh_fill_pre_attrs(fhp);
> if (status != nfs_ok)
> goto out;
> - status = nfsd4_vfs_create(fhp, child, open);
> + status = nfsd4_vfs_create(fhp, &child, open);
> if (status != nfs_ok)
> goto out;
> open->op_created = true;
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 601d036a6c78..772b734477e5 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -2878,7 +2878,7 @@ struct file *dentry_open(const struct path *path, int flags,
> const struct cred *creds);
> struct file *dentry_open_nonotify(const struct path *path, int flags,
> const struct cred *cred);
> -struct file *dentry_create(const struct path *path, int flags, umode_t mode,
> +struct file *dentry_create(struct path *path, int flags, umode_t mode,
> const struct cred *cred);
> struct path *backing_file_user_path(const struct file *f);
>
Reviewed-by: Chuck Lever <chuck.lever@...cle.com>
--
Chuck Lever
Powered by blists - more mailing lists