lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20250206054504.2950516-20-neilb@suse.de>
Date: Thu,  6 Feb 2025 16:42:56 +1100
From: NeilBrown <neilb@...e.de>
To: Alexander Viro <viro@...iv.linux.org.uk>,
	Christian Brauner <brauner@...nel.org>,
	Jan Kara <jack@...e.cz>,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Jeff Layton <jlayton@...nel.org>,
	Dave Chinner <david@...morbit.com>
Cc: linux-fsdevel@...r.kernel.org,
	linux-kernel@...r.kernel.org
Subject: [PATCH 19/19] nfs: switch to _async for all directory ops.

nfs doesn't benefit from exclusive locking by the VFS as all directory
ops are sent to the server which does any needed locking.

The interesting part is "silly-rename" which needs to create and lock
another dentry while an unlink or rename is happening.

nfs_sillyrename() now returns that locked dentry and
nfs_sillyrename_finish() is added to unlock it when appropriate.

In order to keep all dentries locked until the operation completes,
nfs_sillyrename() now uses d_exchange() to record the silly rename in
the dcache.  This has to be exported and permitted to work on a negative
second dentry.

Signed-off-by: NeilBrown <neilb@...e.de>
---
 fs/dcache.c            |  5 +++-
 fs/nfs/dir.c           | 55 ++++++++++++++++++++++++------------------
 fs/nfs/internal.h      | 20 +++++++++------
 fs/nfs/nfs3proc.c      | 16 ++++++------
 fs/nfs/nfs4_fs.h       |  2 +-
 fs/nfs/nfs4proc.c      | 16 ++++++------
 fs/nfs/proc.c          | 16 ++++++------
 fs/nfs/unlink.c        | 48 +++++++++++++++++++++++++-----------
 include/linux/namei.h  |  1 -
 include/linux/nfs_fs.h |  3 ---
 10 files changed, 106 insertions(+), 76 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 90dee859d138..203d71eb4789 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2981,7 +2981,9 @@ void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
 	write_seqlock(&rename_lock);
 
 	WARN_ON(!dentry1->d_inode);
-	WARN_ON(!dentry2->d_inode);
+	/* allow dentry2 to be negative so we can do a rename but keep
+	 * both names locked with DCACHE_PAR_UPDATE.
+	 */
 	WARN_ON(IS_ROOT(dentry1));
 	WARN_ON(IS_ROOT(dentry2));
 
@@ -2989,6 +2991,7 @@ void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
 
 	write_sequnlock(&rename_lock);
 }
+EXPORT_SYMBOL(d_exchange);
 
 /**
  * d_ancestor - search for an ancestor
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2c69ec77d02c..c0116d44a6fc 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1956,10 +1956,14 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
 		return ERR_PTR(-ENAMETOOLONG);
 
 	/*
-	 * If we're doing an exclusive create, optimize away the lookup
-	 * but don't hash the dentry.
+	 * If we're doing an exclusive create, or if this is the target
+	 * of a rename, optimize away the lookup but don't hash the dentry.
+	 * A silly_rename is uniquely marked exclusive (REALLY? FIXME) and a rename target,
+	 * sand it request and explicit lookup.
 	 */
-	if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET)
+	if (nfs_is_exclusive_create(dir, flags) || (flags & LOOKUP_RENAME_TARGET &&
+	    ((flags & (LOOKUP_EXCL | LOOKUP_RENAME_TARGET)) !=
+	     (LOOKUP_EXCL | LOOKUP_RENAME_TARGET))))
 		return NULL;
 
 	res = ERR_PTR(-ENOMEM);
@@ -2057,7 +2061,7 @@ static int nfs_finish_open(struct nfs_open_context *ctx,
 
 int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 		    struct file *file, unsigned open_flags,
-		    umode_t mode)
+		    umode_t mode, struct dirop_ret *ret)
 {
 	struct nfs_open_context *ctx;
 	struct dentry *res;
@@ -2256,7 +2260,7 @@ nfs4_lookup_revalidate(struct inode *dir, const struct qstr *name,
 
 int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry,
 			struct file *file, unsigned int open_flags,
-			umode_t mode)
+			umode_t mode, struct dirop_ret *ret)
 {
 
 	/* Same as look+open from lookup_open(), but with different O_TRUNC
@@ -2383,7 +2387,8 @@ static int nfs_do_create(struct inode *dir, struct dentry *dentry,
 }
 
 int nfs_create(struct mnt_idmap *idmap, struct inode *dir,
-	       struct dentry *dentry, umode_t mode, bool excl)
+	       struct dentry *dentry, umode_t mode, bool excl,
+	       struct dirop_ret *ret)
 {
 	return nfs_do_create(dir, dentry, mode, excl ? O_EXCL : 0);
 }
@@ -2394,7 +2399,8 @@ EXPORT_SYMBOL_GPL(nfs_create);
  */
 int
 nfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
-	  struct dentry *dentry, umode_t mode, dev_t rdev)
+	  struct dentry *dentry, umode_t mode, dev_t rdev,
+	  struct dirop_ret *ret)
 {
 	struct iattr attr;
 	int status;
@@ -2466,7 +2472,7 @@ static void nfs_dentry_remove_handle_error(struct inode *dir,
 	}
 }
 
-int nfs_rmdir(struct inode *dir, struct dentry *dentry)
+int nfs_rmdir(struct inode *dir, struct dentry *dentry, struct dirop_ret *ret)
 {
 	int error;
 
@@ -2535,7 +2541,7 @@ static int nfs_safe_remove(struct dentry *dentry)
  *
  *  If sillyrename() returns 0, we do nothing, otherwise we unlink.
  */
-int nfs_unlink(struct inode *dir, struct dentry *dentry)
+int nfs_unlink(struct inode *dir, struct dentry *dentry, struct dirop_ret *ret)
 {
 	int error;
 
@@ -2546,10 +2552,14 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
 	spin_lock(&dentry->d_lock);
 	if (d_count(dentry) > 1 && !test_bit(NFS_INO_PRESERVE_UNLINKED,
 					     &NFS_I(d_inode(dentry))->flags)) {
+		struct dentry *silly;
+
 		spin_unlock(&dentry->d_lock);
 		/* Start asynchronous writeout of the inode */
 		write_inode_now(d_inode(dentry), 0);
-		error = nfs_sillyrename(dir, dentry);
+		silly = nfs_sillyrename(dir, dentry);
+		nfs_sillyrename_finish(silly);
+		error = PTR_ERR_OR_ZERO(silly);
 		goto out;
 	}
 	/* We must prevent any concurrent open until the unlink
@@ -2591,7 +2601,7 @@ EXPORT_SYMBOL_GPL(nfs_unlink);
  * and move the raw page into its mapping.
  */
 int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
-		struct dentry *dentry, const char *symname)
+		struct dentry *dentry, const char *symname, struct dirop_ret *ret)
 {
 	struct folio *folio;
 	char *kaddr;
@@ -2647,7 +2657,8 @@ int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
 EXPORT_SYMBOL_GPL(nfs_symlink);
 
 int
-nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
+nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry,
+	 struct dirop_ret *ret)
 {
 	struct inode *inode = d_inode(old_dentry);
 	int error;
@@ -2688,7 +2699,7 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
  * file in old_dir will go away when the last process iput()s the inode.
  *
  * FIXED.
- * 
+ *
  * It actually works quite well. One needs to have the possibility for
  * at least one ".nfs..." file in each directory the file ever gets
  * moved or linked to which happens automagically with the new
@@ -2704,7 +2715,8 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
  */
 int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 	       struct dentry *old_dentry, struct inode *new_dir,
-	       struct dentry *new_dentry, unsigned int flags)
+	       struct dentry *new_dentry, unsigned int flags,
+	       struct dirop_ret *ret)
 {
 	struct inode *old_inode = d_inode(old_dentry);
 	struct inode *new_inode = d_inode(new_dentry);
@@ -2744,16 +2756,12 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 
 			spin_unlock(&new_dentry->d_lock);
 
-			/* copy the target dentry's name */
-			dentry = d_alloc(new_dentry->d_parent,
-					 &new_dentry->d_name);
-			if (!dentry)
-				goto out;
-
 			/* silly-rename the existing target ... */
-			err = nfs_sillyrename(new_dir, new_dentry);
-			if (err)
+			dentry = nfs_sillyrename(new_dir, new_dentry);
+			if (IS_ERR(dentry)) {
+				err = PTR_ERR(dentry);
 				goto out;
+			}
 
 			new_dentry = dentry;
 			new_inode = NULL;
@@ -2811,9 +2819,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 	} else if (error == -ENOENT)
 		nfs_dentry_handle_enoent(old_dentry);
 
-	/* new dentry created? */
 	if (dentry)
-		dput(dentry);
+		nfs_sillyrename_finish(dentry);
 	return error;
 }
 EXPORT_SYMBOL_GPL(nfs_rename);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f7dea7fe5ebc..ba00ffeb70ac 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -399,18 +399,21 @@ extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
 struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
 void nfs_d_prune_case_insensitive_aliases(struct inode *inode);
 int nfs_create(struct mnt_idmap *, struct inode *, struct dentry *,
-	       umode_t, bool);
+	       umode_t, bool, struct dirop_ret *);
 struct dentry *nfs_mkdir(struct mnt_idmap *, struct inode *, struct dentry *,
 			 umode_t, struct dirop_ret *);
-int nfs_rmdir(struct inode *, struct dentry *);
-int nfs_unlink(struct inode *, struct dentry *);
+int nfs_rmdir(struct inode *, struct dentry *, struct dirop_ret *);
+int nfs_unlink(struct inode *, struct dentry *, struct dirop_ret *);
 int nfs_symlink(struct mnt_idmap *, struct inode *, struct dentry *,
-		const char *);
-int nfs_link(struct dentry *, struct inode *, struct dentry *);
+		const char *, struct dirop_ret *);
+int nfs_link(struct dentry *, struct inode *, struct dentry *, struct dirop_ret *);
 int nfs_mknod(struct mnt_idmap *, struct inode *, struct dentry *, umode_t,
-	      dev_t);
+	      dev_t, struct dirop_ret *);
 int nfs_rename(struct mnt_idmap *, struct inode *, struct dentry *,
-	       struct inode *, struct dentry *, unsigned int);
+	       struct inode *, struct dentry *, unsigned int, struct dirop_ret *);
+int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry,
+			struct file *file, unsigned int open_flags,
+			umode_t mode, struct dirop_ret *);
 
 #ifdef CONFIG_NFS_V4_2
 static inline __u32 nfs_access_xattr_mask(const struct nfs_server *server)
@@ -707,7 +710,8 @@ extern struct rpc_task *
 nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
 		 struct dentry *old_dentry, struct dentry *new_dentry,
 		 void (*complete)(struct rpc_task *, struct nfs_renamedata *));
-extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry);
+extern struct dentry *nfs_sillyrename(struct inode *dir, struct dentry *dentry);
+extern void nfs_sillyrename_finish(struct dentry *dentry);
 
 /* direct.c */
 void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 41797cbbb8dc..833e679d0a2b 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -1032,16 +1032,16 @@ static int nfs3_return_delegation(struct inode *inode)
 }
 
 static const struct inode_operations nfs3_dir_inode_operations = {
-	.create		= nfs_create,
-	.atomic_open	= nfs_atomic_open_v23,
+	.create_async	= nfs_create,
+	.atomic_open_async = nfs_atomic_open_v23,
 	.lookup		= nfs_lookup,
-	.link		= nfs_link,
-	.unlink		= nfs_unlink,
-	.symlink	= nfs_symlink,
+	.link_async	= nfs_link,
+	.unlink_async	= nfs_unlink,
+	.symlink_async	= nfs_symlink,
 	.mkdir_async	= nfs_mkdir,
-	.rmdir		= nfs_rmdir,
-	.mknod		= nfs_mknod,
-	.rename		= nfs_rename,
+	.rmdir_async	= nfs_rmdir,
+	.mknod_async	= nfs_mknod,
+	.rename_async	= nfs_rename,
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7d383d29a995..65fbcef5830e 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -273,7 +273,7 @@ extern const struct dentry_operations nfs4_dentry_operations;
 
 /* dir.c */
 int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
-		    unsigned, umode_t);
+		    unsigned, umode_t, struct dirop_ret *);
 
 /* fs_context.c */
 extern struct file_system_type nfs4_fs_type;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ef219968ed22..4fd312838bd3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -10878,16 +10878,16 @@ static void nfs4_disable_swap(struct inode *inode)
 }
 
 static const struct inode_operations nfs4_dir_inode_operations = {
-	.create		= nfs_create,
+	.create_async	= nfs_create,
 	.lookup		= nfs_lookup,
-	.atomic_open	= nfs_atomic_open,
-	.link		= nfs_link,
-	.unlink		= nfs_unlink,
-	.symlink	= nfs_symlink,
+	.atomic_open_async = nfs_atomic_open,
+	.link_async	= nfs_link,
+	.unlink_async	= nfs_unlink,
+	.symlink_async	= nfs_symlink,
 	.mkdir_async	= nfs_mkdir,
-	.rmdir		= nfs_rmdir,
-	.mknod		= nfs_mknod,
-	.rename		= nfs_rename,
+	.rmdir_async	= nfs_rmdir,
+	.mknod_async	= nfs_mknod,
+	.rename_async	= nfs_rename,
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 7e8f6d8f02b4..211edd9f5115 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -704,16 +704,16 @@ static int nfs_return_delegation(struct inode *inode)
 }
 
 static const struct inode_operations nfs_dir_inode_operations = {
-	.create		= nfs_create,
+	.create_async	= nfs_create,
 	.lookup		= nfs_lookup,
-	.atomic_open	= nfs_atomic_open_v23,
-	.link		= nfs_link,
-	.unlink		= nfs_unlink,
-	.symlink	= nfs_symlink,
+	.atomic_open_async = nfs_atomic_open_v23,
+	.link_async	= nfs_link,
+	.unlink_async	= nfs_unlink,
+	.symlink_async	= nfs_symlink,
 	.mkdir_async	= nfs_mkdir,
-	.rmdir		= nfs_rmdir,
-	.mknod		= nfs_mknod,
-	.rename		= nfs_rename,
+	.rmdir_async	= nfs_rmdir,
+	.mknod_async	= nfs_mknod,
+	.rename_async	= nfs_rename,
 	.permission	= nfs_permission,
 	.getattr	= nfs_getattr,
 	.setattr	= nfs_setattr,
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index d44162d3a8f1..06b71ec9520c 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -430,6 +430,10 @@ nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data)
  *
  * The final cleanup is done during dentry_iput.
  *
+ * We exchange the original with the new (silly) dentries, and return
+ * the new dentry which will now have the original name.  This ensures that
+ * the target name remains locked until the rename completes.
+ *
  * (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server
  * could take responsibility for keeping open files referenced.  The server
  * would also need to ensure that opened-but-deleted files were kept over
@@ -438,7 +442,7 @@ nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data)
  * use to advertise that it does this; some day we may take advantage of
  * it.))
  */
-int
+struct dentry *
 nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 {
 	static unsigned int sillycounter;
@@ -447,7 +451,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 	struct dentry *sdentry;
 	struct inode *inode = d_inode(dentry);
 	struct rpc_task *task;
-	int            error = -EBUSY;
+	struct dentry *base;
+	int error = -EBUSY;
 
 	dfprintk(VFS, "NFS: silly-rename(%pd2, ct=%d)\n",
 		dentry, d_count(dentry));
@@ -461,10 +466,11 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 
 	fileid = NFS_FILEID(d_inode(dentry));
 
+	base = d_find_alias(dir);
 	sdentry = NULL;
 	do {
 		int slen;
-		dput(sdentry);
+
 		sillycounter++;
 		slen = scnprintf(silly, sizeof(silly),
 				SILLYNAME_PREFIX "%0*llx%0*x",
@@ -474,14 +480,19 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 		dfprintk(VFS, "NFS: trying to rename %pd to %s\n",
 				dentry, silly);
 
-		sdentry = lookup_one_len(silly, dentry->d_parent, slen);
-		/*
-		 * N.B. Better to return EBUSY here ... it could be
-		 * dangerous to delete the file while it's in use.
-		 */
-		if (IS_ERR(sdentry))
-			goto out;
-	} while (d_inode(sdentry) != NULL); /* need negative lookup */
+		sdentry = lookup_and_lock_one(NULL, silly, slen,
+					      base,
+					      LOOKUP_CREATE | LOOKUP_EXCL
+					      | LOOKUP_RENAME_TARGET
+					      | LOOKUP_PARENT_LOCKED);
+	} while (PTR_ERR_OR_ZERO(sdentry) == -EEXIST); /* need negative lookup */
+	dput(base);
+	/*
+	 * N.B. Better to return EBUSY here ... it could be
+	 * dangerous to delete the file while it's in use.
+	 */
+	if (IS_ERR(sdentry))
+		goto out;
 
 	ihold(inode);
 
@@ -515,7 +526,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 						     NFS_INO_INVALID_CTIME |
 						     NFS_INO_REVAL_FORCED);
 		spin_unlock(&inode->i_lock);
-		d_move(dentry, sdentry);
+		d_exchange(dentry, sdentry);
 		break;
 	case -ERESTARTSYS:
 		/* The result of the rename is unknown. Play it safe by
@@ -526,7 +537,16 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
 	rpc_put_task(task);
 out_dput:
 	iput(inode);
-	dput(sdentry);
+	if (!error)
+		return dentry;
+	done_lookup_and_lock(NULL, sdentry, LOOKUP_PARENT_LOCKED);
+
 out:
-	return error;
+	return ERR_PTR(error);
+}
+
+void nfs_sillyrename_finish(struct dentry *dentry)
+{
+	if (!IS_ERR(dentry))
+		done_lookup_and_lock(NULL, dentry, LOOKUP_PARENT_LOCKED);
 }
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 8ef7aa6ed64c..29903e2cdf97 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -95,7 +95,6 @@ struct dentry *__lookup_and_lock_one(struct mnt_idmap *idmap,
 				     unsigned int lookup_flags);
 void done_lookup_and_lock(struct dentry *base, struct dentry *dentry,
 			  unsigned int lookup_flags);
-void __done_lookup_and_lock(struct dentry *dentry);
 
 extern int follow_down_one(struct path *);
 extern int follow_down(struct path *path, unsigned int flags);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 67ae2c3f41d2..6f9f4adfdf4c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -579,9 +579,6 @@ extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openfl
 extern void nfs_access_zap_cache(struct inode *inode);
 extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
 				 u32 *mask, bool may_block);
-extern int nfs_atomic_open_v23(struct inode *dir, struct dentry *dentry,
-			       struct file *file, unsigned int open_flags,
-			       umode_t mode);
 
 /*
  * linux/fs/nfs/symlink.c
-- 
2.47.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ