[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <166147984378.25420.4023980607067991846.stgit@noble.brown>
Date: Fri, 26 Aug 2022 12:10:43 +1000
From: NeilBrown <neilb@...e.de>
To: Al Viro <viro@...iv.linux.org.uk>,
Linus Torvalds <torvalds@...ux-foundation.org>,
Daire Byrne <daire@...g.com>,
Trond Myklebust <trond.myklebust@...merspace.com>,
Chuck Lever <chuck.lever@...cle.com>
Cc: Linux NFS Mailing List <linux-nfs@...r.kernel.org>,
linux-fsdevel@...r.kernel.org, LKML <linux-kernel@...r.kernel.org>
Subject: [PATCH 10/10] NFS: support parallel updates in the one directory.
NFS can easily support parallel updates as the locking is done on the
server, so this patch enables parallel updates for NFS.
Handling of silly-rename - both for unlink and for the rename target -
requires some care as we need to get an exclusive lock on the chosen
silly name and for rename we need to keep the original target name
locked after it has been renamed to the silly name.
So nfs_sillyrename() now uses d_exchange() to swap the target and the
silly name after the silly-rename has happened on the server, and the
silly dentry - which now has the name of the target - is returned.
For unlink(), this is immediately unlocked and discarded with a call to
nfs_sillyrename_finish(). For rename it is kept locked until the
originally requested rename completes.
Signed-off-by: NeilBrown <neilb@...e.de>
---
fs/dcache.c | 5 ++++-
fs/nfs/dir.c | 28 ++++++++++++++++------------
fs/nfs/fs_context.c | 6 ++++--
fs/nfs/internal.h | 3 ++-
fs/nfs/unlink.c | 51 ++++++++++++++++++++++++++++++++++++++-------------
5 files changed, 64 insertions(+), 29 deletions(-)
diff --git a/fs/dcache.c b/fs/dcache.c
index 9bf346a9de52..a5eaab16d39f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3056,7 +3056,9 @@ void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
write_seqlock(&rename_lock);
WARN_ON(!dentry1->d_inode);
- WARN_ON(!dentry2->d_inode);
+ /* Allow dentry2 to be negative, so we can do a rename
+ * but keep both names locked (DCACHE_PAR_UPDATE)
+ */
WARN_ON(IS_ROOT(dentry1));
WARN_ON(IS_ROOT(dentry2));
@@ -3064,6 +3066,7 @@ void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
write_sequnlock(&rename_lock);
}
+EXPORT_SYMBOL(d_exchange);
/**
* d_ancestor - search for an ancestor
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5d6c2ddc7ea6..fbb608fbe6bf 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1935,8 +1935,12 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
/*
* If we're doing an exclusive create, optimize away the lookup
* but don't hash the dentry.
+ * A silly_rename is marked exclusive, but we need to do an
+ * explicit lookup.
*/
- if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET)
+ if ((nfs_is_exclusive_create(dir, flags) ||
+ flags & LOOKUP_RENAME_TARGET) &&
+ !(flags & LOOKUP_SILLY_RENAME))
return NULL;
res = ERR_PTR(-ENOMEM);
@@ -2472,10 +2476,14 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
spin_lock(&dentry->d_lock);
if (d_count(dentry) > 1 && !test_bit(NFS_INO_PRESERVE_UNLINKED,
&NFS_I(d_inode(dentry))->flags)) {
+ struct dentry *silly;
+
spin_unlock(&dentry->d_lock);
/* Start asynchronous writeout of the inode */
write_inode_now(d_inode(dentry), 0);
- error = nfs_sillyrename(dir, dentry);
+ silly = nfs_sillyrename(dir, dentry);
+ error = PTR_ERR_OR_ZERO(silly);
+ nfs_sillyrename_finish(dir, silly);
goto out;
}
/* We must prevent any concurrent open until the unlink
@@ -2685,16 +2693,12 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
spin_unlock(&new_dentry->d_lock);
- /* copy the target dentry's name */
- dentry = d_alloc(new_dentry->d_parent,
- &new_dentry->d_name);
- if (!dentry)
- goto out;
-
/* silly-rename the existing target ... */
- err = nfs_sillyrename(new_dir, new_dentry);
- if (err)
+ dentry = nfs_sillyrename(new_dir, new_dentry);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
goto out;
+ }
new_dentry = dentry;
new_inode = NULL;
@@ -2750,9 +2754,9 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
} else if (error == -ENOENT)
nfs_dentry_handle_enoent(old_dentry);
- /* new dentry created? */
if (dentry)
- dput(dentry);
+ nfs_sillyrename_finish(new_dir, dentry);
+
return error;
}
EXPORT_SYMBOL_GPL(nfs_rename);
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 4da701fd1424..7133ca9433d2 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -1577,7 +1577,8 @@ struct file_system_type nfs_fs_type = {
.init_fs_context = nfs_init_fs_context,
.parameters = nfs_fs_parameters,
.kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA|
+ FS_PAR_DIR_UPDATE,
};
MODULE_ALIAS_FS("nfs");
EXPORT_SYMBOL_GPL(nfs_fs_type);
@@ -1589,7 +1590,8 @@ struct file_system_type nfs4_fs_type = {
.init_fs_context = nfs_init_fs_context,
.parameters = nfs_fs_parameters,
.kill_sb = nfs_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_BINARY_MOUNTDATA|
+ FS_PAR_DIR_UPDATE,
};
MODULE_ALIAS_FS("nfs4");
MODULE_ALIAS("nfs4");
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 27c720d71b4e..3a7fd30a8e29 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -611,7 +611,8 @@ extern struct rpc_task *
nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
struct dentry *old_dentry, struct dentry *new_dentry,
void (*complete)(struct rpc_task *, struct nfs_renamedata *));
-extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry);
+extern struct dentry *nfs_sillyrename(struct inode *dir, struct dentry *dentry);
+extern void nfs_sillyrename_finish(struct inode *dir, struct dentry *dentry);
/* direct.c */
void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 9697cd5d2561..c8a718f09fe6 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -428,6 +428,10 @@ nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data)
*
* The final cleanup is done during dentry_iput.
*
+ * We exchange the original with the new (silly) dentries, and return
+ * the new dentry which will have the original name. This ensures that
+ * the target name remains locked until the rename completes.
+ *
* (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server
* could take responsibility for keeping open files referenced. The server
* would also need to ensure that opened-but-deleted files were kept over
@@ -436,7 +440,7 @@ nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data)
* use to advertise that it does this; some day we may take advantage of
* it.))
*/
-int
+struct dentry *
nfs_sillyrename(struct inode *dir, struct dentry *dentry)
{
static unsigned int sillycounter;
@@ -445,6 +449,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
struct dentry *sdentry;
struct inode *inode = d_inode(dentry);
struct rpc_task *task;
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
+ struct path path = {};
int error = -EBUSY;
dfprintk(VFS, "NFS: silly-rename(%pd2, ct=%d)\n",
@@ -459,10 +465,11 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
fileid = NFS_FILEID(d_inode(dentry));
+ path.dentry = d_find_alias(dir);
sdentry = NULL;
do {
int slen;
- dput(sdentry);
+
sillycounter++;
slen = scnprintf(silly, sizeof(silly),
SILLYNAME_PREFIX "%0*llx%0*x",
@@ -472,14 +479,19 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
dfprintk(VFS, "NFS: trying to rename %pd to %s\n",
dentry, silly);
- sdentry = lookup_one_len(silly, dentry->d_parent, slen);
- /*
- * N.B. Better to return EBUSY here ... it could be
- * dangerous to delete the file while it's in use.
- */
- if (IS_ERR(sdentry))
- goto out;
- } while (d_inode(sdentry) != NULL); /* need negative lookup */
+ sdentry = filename_create_one_len(silly, slen,
+ &path,
+ LOOKUP_CREATE | LOOKUP_EXCL |
+ LOOKUP_SILLY_RENAME,
+ &wq);
+ } while (PTR_ERR_OR_ZERO(sdentry) == -EEXIST);
+ dput(path.dentry);
+ /*
+ * N.B. Better to return EBUSY here ... it could be
+ * dangerous to delete the file while it's in use.
+ */
+ if (IS_ERR(sdentry))
+ goto out;
ihold(inode);
@@ -513,7 +525,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
NFS_INO_INVALID_CTIME |
NFS_INO_REVAL_FORCED);
spin_unlock(&inode->i_lock);
- d_move(dentry, sdentry);
+ d_exchange(dentry, sdentry);
break;
case -ERESTARTSYS:
/* The result of the rename is unknown. Play it safe by
@@ -524,7 +536,20 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
rpc_put_task(task);
out_dput:
iput(inode);
- dput(sdentry);
+ if (!error)
+ return sdentry;
+
+ d_lookup_done(sdentry);
+ __done_path_update(&path, sdentry, true, LOOKUP_SILLY_RENAME);
out:
- return error;
+ return ERR_PTR(error);
+}
+
+void nfs_sillyrename_finish(struct inode *dir, struct dentry *dentry)
+{
+ struct path path = { .dentry = d_find_alias(dir) };
+
+ if (!IS_ERR(dentry))
+ __done_path_update(&path, dentry, true, LOOKUP_SILLY_RENAME);
+ dput(path.dentry);
}
Powered by blists - more mailing lists