[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <165516230200.21248.14713533079253477888.stgit@noble.brown>
Date: Tue, 14 Jun 2022 09:18:22 +1000
From: NeilBrown <neilb@...e.de>
To: Al Viro <viro@...iv.linux.org.uk>, Daire Byrne <daire@...g.com>,
Trond Myklebust <trond.myklebust@...merspace.com>,
Chuck Lever <chuck.lever@...cle.com>
Cc: Linux NFS Mailing List <linux-nfs@...r.kernel.org>,
linux-fsdevel@...r.kernel.org, LKML <linux-kernel@...r.kernel.org>
Subject: [PATCH 07/12] NFS: support parallel updates in the one directory.
NFS can easily support parallel updates as the locking is done on the
server, so this patch enables parallel updates for NFS.
NFS unlink needs to block concurrent open() calls once it decides to actually
unlink the file, rather than rename it to .nfsXXXX (aka sillyrename).
It currently does this by temporarily unhashing the dentry and relying
on the exclusive lock on the directory to block a ->lookup(). That
doesn't work now that unlink uses a shared lock, so an alternate
approach is needed.
__nfs_lookup_revalidate (->d_revalidate) now blocks if DCACHE_PAR_UPDATE
is set, and if nfs_unlink() happens to be called with an exclusive lock
and DCACHE_PAR_UPDATE is not set, it gets set during the potential race window.
I'd rather use some other indicator in the dentry to tell
__nfs_lookup_revalidate() to wait, but we are nearly out of d_flags bits,
and NFS doesn't have a general-purpose d_fsdata.
NFS "silly-rename" may now be called with only a shared lock on the
directory, so it needs a bit of extra care to get exclusive access to
the new name. d_lock_update_nested() and d_unlock_update() help here.
Signed-off-by: NeilBrown <neilb@...e.de>
---
fs/nfs/dir.c | 29 +++++++++++++++++++++++------
fs/nfs/inode.c | 2 ++
fs/nfs/unlink.c | 5 ++++-
3 files changed, 29 insertions(+), 7 deletions(-)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a8ecdd527662..54c2c7adcd56 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1778,6 +1778,9 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
int ret;
if (flags & LOOKUP_RCU) {
+ if (dentry->d_flags & DCACHE_PAR_UPDATE)
+ /* Pending unlink */
+ return -ECHILD;
parent = READ_ONCE(dentry->d_parent);
dir = d_inode_rcu(parent);
if (!dir)
@@ -1786,6 +1789,9 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
if (parent != READ_ONCE(dentry->d_parent))
return -ECHILD;
} else {
+ /* Wait for unlink to complete */
+ wait_var_event(&dentry->d_flags,
+ !(dentry->d_flags & DCACHE_PAR_UPDATE));
parent = dget_parent(dentry);
ret = reval(d_inode(parent), dentry, flags);
dput(parent);
@@ -2453,7 +2459,7 @@ static int nfs_safe_remove(struct dentry *dentry)
int nfs_unlink(struct inode *dir, struct dentry *dentry)
{
int error;
- int need_rehash = 0;
+ bool did_set_par_update = false;
dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
dir->i_ino, dentry);
@@ -2468,15 +2474,26 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
error = nfs_sillyrename(dir, dentry);
goto out;
}
- if (!d_unhashed(dentry)) {
- __d_drop(dentry);
- need_rehash = 1;
+ /* We must prevent any concurrent open until the unlink
+ * completes. ->d_revalidate will wait for DCACHE_PAR_UPDATE
+ * to clear, but if this happens to be a non-parallel update, we
+ * still want to block opens. So set DCACHE_PAR_UPDATE
+ * temporarily.
+ */
+ if (!(dentry->d_flags & DCACHE_PAR_UPDATE)) {
+ /* Must have exclusive lock on parent */
+ did_set_par_update = true;
+ dentry->d_flags |= DCACHE_PAR_UPDATE;
}
+
spin_unlock(&dentry->d_lock);
error = nfs_safe_remove(dentry);
nfs_dentry_remove_handle_error(dir, dentry, error);
- if (need_rehash)
- d_rehash(dentry);
+ if (did_set_par_update) {
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags &= ~DCACHE_PAR_UPDATE;
+ spin_unlock(&dentry->d_lock);
+ }
out:
trace_nfs_unlink_exit(dir, dentry, error);
return error;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b4e46b0ffa2d..cea2554710d2 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -481,6 +481,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
/* We can't support update_atime(), since the server will reset it */
inode->i_flags |= S_NOATIME|S_NOCMTIME;
+ /* Parallel updates to directories are trivial */
+ inode->i_flags |= S_PAR_UPDATE;
inode->i_mode = fattr->mode;
nfsi->cache_validity = 0;
if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 9697cd5d2561..52a20eb6131c 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -462,6 +462,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
sdentry = NULL;
do {
int slen;
+ d_unlock_update(sdentry);
dput(sdentry);
sillycounter++;
slen = scnprintf(silly, sizeof(silly),
@@ -479,7 +480,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
*/
if (IS_ERR(sdentry))
goto out;
- } while (d_inode(sdentry) != NULL); /* need negative lookup */
+ } while (!d_lock_update_nested(sdentry, NULL, NULL,
+ SINGLE_DEPTH_NESTING));
ihold(inode);
@@ -524,6 +526,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
rpc_put_task(task);
out_dput:
iput(inode);
+ d_unlock_update(sdentry);
dput(sdentry);
out:
return error;
Powered by blists - more mailing lists