[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <165516230199.21248.18142980966152036732.stgit@noble.brown>
Date: Tue, 14 Jun 2022 09:18:22 +1000
From: NeilBrown <neilb@...e.de>
To: Al Viro <viro@...iv.linux.org.uk>, Daire Byrne <daire@...g.com>,
Trond Myklebust <trond.myklebust@...merspace.com>,
Chuck Lever <chuck.lever@...cle.com>
Cc: Linux NFS Mailing List <linux-nfs@...r.kernel.org>,
linux-fsdevel@...r.kernel.org, LKML <linux-kernel@...r.kernel.org>
Subject: [PATCH 06/12] VFS: support concurrent renames.
An object can now be renamed from or to a directory in which a create or
unlink is concurrently happening.
Two or more renames with the one directory can also be concurrent.
There is still only one cross-directory rename permitted at a time.
Signed-off-by: NeilBrown <neilb@...e.de>
---
fs/namei.c | 230 ++++++++++++++++++++++++++++++++++++++++++++-----
include/linux/namei.h | 9 ++
2 files changed, 216 insertions(+), 23 deletions(-)
diff --git a/fs/namei.c b/fs/namei.c
index 8ce7aa16b704..31ba4dbedfcf 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3172,6 +3172,197 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
}
EXPORT_SYMBOL(unlock_rename);
+struct dentry *lock_rename_lookup_excl(struct dentry *p1, struct dentry *p2,
+ struct dentry **d1p, struct dentry **d2p,
+ struct qstr *last1, struct qstr *last2,
+ unsigned int flags1, unsigned int flags2)
+{
+ struct dentry *p;
+ struct dentry *d1, *d2;
+
+ if (p1 == p2) {
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+ d1 = __lookup_hash(last1, p1, flags1, NULL);
+ if (IS_ERR(d1))
+ goto out_unlock_1;
+ d2 = __lookup_hash(last2, p2, flags2, NULL);
+ if (IS_ERR(d2))
+ goto out_unlock_2;
+ *d1p = d1; *d2p = d2;
+ return NULL;
+ out_unlock_2:
+ dput(d1);
+ d1 = d2;
+ out_unlock_1:
+ inode_unlock(p1->d_inode);
+ return d1;
+ }
+
+ mutex_lock(&p1->d_sb->s_vfs_rename_mutex);
+
+ if ((p = d_ancestor(p2, p1)) != NULL) {
+ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
+ inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
+ } else if ((p = d_ancestor(p1, p2)) != NULL) {
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+ inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
+ } else {
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
+ }
+ d1 = __lookup_hash(last1, p1, flags1, NULL);
+ if (IS_ERR(d1))
+ goto unlock_out_3;
+ d2 = __lookup_hash(last2, p2, flags2, NULL);
+ if (IS_ERR(d2))
+ goto unlock_out_4;
+
+ *d1p = d1;
+ *d2p = d2;
+ return p;
+unlock_out_4:
+ dput(d1);
+ d1 = d2;
+unlock_out_3:
+ inode_unlock(p1->d_inode);
+ inode_unlock(p2->d_inode);
+ mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
+ return d1;
+}
+
+static struct dentry *lock_rename_lookup(struct dentry *p1, struct dentry *p2,
+ struct dentry **d1p, struct dentry **d2p,
+ struct qstr *last1, struct qstr *last2,
+ unsigned int flags1, unsigned int flags2,
+ wait_queue_head_t *wq)
+{
+ struct dentry *p;
+ struct dentry *d1, *d2;
+ bool ok1, ok2;
+
+ if (!wq || (p1->d_inode->i_flags & S_PAR_UPDATE) == 0)
+ return lock_rename_lookup_excl(p1, p2, d1p, d2p, last1, last2,
+ flags1, flags2);
+
+ if (p1 == p2) {
+ inode_lock_shared_nested(p1->d_inode, I_MUTEX_PARENT);
+ retry:
+ d1 = __lookup_hash(last1, p1, flags1, wq);
+ if (IS_ERR(d1))
+ goto out_unlock_1;
+ d2 = __lookup_hash(last2, p2, flags2, wq);
+ if (IS_ERR(d2))
+ goto out_unlock_2;
+ *d1p = d1; *d2p = d2;
+
+ if (d1 < d2) {
+ ok1 = d_lock_update(d1, p1, last1);
+ ok2 = d_lock_update(d2, p2, last2);
+ } else {
+ ok2 = d_lock_update(d2, p2, last2);
+ ok1 = d_lock_update(d1, p1, last1);
+ }
+ if (!ok1 || !ok2) {
+ if (ok1)
+ d_unlock_update(d1);
+ if (ok2)
+ d_unlock_update(d2);
+ dput(d1);
+ dput(d2);
+ goto retry;
+ }
+ return NULL;
+ out_unlock_2:
+ dput(d1);
+ d1 = d2;
+ out_unlock_1:
+ inode_unlock_shared(p1->d_inode);
+ return d1;
+ }
+
+ mutex_lock(&p1->d_sb->s_vfs_rename_mutex);
+
+ if ((p = d_ancestor(p2, p1)) != NULL) {
+ inode_lock_shared_nested(p2->d_inode, I_MUTEX_PARENT);
+ inode_lock_shared_nested(p1->d_inode, I_MUTEX_CHILD);
+ } else if ((p = d_ancestor(p1, p2)) != NULL) {
+ inode_lock_shared_nested(p1->d_inode, I_MUTEX_PARENT);
+ inode_lock_shared_nested(p2->d_inode, I_MUTEX_CHILD);
+ } else {
+ inode_lock_shared_nested(p1->d_inode, I_MUTEX_PARENT);
+ inode_lock_shared_nested(p2->d_inode, I_MUTEX_PARENT2);
+ }
+retry2:
+ d1 = __lookup_hash(last1, p1, flags1, wq);
+ if (IS_ERR(d1))
+ goto unlock_out_3;
+ d2 = __lookup_hash(last2, p2, flags2, wq);
+ if (IS_ERR(d2))
+ goto unlock_out_4;
+
+ ok1 = d_lock_update(d1, p1, last1);
+ ok2 = d_lock_update(d2, p2, last2);
+ if (!ok1 || !ok2) {
+ if (ok1)
+ d_unlock_update(d1);
+ if (ok2)
+ d_unlock_update(d2);
+ dput(d1);
+ dput(d2);
+ goto retry2;
+ }
+ *d1p = d1;
+ *d2p = d2;
+ return p;
+unlock_out_4:
+ dput(d1);
+ d1 = d2;
+unlock_out_3:
+ inode_unlock_shared(p1->d_inode);
+ inode_unlock_shared(p2->d_inode);
+ mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
+ return d1;
+}
+
+struct dentry *lock_rename_lookup_one(struct dentry *p1, struct dentry *p2,
+ struct dentry **d1p, struct dentry **d2p,
+ const char *name1, int nlen1,
+ const char *name2, int nlen2,
+ unsigned int flags1, unsigned int flags2,
+ wait_queue_head_t *wq)
+{
+ struct qstr this1, this2;
+ int err;
+
+ err = lookup_one_common(&init_user_ns, name1, p1, nlen1, &this1);
+ if (err)
+ return ERR_PTR(err);
+ err = lookup_one_common(&init_user_ns, name2, p2, nlen2, &this2);
+ if (err)
+ return ERR_PTR(err);
+ return lock_rename_lookup(p1, p2, d1p, d2p, &this1, &this2,
+ flags1, flags2, wq);
+}
+EXPORT_SYMBOL(lock_rename_lookup_one);
+
+void unlock_rename_lookup(struct dentry *p1, struct dentry *p2,
+ struct dentry *d1, struct dentry *d2)
+{
+ if (d1->d_flags & DCACHE_PAR_UPDATE) {
+ d_lookup_done(d1);
+ d_lookup_done(d2);
+ inode_unlock_shared(p1->d_inode);
+ if (p1 != p2) {
+ inode_unlock_shared(p2->d_inode);
+ mutex_unlock(&p1->d_sb->s_vfs_rename_mutex);
+ }
+ } else
+ unlock_rename(p1, p2);
+ dput(d1);
+ dput(d2);
+}
+EXPORT_SYMBOL(unlock_rename_lookup);
+
/**
* vfs_create - create new file
* @mnt_userns: user namespace of the mount the inode was found from
@@ -4910,6 +5101,7 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET;
bool should_retry = false;
int error = -EINVAL;
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
goto put_names;
@@ -4950,58 +5142,53 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
goto exit2;
retry_deleg:
- trap = lock_rename(new_path.dentry, old_path.dentry);
-
- old_dentry = __lookup_hash(&old_last, old_path.dentry,
- lookup_flags, NULL);
- error = PTR_ERR(old_dentry);
- if (IS_ERR(old_dentry))
+ trap = lock_rename_lookup(new_path.dentry, old_path.dentry,
+ &new_dentry, &old_dentry,
+ &new_last, &old_last,
+ lookup_flags | target_flags, lookup_flags,
+ &wq);
+ if (IS_ERR(trap))
goto exit3;
/* source must exist */
error = -ENOENT;
if (d_is_negative(old_dentry))
goto exit4;
- new_dentry = __lookup_hash(&new_last, new_path.dentry,
- lookup_flags | target_flags, NULL);
- error = PTR_ERR(new_dentry);
- if (IS_ERR(new_dentry))
- goto exit4;
error = -EEXIST;
if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry))
- goto exit5;
+ goto exit4;
if (flags & RENAME_EXCHANGE) {
error = -ENOENT;
if (d_is_negative(new_dentry))
- goto exit5;
+ goto exit4;
if (!d_is_dir(new_dentry)) {
error = -ENOTDIR;
if (new_last.name[new_last.len])
- goto exit5;
+ goto exit4;
}
}
/* unless the source is a directory trailing slashes give -ENOTDIR */
if (!d_is_dir(old_dentry)) {
error = -ENOTDIR;
if (old_last.name[old_last.len])
- goto exit5;
+ goto exit4;
if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len])
- goto exit5;
+ goto exit4;
}
/* source should not be ancestor of target */
error = -EINVAL;
if (old_dentry == trap)
- goto exit5;
+ goto exit4;
/* target should not be an ancestor of source */
if (!(flags & RENAME_EXCHANGE))
error = -ENOTEMPTY;
if (new_dentry == trap)
- goto exit5;
+ goto exit4;
error = security_path_rename(&old_path, old_dentry,
&new_path, new_dentry, flags);
if (error)
- goto exit5;
+ goto exit4;
rd.old_dir = old_path.dentry->d_inode;
rd.old_dentry = old_dentry;
@@ -5012,12 +5199,9 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
rd.delegated_inode = &delegated_inode;
rd.flags = flags;
error = vfs_rename(&rd);
-exit5:
- dput(new_dentry);
exit4:
- dput(old_dentry);
+ unlock_rename_lookup(new_path.dentry, old_path.dentry, new_dentry, old_dentry);
exit3:
- unlock_rename(new_path.dentry, old_path.dentry);
if (delegated_inode) {
error = break_deleg_wait(&delegated_inode);
if (!error)
diff --git a/include/linux/namei.h b/include/linux/namei.h
index 217aa6de9f25..b1ea89568de8 100644
--- a/include/linux/namei.h
+++ b/include/linux/namei.h
@@ -101,6 +101,15 @@ extern int follow_up(struct path *);
extern struct dentry *lock_rename(struct dentry *, struct dentry *);
extern void unlock_rename(struct dentry *, struct dentry *);
+extern struct dentry *lock_rename_lookup_one(
+ struct dentry *p1, struct dentry *p2,
+ struct dentry **d1p, struct dentry **d2p,
+ const char *name1, int nlen1,
+ const char *name2, int nlen2,
+ unsigned int flags1, unsigned int flags2,
+ wait_queue_head_t *wq);
+extern void unlock_rename_lookup(struct dentry *p1, struct dentry *p2,
+ struct dentry *d1, struct dentry *d2);
extern int __must_check nd_jump_link(struct path *path);
Powered by blists - more mailing lists