lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Sat, 16 Apr 2016 01:55:26 +0100
From:	Al Viro <viro@...IV.linux.org.uk>
To:	Linus Torvalds <torvalds@...ux-foundation.org>
Cc:	linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org
Subject: [PATCH 14/15] parallel lookups machinery, part 4 (and last)

From: Al Viro <viro@...iv.linux.org.uk>

If we *do* run into an in-lookup match, we need to wait for it to
cease being in-lookup.  Fortunately, we do have unused space in
in-lookup dentries - d_lru is never looked at until it stops being
in-lookup.

So we can stash a pointer to wait_queue_head from stack frame of
the caller of ->lookup().  Some precautions are needed while
waiting, but it's not that hard - we do hold a reference to dentry
we are waiting for, so it can't go away.  If it's found to be
in-lookup the wait_queue_head is still alive and will remain so
at least while ->d_lock is held.  Moreover, the condition we
are waiting for becomes true at the same point where everything
on that wq gets woken up, so we can just add ourselves to the
queue once.

d_alloc_parallel() gets a pointer to wait_queue_head_t from its
caller; lookup_slow() adjusted, d_add_ci() taught to use
d_alloc_parallel() if the dentry passed to it happens to be
in-lookup one (i.e. if it's been called from the parallel lookup).

That's pretty much it - all that remains is to switch ->i_mutex
to rwsem and have lookup_slow() take it shared.

Signed-off-by: Al Viro <viro@...iv.linux.org.uk>
---
 fs/dcache.c            | 94 +++++++++++++++++++++++++++++++++++++++-----------
 fs/namei.c             |  3 +-
 include/linux/dcache.h |  8 +++--
 3 files changed, 82 insertions(+), 23 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 0552002..5965588 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1984,28 +1984,36 @@ EXPORT_SYMBOL(d_obtain_root);
 struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 			struct qstr *name)
 {
-	struct dentry *found;
-	struct dentry *new;
+	struct dentry *found, *res;
 
 	/*
 	 * First check if a dentry matching the name already exists,
 	 * if not go ahead and create it now.
 	 */
 	found = d_hash_and_lookup(dentry->d_parent, name);
-	if (!found) {
-		new = d_alloc(dentry->d_parent, name);
-		if (!new) {
-			found = ERR_PTR(-ENOMEM);
-		} else {
-			found = d_splice_alias(inode, new);
-			if (found) {
-				dput(new);
-				return found;
-			}
-			return new;
+	if (found) {
+		iput(inode);
+		return found;
+	}
+	if (dentry->d_flags & DCACHE_PAR_LOOKUP) {
+		found = d_alloc_parallel(dentry->d_parent, name,
+					dentry->d_wait);
+		if (IS_ERR(found) || !(found->d_flags & DCACHE_PAR_LOOKUP)) {
+			iput(inode);
+			return found;
 		}
+	} else {
+		found = d_alloc(dentry->d_parent, name);
+		if (!found) {
+			iput(inode);
+			return ERR_PTR(-ENOMEM);
+		} 
+	}
+	res = d_splice_alias(inode, found);
+	if (res) {
+		dput(found);
+		return res;
 	}
-	iput(inode);
 	return found;
 }
 EXPORT_SYMBOL(d_add_ci);
@@ -2388,8 +2396,23 @@ static inline void end_dir_add(struct inode *dir, unsigned n)
 	smp_store_release(&dir->i_dir_seq, n + 2);
 }
 
+static void d_wait_lookup(struct dentry *dentry)
+{
+	if (dentry->d_flags & DCACHE_PAR_LOOKUP) {
+		DECLARE_WAITQUEUE(wait, current);
+		add_wait_queue(dentry->d_wait, &wait);
+		do {
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			spin_unlock(&dentry->d_lock);
+			schedule();
+			spin_lock(&dentry->d_lock);
+		} while (dentry->d_flags & DCACHE_PAR_LOOKUP);
+	}
+}
+
 struct dentry *d_alloc_parallel(struct dentry *parent,
-				const struct qstr *name)
+				const struct qstr *name,
+				wait_queue_head_t *wq)
 {
 	unsigned int len = name->len;
 	unsigned int hash = name->hash;
@@ -2444,18 +2467,47 @@ retry:
 		}
 		dget(dentry);
 		hlist_bl_unlock(b);
-		/* impossible until we actually enable parallel lookups */
-		BUG();
-		/* and this will be "wait for it to stop being in-lookup" */
-		/* this one will be handled in the next commit */
+		/* somebody is doing lookup for it right now; wait for it */
+		spin_lock(&dentry->d_lock);
+		d_wait_lookup(dentry);
+		/*
+		 * it's not in-lookup anymore; in principle we should repeat
+		 * everything from dcache lookup, but it's likely to be what
+		 * d_lookup() would've found anyway.  If it is, just return it;
+		 * otherwise we really have to repeat the whole thing.
+		 */
+		if (unlikely(dentry->d_name.hash != hash))
+			goto mismatch;
+		if (unlikely(dentry->d_parent != parent))
+			goto mismatch;
+		if (unlikely(d_unhashed(dentry)))
+			goto mismatch;
+		if (parent->d_flags & DCACHE_OP_COMPARE) {
+			int tlen = dentry->d_name.len;
+			const char *tname = dentry->d_name.name;
+			if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
+				goto mismatch;
+		} else {
+			if (unlikely(dentry->d_name.len != len))
+				goto mismatch;
+			if (unlikely(dentry_cmp(dentry, str, len)))
+				goto mismatch;
+		}
+		/* OK, it *is* a hashed match; return it */
+		spin_unlock(&dentry->d_lock);
 		dput(new);
 		return dentry;
 	}
 	/* we can't take ->d_lock here; it's OK, though. */
 	new->d_flags |= DCACHE_PAR_LOOKUP;
+	new->d_wait = wq;
 	hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b);
 	hlist_bl_unlock(b);
 	return new;
+mismatch:
+	spin_unlock(&dentry->d_lock);
+	dput(dentry);
+	goto retry;
 }
 
 void __d_not_in_lookup(struct dentry *dentry)
@@ -2465,9 +2517,11 @@ void __d_not_in_lookup(struct dentry *dentry)
 	hlist_bl_lock(b);
 	dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
 	__hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
+	wake_up_all(dentry->d_wait);
+	dentry->d_wait = NULL;
 	hlist_bl_unlock(b);
 	INIT_HLIST_NODE(&dentry->d_u.d_alias);
-	/* more stuff will land here */
+	INIT_LIST_HEAD(&dentry->d_lru);
 }
 
 /* inode->i_lock held if inode is non-NULL */
diff --git a/fs/namei.c b/fs/namei.c
index fbce016..eb879d6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1605,13 +1605,14 @@ static struct dentry *lookup_slow(const struct qstr *name,
 {
 	struct dentry *dentry = ERR_PTR(-ENOENT), *old;
 	struct inode *inode = dir->d_inode;
+	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
 
 	inode_lock(inode);
 	/* Don't go there if it's already dead */
 	if (unlikely(IS_DEADDIR(inode)))
 		goto out;
 again:
-	dentry = d_alloc_parallel(dir, name);
+	dentry = d_alloc_parallel(dir, name, &wq);
 	if (IS_ERR(dentry))
 		goto out;
 	if (unlikely(!(dentry->d_flags & DCACHE_PAR_LOOKUP))) {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 3ab5ce4..bb4b44f 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -123,7 +123,10 @@ struct dentry {
 	unsigned long d_time;		/* used by d_revalidate */
 	void *d_fsdata;			/* fs-specific data */
 
-	struct list_head d_lru;		/* LRU list */
+	union {
+		struct list_head d_lru;		/* LRU list */
+		wait_queue_head_t *d_wait;	/* in-lookup ones only */
+	};
 	struct list_head d_child;	/* child of parent list */
 	struct list_head d_subdirs;	/* our children */
 	/*
@@ -249,7 +252,8 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op
 /* allocate/de-allocate */
 extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
 extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
-extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *);
+extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *,
+					wait_queue_head_t *);
 extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
 extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
 extern struct dentry * d_exact_alias(struct dentry *, struct inode *);
-- 
2.8.0.rc3

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ