Message-ID: <20160418204506.GD3193@fieldses.org>
Date:	Mon, 18 Apr 2016 16:45:06 -0400
From:	bfields@...ldses.org (J. Bruce Fields)
To:	Al Viro <viro@...IV.linux.org.uk>
Cc:	Linus Torvalds <torvalds@...ux-foundation.org>,
	linux-kernel@...r.kernel.org, linux-fsdevel@...r.kernel.org
Subject: Re: [PATCH 13/15] parallel lookups machinery, part 3

On Sat, Apr 16, 2016 at 01:55:25AM +0100, Al Viro wrote:
> From: Al Viro <viro@...iv.linux.org.uk>
> 
> We will need to be able to check if there is an in-lookup
> dentry with a matching parent/name.  Right now that's impossible,
> but as soon as we start locking directories shared, such beasts
> will appear.
> 
> Add a secondary hash for locating those.  Hash chains go through
> the same space where d_alias will be once the dentry is no longer
> in-lookup.  Search is done under the same bitlock we use for
> modifications - with the primary hash we can rely on d_rehash()
> into the wrong chain being the worst that could happen, but here
> the pointers are buggered once the entry is removed from the chain.
> On the other hand, the chains are not going to be long, and normally
> we'll end up adding to the chain anyway.  That allows us to avoid
> bothering with ->d_lock when doing the comparisons - everything is
> stable until removed from the chain.
> 
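So, if I'm reading the locking argument right, the whole discipline
for the secondary chains reduces to something like this (a simplified
sketch with parent/hash/new as in d_alloc_parallel() below; the name
comparison and the i_dir_seq check are left out):

	struct hlist_bl_head *b = in_lookup_hash(parent, hash);
	struct hlist_bl_node *node;
	struct dentry *d;

	hlist_bl_lock(b);	/* bit 0 of b->first doubles as the lock */
	hlist_bl_for_each_entry(d, node, b, d_u.d_in_lookup_hash) {
		/* no ->d_lock needed: entries can only leave the
		 * chain under hlist_bl_lock(), which we are holding */
		if (d->d_name.hash == hash && d->d_parent == parent)
			/* found an in-lookup match */;
	}
	/* common case - no match; add ourselves while still locked */
	new->d_flags |= DCACHE_PAR_LOOKUP;
	hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b);
	hlist_bl_unlock(b);
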
> New helper: d_alloc_parallel().  Right now it allocates, verifies
> that no hashed or in-lookup match exists, and adds the new dentry
> to the in-lookup hash.
> 
> Returns ERR_PTR() on error, a hashed match (in the unlikely case
> one has been found), or the new dentry.  In-lookup matches trigger
> BUG() for now; that will change in the next commit, when we
> introduce waiting for the ongoing lookup to finish.  Note that
> in-lookup matches won't be possible until we actually go for
> shared locking.
> 
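In other words, a caller can tell the three outcomes apart by the
DCACHE_PAR_LOOKUP flag; roughly (paraphrasing the lookup_slow()
conversion further down, error handling trimmed, inode == dir->d_inode):

	dentry = d_alloc_parallel(dir, name);
	if (IS_ERR(dentry))
		return dentry;		/* allocation (or other) error */
	if (!(dentry->d_flags & DCACHE_PAR_LOOKUP))
		return dentry;		/* preexisting hashed match */
	/* new in-lookup dentry: we own the actual lookup */
	old = inode->i_op->lookup(inode, dentry, flags);
	d_not_in_lookup(dentry);	/* take it off the in-lookup hash */
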
> lookup_slow() is switched to using d_alloc_parallel().
> 
> Again, these commits are separated only to make them easier to
> review.  All this machinery will start doing something useful only
> when we go for shared locking; it's just that the combination is
> too large for my taste.

Thanks for doing that!  I'm not sure I get this yet, but I feel like I
have at least a fighting chance....

--b.

> 
> Signed-off-by: Al Viro <viro@...iv.linux.org.uk>
> ---
>  fs/dcache.c            | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  fs/namei.c             | 44 +++++++++++--------------
>  include/linux/dcache.h |  2 ++
>  3 files changed, 108 insertions(+), 25 deletions(-)
> 
> diff --git a/fs/dcache.c b/fs/dcache.c
> index 3959f18..0552002 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -111,6 +111,17 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
>  	return dentry_hashtable + hash_32(hash, d_hash_shift);
>  }
>  
> +#define IN_LOOKUP_SHIFT 10
> +static struct hlist_bl_head in_lookup_hashtable[1 << IN_LOOKUP_SHIFT];
> +
> +static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent,
> +					unsigned int hash)
> +{
> +	hash += (unsigned long) parent / L1_CACHE_BYTES;
> +	return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT);
> +}
> +
> +
>  /* Statistics gathering. */
>  struct dentry_stat_t dentry_stat = {
>  	.age_limit = 45,
> @@ -2377,9 +2388,85 @@ static inline void end_dir_add(struct inode *dir, unsigned n)
>  	smp_store_release(&dir->i_dir_seq, n + 2);
>  }
>  
> +struct dentry *d_alloc_parallel(struct dentry *parent,
> +				const struct qstr *name)
> +{
> +	unsigned int len = name->len;
> +	unsigned int hash = name->hash;
> +	const unsigned char *str = name->name;
> +	struct hlist_bl_head *b = in_lookup_hash(parent, hash);
> +	struct hlist_bl_node *node;
> +	struct dentry *new = d_alloc(parent, name);
> +	struct dentry *dentry;
> +	unsigned seq;
> +
> +	if (unlikely(!new))
> +		return ERR_PTR(-ENOMEM);
> +
> +retry:
> +	seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1;
> +	dentry = d_lookup(parent, name);
> +	if (unlikely(dentry)) {
> +		dput(new);
> +		return dentry;
> +	}
> +
> +	hlist_bl_lock(b);
> +	smp_rmb();
> +	if (unlikely(parent->d_inode->i_dir_seq != seq)) {
> +		hlist_bl_unlock(b);
> +		goto retry;
> +	}
> +	/*
> +	 * No changes for the parent since the beginning of d_lookup().
> +	 * Since all removals from the chain happen with hlist_bl_lock(),
> +	 * any potential in-lookup matches are going to stay here until
> +	 * we unlock the chain.  All fields are stable in everything
> +	 * we encounter.
> +	 */
> +	hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) {
> +		if (dentry->d_name.hash != hash)
> +			continue;
> +		if (dentry->d_parent != parent)
> +			continue;
> +		if (d_unhashed(dentry))
> +			continue;
> +		if (parent->d_flags & DCACHE_OP_COMPARE) {
> +			int tlen = dentry->d_name.len;
> +			const char *tname = dentry->d_name.name;
> +			if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
> +				continue;
> +		} else {
> +			if (dentry->d_name.len != len)
> +				continue;
> +			if (dentry_cmp(dentry, str, len))
> +				continue;
> +		}
> +		dget(dentry);
> +		hlist_bl_unlock(b);
> +		/* impossible until we actually enable parallel lookups */
> +		BUG();
> +		/* and this will be "wait for it to stop being in-lookup" */
> +		/* this one will be handled in the next commit */
> +		dput(new);
> +		return dentry;
> +	}
> +	/* we can't take ->d_lock here; it's OK, though. */
> +	new->d_flags |= DCACHE_PAR_LOOKUP;
> +	hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b);
> +	hlist_bl_unlock(b);
> +	return new;
> +}
> +
>  void __d_not_in_lookup(struct dentry *dentry)
>  {
> +	struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent,
> +						 dentry->d_name.hash);
> +	hlist_bl_lock(b);
>  	dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
> +	__hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
> +	hlist_bl_unlock(b);
> +	INIT_HLIST_NODE(&dentry->d_u.d_alias);
>  	/* more stuff will land here */
>  }
>  
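If I'm following the i_dir_seq dance in d_alloc_parallel() above: it
works like a seqcount.  The directory-modification side (not visible
in this hunk) takes the counter odd while an addition is in flight,
and end_dir_add() store-releases it back to the next even value (the
n + 2 in the context above).  So

	seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1;

can never match a directory with an addition in progress, and the
recheck under hlist_bl_lock() sends us back to retry if a d_add()
raced with the d_lookup() miss - a miss in the primary hash is only
trusted when no addition started or completed in between.
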
> diff --git a/fs/namei.c b/fs/namei.c
> index 0ee8b9d..fbce016 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -1603,46 +1603,40 @@ static struct dentry *lookup_slow(const struct qstr *name,
>  				  struct dentry *dir,
>  				  unsigned int flags)
>  {
> -	struct dentry *dentry, *old;
> +	struct dentry *dentry = ERR_PTR(-ENOENT), *old;
>  	struct inode *inode = dir->d_inode;
>  
>  	inode_lock(inode);
>  	/* Don't go there if it's already dead */
> -	if (unlikely(IS_DEADDIR(inode))) {
> -		inode_unlock(inode);
> -		return ERR_PTR(-ENOENT);
> -	}
> -	dentry = d_lookup(dir, name);
> -	if (unlikely(dentry)) {
> +	if (unlikely(IS_DEADDIR(inode)))
> +		goto out;
> +again:
> +	dentry = d_alloc_parallel(dir, name);
> +	if (IS_ERR(dentry))
> +		goto out;
> +	if (unlikely(!(dentry->d_flags & DCACHE_PAR_LOOKUP))) {
>  		if ((dentry->d_flags & DCACHE_OP_REVALIDATE) &&
>  		    !(flags & LOOKUP_NO_REVAL)) {
>  			int error = d_revalidate(dentry, flags);
>  			if (unlikely(error <= 0)) {
> -				if (!error)
> +				if (!error) {
>  					d_invalidate(dentry);
> +					dput(dentry);
> +					goto again;
> +				}
>  				dput(dentry);
>  				dentry = ERR_PTR(error);
>  			}
>  		}
> -		if (dentry) {
> -			inode_unlock(inode);
> -			return dentry;
> +	} else {
> +		old = inode->i_op->lookup(inode, dentry, flags);
> +		d_not_in_lookup(dentry);
> +		if (unlikely(old)) {
> +			dput(dentry);
> +			dentry = old;
>  		}
>  	}
> -	dentry = d_alloc(dir, name);
> -	if (unlikely(!dentry)) {
> -		inode_unlock(inode);
> -		return ERR_PTR(-ENOMEM);
> -	}
> -	spin_lock(&dentry->d_lock);
> -	dentry->d_flags |= DCACHE_PAR_LOOKUP;
> -	spin_unlock(&dentry->d_lock);
> -	old = inode->i_op->lookup(inode, dentry, flags);
> -	d_not_in_lookup(dentry);
> -	if (unlikely(old)) {
> -		dput(dentry);
> -		dentry = old;
> -	}
> +out:
>  	inode_unlock(inode);
>  	return dentry;
>  }
> diff --git a/include/linux/dcache.h b/include/linux/dcache.h
> index cfc1240..3ab5ce4 100644
> --- a/include/linux/dcache.h
> +++ b/include/linux/dcache.h
> @@ -131,6 +131,7 @@ struct dentry {
>  	 */
>  	union {
>  		struct hlist_node d_alias;	/* inode alias list */
> +		struct hlist_bl_node d_in_lookup_hash;	/* only for in-lookup ones */
>  	 	struct rcu_head d_rcu;
>  	} d_u;
>  };
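
And if I'm reading the union reuse right, the three d_u members are
live at disjoint phases of the dentry's lifetime, which is what makes
the overlay safe (annotating the hunk above):

	union {
		struct hlist_node d_alias;		/* once it has an inode */
		struct hlist_bl_node d_in_lookup_hash;	/* while DCACHE_PAR_LOOKUP */
		struct rcu_head d_rcu;			/* only at free time */
	} d_u;

__d_not_in_lookup() above reinitializes d_alias as soon as the entry
leaves the in-lookup chain, so the handoff happens under the bucket
lock.
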
> @@ -248,6 +249,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op
>  /* allocate/de-allocate */
>  extern struct dentry * d_alloc(struct dentry *, const struct qstr *);
>  extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *);
> +extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *);
>  extern struct dentry * d_splice_alias(struct inode *, struct dentry *);
>  extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *);
>  extern struct dentry * d_exact_alias(struct dentry *, struct inode *);
> -- 
> 2.8.0.rc3
> 
