Message-ID: <YGzdqjmDEN788UV1@moria.home.lan>
Date:   Tue, 6 Apr 2021 18:16:10 -0400
From:   Kent Overstreet <kent.overstreet@...il.com>
To:     Dave Chinner <david@...morbit.com>
Cc:     linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH 3/3] vfs: inode cache conversion to hash-bl

On Tue, Apr 06, 2021 at 10:33:43PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@...hat.com>
> 
> Because scalability of the global inode_hash_lock really, really
> sucks and prevents me from doing scalability characterisation and
> analysis of bcachefs algorithms.
> 
> Profiles of a 32-way concurrent create of 51.2m inodes with fsmark
> on a couple of different filesystems on a 5.10 kernel:
> 
> -   52.13%     0.04%  [kernel]            [k] ext4_create
>    - 52.09% ext4_create
>       - 41.03% __ext4_new_inode
>          - 29.92% insert_inode_locked
>             - 25.35% _raw_spin_lock
>                - do_raw_spin_lock
>                   - 24.97% __pv_queued_spin_lock_slowpath
> 
> 
> -   72.33%     0.02%  [kernel]            [k] do_filp_open
>    - 72.31% do_filp_open
>       - 72.28% path_openat
>          - 57.03% bch2_create
>             - 56.46% __bch2_create
>                - 40.43% inode_insert5
>                   - 36.07% _raw_spin_lock
>                      - do_raw_spin_lock
>                           35.86% __pv_queued_spin_lock_slowpath
>                     4.02% find_inode
> 
> btrfs was tested but it is limited by internal lock contention at
> >=2 threads on this workload, so never hammers the inode cache lock
> hard enough for this change to matter to its performance.
> 
> However, both bcachefs and ext4 demonstrate poor scaling at >=8
> threads on concurrent lookup or create workloads.
> 
> Hence convert the inode hash table to an RCU-aware hash-bl table just
> like the dentry cache. Note that we need to store a pointer to the
> hlist_bl_head the inode has been added to in the inode so that when
> it comes to unhash the inode we know what list to lock. We need to
> do this because, unlike the dentry cache, the hash value that is
> used to hash the inode is not generated from the inode itself. i.e.
> filesystems can provide this themselves so we have to either store
> the hashval or the hlist head pointer in the inode to be able to
> find the right list head for removal...
> 
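
A quick aside for anyone not familiar with hash-bl: each bucket is a
single pointer, and bit 0 of that pointer doubles as a bit spinlock, so
locking is per-chain with no memory overhead over a plain hlist table.
A minimal sketch of an insert using the list_bl API -- purely
illustrative, my_table/my_insert are made-up names and not from this
patch:

	#include <linux/list_bl.h>
	#include <linux/rculist_bl.h>

	#define MY_HASH_BITS	10
	static struct hlist_bl_head my_table[1 << MY_HASH_BITS];

	static void my_insert(struct hlist_bl_node *n, unsigned long hash)
	{
		struct hlist_bl_head *b;

		b = &my_table[hash & ((1 << MY_HASH_BITS) - 1)];
		hlist_bl_lock(b);		/* bit_spin_lock(0, &b->first) */
		hlist_bl_add_head_rcu(n, b);	/* readers may walk under RCU */
		hlist_bl_unlock(b);
	}

This is the same trick the dcache uses, as noted above.
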
> Concurrent create with varying thread count (files/s):
> 
> 		ext4			bcachefs
> threads		vanilla	 patched	vanilla	patched
> 2		117k	 112k		 80k	 85k
> 4		185k	 190k		133k	145k
> 8		303k	 346k		185k	255k
> 16		389k	 465k		190k	420k
> 32		360k	 437k		142k	481k
> 
> CPU usage for both bcachefs and ext4 at 16 and 32 threads has been
> halved on the patched kernel, while performance has increased
> marginally on ext4 and massively on bcachefs. Internal filesystem
> algorithms now limit performance on these workloads, not the global
> inode_hash_lock.
> 
> Profile of the workloads on the patched kernels:
> 
> -   35.94%     0.07%  [kernel]                  [k] ext4_create
>    - 35.87% ext4_create
>       - 20.45% __ext4_new_inode
> ...
>            3.36% insert_inode_locked
> 
>    - 78.43% do_filp_open
>       - 78.36% path_openat
>          - 53.95% bch2_create
>             - 47.99% __bch2_create
> ....
>               - 7.57% inode_insert5
>                     6.94% find_inode
> 
> Spinlock contention is largely gone from the inode hash operations
> and the filesystems are limited by contention in their internal
> algorithms.
> 
> Signed-off-by: Dave Chinner <dchinner@...hat.com>

Reviewed-and-tested-by: Kent Overstreet <kent.overstreet@...il.com>

> ---
>  fs/inode.c         | 200 ++++++++++++++++++++++++++++-----------------
>  include/linux/fs.h |   9 +-
>  2 files changed, 132 insertions(+), 77 deletions(-)
> 
> diff --git a/fs/inode.c b/fs/inode.c
> index b8d9eb3454dc..867af386177b 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -57,8 +57,7 @@
>  
>  static unsigned int i_hash_mask __read_mostly;
>  static unsigned int i_hash_shift __read_mostly;
> -static struct hlist_head *inode_hashtable __read_mostly;
> -static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
> +static struct hlist_bl_head *inode_hashtable __read_mostly;
>  
>  static unsigned long hash(struct super_block *sb, unsigned long hashval)
>  {
> @@ -70,7 +69,7 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval)
>  	return tmp & i_hash_mask;
>  }
>  
> -static inline struct hlist_head *i_hash_head(struct super_block *sb,
> +static inline struct hlist_bl_head *i_hash_head(struct super_block *sb,
>  		unsigned int hashval)
>  {
>  	return inode_hashtable + hash(sb, hashval);
> @@ -407,7 +406,7 @@ EXPORT_SYMBOL(address_space_init_once);
>  void inode_init_once(struct inode *inode)
>  {
>  	memset(inode, 0, sizeof(*inode));
> -	INIT_HLIST_NODE(&inode->i_hash);
> +	INIT_HLIST_BL_NODE(&inode->i_hash);
>  	INIT_LIST_HEAD(&inode->i_devices);
>  	INIT_LIST_HEAD(&inode->i_io_list);
>  	INIT_LIST_HEAD(&inode->i_wb_list);
> @@ -491,6 +490,17 @@ static inline void inode_sb_list_del(struct inode *inode)
>  	}
>  }
>  
> +/*
> + * Ensure that we store the hash head in the inode when we insert the inode into
> + * the hlist_bl_head...
> + */
> +static inline void
> +__insert_inode_hash_head(struct inode *inode, struct hlist_bl_head *b)
> +{
> +	hlist_bl_add_head_rcu(&inode->i_hash, b);
> +	inode->i_hash_head = b;
> +}
> +
>  /**
>   *	__insert_inode_hash - hash an inode
>   *	@inode: unhashed inode
> @@ -501,13 +511,13 @@ static inline void inode_sb_list_del(struct inode *inode)
>   */
>  void __insert_inode_hash(struct inode *inode, unsigned long hashval)
>  {
> -	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
> +	struct hlist_bl_head *b = i_hash_head(inode->i_sb, hashval);
>  
> -	spin_lock(&inode_hash_lock);
> +	hlist_bl_lock(b);
>  	spin_lock(&inode->i_lock);
> -	hlist_add_head_rcu(&inode->i_hash, b);
> +	__insert_inode_hash_head(inode, b);
>  	spin_unlock(&inode->i_lock);
> -	spin_unlock(&inode_hash_lock);
> +	hlist_bl_unlock(b);
>  }
>  EXPORT_SYMBOL(__insert_inode_hash);
>  
> @@ -519,11 +529,44 @@ EXPORT_SYMBOL(__insert_inode_hash);
>   */
>  void __remove_inode_hash(struct inode *inode)
>  {
> -	spin_lock(&inode_hash_lock);
> -	spin_lock(&inode->i_lock);
> -	hlist_del_init_rcu(&inode->i_hash);
> -	spin_unlock(&inode->i_lock);
> -	spin_unlock(&inode_hash_lock);
> +	struct hlist_bl_head *b = inode->i_hash_head;
> +
> +	/*
> +	 * There are some callers that come through here without synchronisation
> +	 * and potentially with multiple references to the inode. Hence we have
> +	 * to handle the case that we might race with a remove and insert to a
> +	 * different list. Coda, in particular, seems to have a userspace API
> +	 * that can directly trigger "unhash/rehash to different list" behaviour
> +	 * without any serialisation at all.
> +	 *
> +	 * Hence we have to handle the situation where the inode->i_hash_head
> +	 * might point to a different list than what we expect, indicating that
> +	 * we raced with another unhash and potentially a new insertion. This
> +	 * means we have to retest the head once we have everything locked up
> +	 * and loop again if it doesn't match.
> +	 */
> +	while (b) {
> +		hlist_bl_lock(b);
> +		spin_lock(&inode->i_lock);
> +		if (b != inode->i_hash_head) {
> +			hlist_bl_unlock(b);
> +			b = inode->i_hash_head;
> +			spin_unlock(&inode->i_lock);
> +			continue;
> +		}
> +		/*
> +		 * Need to set the pprev pointer to NULL after list removal so
> +		 * that both RCU traversals and hlist_bl_unhashed() work
> +		 * correctly at this point.
> +		 */
> +		hlist_bl_del_rcu(&inode->i_hash);
> +		inode->i_hash.pprev = NULL;
> +		inode->i_hash_head = NULL;
> +		spin_unlock(&inode->i_lock);
> +		hlist_bl_unlock(b);
> +		break;
> +	}
> +
>  }
>  EXPORT_SYMBOL(__remove_inode_hash);
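
A side note on the explicit pprev clear above: it's load-bearing,
because hlist_bl_del_rcu() poisons pprev rather than NULLing it, and
inode_unhashed() is implemented on top of pprev. Roughly what the
helpers look like, paraphrased from include/linux/rculist_bl.h and
list_bl.h (from memory, check your tree):

	static inline void hlist_bl_del_rcu(struct hlist_bl_node *n)
	{
		__hlist_bl_del(n);
		n->pprev = LIST_POISON2;	/* poisoned, not NULL */
	}

	static inline int hlist_bl_unhashed(const struct hlist_bl_node *h)
	{
		return !h->pprev;		/* NULL pprev == unhashed */
	}
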
>  
> @@ -813,26 +856,28 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
>  	return freed;
>  }
>  
> -static void __wait_on_freeing_inode(struct inode *inode);
> +static void __wait_on_freeing_inode(struct hlist_bl_head *b,
> +				struct inode *inode);
>  /*
>   * Called with the inode lock held.
>   */
>  static struct inode *find_inode(struct super_block *sb,
> -				struct hlist_head *head,
> +				struct hlist_bl_head *b,
>  				int (*test)(struct inode *, void *),
>  				void *data)
>  {
> +	struct hlist_bl_node *node;
>  	struct inode *inode = NULL;
>  
>  repeat:
> -	hlist_for_each_entry(inode, head, i_hash) {
> +	hlist_bl_for_each_entry(inode, node, b, i_hash) {
>  		if (inode->i_sb != sb)
>  			continue;
>  		if (!test(inode, data))
>  			continue;
>  		spin_lock(&inode->i_lock);
>  		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
> -			__wait_on_freeing_inode(inode);
> +			__wait_on_freeing_inode(b, inode);
>  			goto repeat;
>  		}
>  		if (unlikely(inode->i_state & I_CREATING)) {
> @@ -851,19 +896,20 @@ static struct inode *find_inode(struct super_block *sb,
>   * iget_locked for details.
>   */
>  static struct inode *find_inode_fast(struct super_block *sb,
> -				struct hlist_head *head, unsigned long ino)
> +				struct hlist_bl_head *b, unsigned long ino)
>  {
> +	struct hlist_bl_node *node;
>  	struct inode *inode = NULL;
>  
>  repeat:
> -	hlist_for_each_entry(inode, head, i_hash) {
> +	hlist_bl_for_each_entry(inode, node, b, i_hash) {
>  		if (inode->i_ino != ino)
>  			continue;
>  		if (inode->i_sb != sb)
>  			continue;
>  		spin_lock(&inode->i_lock);
>  		if (inode->i_state & (I_FREEING|I_WILL_FREE)) {
> -			__wait_on_freeing_inode(inode);
> +			__wait_on_freeing_inode(b, inode);
>  			goto repeat;
>  		}
>  		if (unlikely(inode->i_state & I_CREATING)) {
> @@ -1072,26 +1118,26 @@ EXPORT_SYMBOL(unlock_two_nondirectories);
>   * return it locked, hashed, and with the I_NEW flag set. The file system gets
>   * to fill it in before unlocking it via unlock_new_inode().
>   *
> - * Note both @test and @set are called with the inode_hash_lock held, so can't
> - * sleep.
> + * Note both @test and @set are called with the inode hash chain lock held,
> + * so can't sleep.
>   */
>  struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
>  			    int (*test)(struct inode *, void *),
>  			    int (*set)(struct inode *, void *), void *data)
>  {
> -	struct hlist_head *head = i_hash_head(inode->i_sb, hashval);
> +	struct hlist_bl_head *b = i_hash_head(inode->i_sb, hashval);
>  	struct inode *old;
>  	bool creating = inode->i_state & I_CREATING;
>  
>  again:
> -	spin_lock(&inode_hash_lock);
> -	old = find_inode(inode->i_sb, head, test, data);
> +	hlist_bl_lock(b);
> +	old = find_inode(inode->i_sb, b, test, data);
>  	if (unlikely(old)) {
>  		/*
>  		 * Uhhuh, somebody else created the same inode under us.
>  		 * Use the old inode instead of the preallocated one.
>  		 */
> -		spin_unlock(&inode_hash_lock);
> +		hlist_bl_unlock(b);
>  		if (IS_ERR(old))
>  			return NULL;
>  		wait_on_inode(old);
> @@ -1113,12 +1159,12 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
>  	 */
>  	spin_lock(&inode->i_lock);
>  	inode->i_state |= I_NEW;
> -	hlist_add_head_rcu(&inode->i_hash, head);
> +	__insert_inode_hash_head(inode, b);
>  	spin_unlock(&inode->i_lock);
>  	if (!creating)
>  		inode_sb_list_add(inode);
>  unlock:
> -	spin_unlock(&inode_hash_lock);
> +	hlist_bl_unlock(b);
>  
>  	return inode;
>  }
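
Also worth calling out: the conversion keeps the old lock ordering,
with the per-chain bit lock simply taking the slot the global
inode_hash_lock used to occupy, and i_lock still nesting inside it.
A condensed view of the insert path -- a sketch of the code above,
with error handling and the I_CREATING details elided:

	hlist_bl_lock(b);		/* was: spin_lock(&inode_hash_lock) */
	old = find_inode(inode->i_sb, b, test, data);
	if (!old) {
		spin_lock(&inode->i_lock);	/* nests inside chain lock */
		inode->i_state |= I_NEW;
		__insert_inode_hash_head(inode, b);
		spin_unlock(&inode->i_lock);
	}
	hlist_bl_unlock(b);		/* was: spin_unlock(&inode_hash_lock) */
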
> @@ -1179,12 +1225,12 @@ EXPORT_SYMBOL(iget5_locked);
>   */
>  struct inode *iget_locked(struct super_block *sb, unsigned long ino)
>  {
> -	struct hlist_head *head = i_hash_head(sb, ino);
> +	struct hlist_bl_head *b = i_hash_head(sb, ino);
>  	struct inode *inode;
>  again:
> -	spin_lock(&inode_hash_lock);
> -	inode = find_inode_fast(sb, head, ino);
> -	spin_unlock(&inode_hash_lock);
> +	hlist_bl_lock(b);
> +	inode = find_inode_fast(sb, b, ino);
> +	hlist_bl_unlock(b);
>  	if (inode) {
>  		if (IS_ERR(inode))
>  			return NULL;
> @@ -1200,17 +1246,17 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
>  	if (inode) {
>  		struct inode *old;
>  
> -		spin_lock(&inode_hash_lock);
> +		hlist_bl_lock(b);
>  		/* We released the lock, so.. */
> -		old = find_inode_fast(sb, head, ino);
> +		old = find_inode_fast(sb, b, ino);
>  		if (!old) {
>  			inode->i_ino = ino;
>  			spin_lock(&inode->i_lock);
>  			inode->i_state = I_NEW;
> -			hlist_add_head_rcu(&inode->i_hash, head);
> +			__insert_inode_hash_head(inode, b);
>  			spin_unlock(&inode->i_lock);
>  			inode_sb_list_add(inode);
> -			spin_unlock(&inode_hash_lock);
> +			hlist_bl_unlock(b);
>  
>  			/* Return the locked inode with I_NEW set, the
>  			 * caller is responsible for filling in the contents
> @@ -1223,7 +1269,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
>  		 * us. Use the old inode instead of the one we just
>  		 * allocated.
>  		 */
> -		spin_unlock(&inode_hash_lock);
> +		hlist_bl_unlock(b);
>  		destroy_inode(inode);
>  		if (IS_ERR(old))
>  			return NULL;
> @@ -1247,10 +1293,11 @@ EXPORT_SYMBOL(iget_locked);
>   */
>  static int test_inode_iunique(struct super_block *sb, unsigned long ino)
>  {
> -	struct hlist_head *b = i_hash_head(sb, ino);
> +	struct hlist_bl_head *b = i_hash_head(sb, ino);
> +	struct hlist_bl_node *node;
>  	struct inode *inode;
>  
> -	hlist_for_each_entry_rcu(inode, b, i_hash) {
> +	hlist_bl_for_each_entry_rcu(inode, node, b, i_hash) {
>  		if (inode->i_ino == ino && inode->i_sb == sb)
>  			return 0;
>  	}
> @@ -1334,12 +1381,12 @@ EXPORT_SYMBOL(igrab);
>  struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
>  		int (*test)(struct inode *, void *), void *data)
>  {
> -	struct hlist_head *head = i_hash_head(sb, hashval);
> +	struct hlist_bl_head *b = i_hash_head(sb, hashval);
>  	struct inode *inode;
>  
> -	spin_lock(&inode_hash_lock);
> -	inode = find_inode(sb, head, test, data);
> -	spin_unlock(&inode_hash_lock);
> +	hlist_bl_lock(b);
> +	inode = find_inode(sb, b, test, data);
> +	hlist_bl_unlock(b);
>  
>  	return IS_ERR(inode) ? NULL : inode;
>  }
> @@ -1389,12 +1436,12 @@ EXPORT_SYMBOL(ilookup5);
>   */
>  struct inode *ilookup(struct super_block *sb, unsigned long ino)
>  {
> -	struct hlist_head *head = i_hash_head(sb, ino);
> +	struct hlist_bl_head *b = i_hash_head(sb, ino);
>  	struct inode *inode;
>  again:
> -	spin_lock(&inode_hash_lock);
> -	inode = find_inode_fast(sb, head, ino);
> -	spin_unlock(&inode_hash_lock);
> +	hlist_bl_lock(b);
> +	inode = find_inode_fast(sb, b, ino);
> +	hlist_bl_unlock(b);
>  
>  	if (inode) {
>  		if (IS_ERR(inode))
> @@ -1438,12 +1485,13 @@ struct inode *find_inode_nowait(struct super_block *sb,
>  					     void *),
>  				void *data)
>  {
> -	struct hlist_head *head = i_hash_head(sb, hashval);
> +	struct hlist_bl_head *b = i_hash_head(sb, hashval);
> +	struct hlist_bl_node *node;
>  	struct inode *inode, *ret_inode = NULL;
>  	int mval;
>  
> -	spin_lock(&inode_hash_lock);
> -	hlist_for_each_entry(inode, head, i_hash) {
> +	hlist_bl_lock(b);
> +	hlist_bl_for_each_entry(inode, node, b, i_hash) {
>  		if (inode->i_sb != sb)
>  			continue;
>  		mval = match(inode, hashval, data);
> @@ -1454,7 +1502,7 @@ struct inode *find_inode_nowait(struct super_block *sb,
>  		goto out;
>  	}
>  out:
> -	spin_unlock(&inode_hash_lock);
> +	hlist_bl_unlock(b);
>  	return ret_inode;
>  }
>  EXPORT_SYMBOL(find_inode_nowait);
> @@ -1483,13 +1531,14 @@ EXPORT_SYMBOL(find_inode_nowait);
>  struct inode *find_inode_rcu(struct super_block *sb, unsigned long hashval,
>  			     int (*test)(struct inode *, void *), void *data)
>  {
> -	struct hlist_head *head = i_hash_head(sb, hashval);
> +	struct hlist_bl_head *b = i_hash_head(sb, hashval);
> +	struct hlist_bl_node *node;
>  	struct inode *inode;
>  
>  	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
>  			 "suspicious find_inode_rcu() usage");
>  
> -	hlist_for_each_entry_rcu(inode, head, i_hash) {
> +	hlist_bl_for_each_entry_rcu(inode, node, b, i_hash) {
>  		if (inode->i_sb == sb &&
>  		    !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)) &&
>  		    test(inode, data))
> @@ -1521,13 +1570,14 @@ EXPORT_SYMBOL(find_inode_rcu);
>  struct inode *find_inode_by_ino_rcu(struct super_block *sb,
>  				    unsigned long ino)
>  {
> -	struct hlist_head *head = i_hash_head(sb, ino);
> +	struct hlist_bl_head *b = i_hash_head(sb, ino);
> +	struct hlist_bl_node *node;
>  	struct inode *inode;
>  
>  	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
>  			 "suspicious find_inode_by_ino_rcu() usage");
>  
> -	hlist_for_each_entry_rcu(inode, head, i_hash) {
> +	hlist_bl_for_each_entry_rcu(inode, node, b, i_hash) {
>  		if (inode->i_ino == ino &&
>  		    inode->i_sb == sb &&
>  		    !(READ_ONCE(inode->i_state) & (I_FREEING | I_WILL_FREE)))
> @@ -1541,39 +1591,42 @@ int insert_inode_locked(struct inode *inode)
>  {
>  	struct super_block *sb = inode->i_sb;
>  	ino_t ino = inode->i_ino;
> -	struct hlist_head *head = i_hash_head(sb, ino);
> +	struct hlist_bl_head *b = i_hash_head(sb, ino);
>  
>  	while (1) {
> -		struct inode *old = NULL;
> -		spin_lock(&inode_hash_lock);
> -		hlist_for_each_entry(old, head, i_hash) {
> -			if (old->i_ino != ino)
> +		struct hlist_bl_node *node;
> +		struct inode *old = NULL, *t;
> +
> +		hlist_bl_lock(b);
> +		hlist_bl_for_each_entry(t, node, b, i_hash) {
> +			if (t->i_ino != ino)
>  				continue;
> -			if (old->i_sb != sb)
> +			if (t->i_sb != sb)
>  				continue;
> -			spin_lock(&old->i_lock);
> -			if (old->i_state & (I_FREEING|I_WILL_FREE)) {
> -				spin_unlock(&old->i_lock);
> +			spin_lock(&t->i_lock);
> +			if (t->i_state & (I_FREEING|I_WILL_FREE)) {
> +				spin_unlock(&t->i_lock);
>  				continue;
>  			}
> +			old = t;
>  			break;
>  		}
>  		if (likely(!old)) {
>  			spin_lock(&inode->i_lock);
>  			inode->i_state |= I_NEW | I_CREATING;
> -			hlist_add_head_rcu(&inode->i_hash, head);
> +			__insert_inode_hash_head(inode, b);
>  			spin_unlock(&inode->i_lock);
> -			spin_unlock(&inode_hash_lock);
> +			hlist_bl_unlock(b);
>  			return 0;
>  		}
>  		if (unlikely(old->i_state & I_CREATING)) {
>  			spin_unlock(&old->i_lock);
> -			spin_unlock(&inode_hash_lock);
> +			hlist_bl_unlock(b);
>  			return -EBUSY;
>  		}
>  		__iget(old);
>  		spin_unlock(&old->i_lock);
> -		spin_unlock(&inode_hash_lock);
> +		hlist_bl_unlock(b);
>  		wait_on_inode(old);
>  		if (unlikely(!inode_unhashed(old))) {
>  			iput(old);
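
The old/t split in the loop above isn't cosmetic: unlike
hlist_for_each_entry(), the hlist_bl walker doesn't leave its type
pointer NULL when it runs off the end of an unmatched chain. Roughly,
from include/linux/list_bl.h (again from memory, check your tree):

	#define hlist_bl_for_each_entry(tpos, pos, head, member)	\
		for (pos = hlist_bl_first(head);			\
		     pos &&						\
		     ({ tpos = hlist_bl_entry(pos, typeof(*tpos), member); 1; }); \
		     pos = pos->next)

tpos keeps whatever it last pointed at, so testing the iteration
variable directly for "not found" -- as the old hlist code could --
would see a stale entry instead of NULL.
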
> @@ -2046,17 +2099,18 @@ EXPORT_SYMBOL(inode_needs_sync);
>   * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list
>   * will DTRT.
>   */
> -static void __wait_on_freeing_inode(struct inode *inode)
> +static void __wait_on_freeing_inode(struct hlist_bl_head *b,
> +				struct inode *inode)
>  {
>  	wait_queue_head_t *wq;
>  	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
>  	wq = bit_waitqueue(&inode->i_state, __I_NEW);
>  	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
>  	spin_unlock(&inode->i_lock);
> -	spin_unlock(&inode_hash_lock);
> +	hlist_bl_unlock(b);
>  	schedule();
>  	finish_wait(wq, &wait.wq_entry);
> -	spin_lock(&inode_hash_lock);
> +	hlist_bl_lock(b);
>  }
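
Passing the chain head into __wait_on_freeing_inode() is necessary now
that there's no global lock for it to name: the sleeper must drop the
chain lock around schedule() and retake it before returning. That is
also why callers restart their walk afterwards -- condensed from
find_inode_fast() above, the pattern is:

	repeat:
		hlist_bl_for_each_entry(inode, node, b, i_hash) {
			if (inode->i_ino != ino || inode->i_sb != sb)
				continue;
			spin_lock(&inode->i_lock);
			if (inode->i_state & (I_FREEING | I_WILL_FREE)) {
				/* drops, then retakes, the chain lock */
				__wait_on_freeing_inode(b, inode);
				/* chain may have changed: walk it again */
				goto repeat;
			}
			/* found a live inode */
		}
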
>  
>  static __initdata unsigned long ihash_entries;
> @@ -2082,7 +2136,7 @@ void __init inode_init_early(void)
>  
>  	inode_hashtable =
>  		alloc_large_system_hash("Inode-cache",
> -					sizeof(struct hlist_head),
> +					sizeof(struct hlist_bl_head),
>  					ihash_entries,
>  					14,
>  					HASH_EARLY | HASH_ZERO,
> @@ -2108,7 +2162,7 @@ void __init inode_init(void)
>  
>  	inode_hashtable =
>  		alloc_large_system_hash("Inode-cache",
> -					sizeof(struct hlist_head),
> +					sizeof(struct hlist_bl_head),
>  					ihash_entries,
>  					14,
>  					HASH_ZERO,
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index ec8f3ddf4a6a..89c9e49a71a6 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -664,7 +664,8 @@ struct inode {
>  	unsigned long		dirtied_when;	/* jiffies of first dirtying */
>  	unsigned long		dirtied_time_when;
>  
> -	struct hlist_node	i_hash;
> +	struct hlist_bl_node	i_hash;
> +	struct hlist_bl_head	*i_hash_head;
>  	struct list_head	i_io_list;	/* backing dev IO list */
>  #ifdef CONFIG_CGROUP_WRITEBACK
>  	struct bdi_writeback	*i_wb;		/* the associated cgroup wb */
> @@ -730,7 +731,7 @@ static inline unsigned int i_blocksize(const struct inode *node)
>  
>  static inline int inode_unhashed(struct inode *inode)
>  {
> -	return hlist_unhashed(&inode->i_hash);
> +	return hlist_bl_unhashed(&inode->i_hash);
>  }
>  
>  /*
> @@ -741,7 +742,7 @@ static inline int inode_unhashed(struct inode *inode)
>   */
>  static inline void inode_fake_hash(struct inode *inode)
>  {
> -	hlist_add_fake(&inode->i_hash);
> +	hlist_bl_add_fake(&inode->i_hash);
>  }
>  
>  /*
> @@ -3065,7 +3066,7 @@ static inline void insert_inode_hash(struct inode *inode)
>  extern void __remove_inode_hash(struct inode *);
>  static inline void remove_inode_hash(struct inode *inode)
>  {
> -	if (!inode_unhashed(inode) && !hlist_fake(&inode->i_hash))
> +	if (!inode_unhashed(inode) && !hlist_bl_fake(&inode->i_hash))
>  		__remove_inode_hash(inode);
>  }
>  
> -- 
> 2.31.0
> 
