Now that i_count is a proper reference count, such that 0 means free or freeing, and all .destroy_inode methods use RCU to free inodes, we can trivially convert the inode hash to RCU and do RCU lookups. So provide RCU variants of find_inode() and find_inode_fast(); in case we do hit an inode with i_count==0, we fall back to the old code that does a __wait_on_freeing_inode(). _However_ this makes the situation with using i_count() for decisions far worse; those few that are in the evict/free path seem safe, the rest is up for grabs. If the rest of the sites were OK, they probably are no longer and need help. Signed-off-by: Peter Zijlstra (Intel) --- fs/inode.c | 104 ++++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 76 insertions(+), 28 deletions(-) --- a/fs/inode.c +++ b/fs/inode.c @@ -471,7 +471,7 @@ void __insert_inode_hash(struct inode *i spin_lock(&inode_hash_lock); spin_lock(&inode->i_lock); - hlist_add_head(&inode->i_hash, b); + hlist_add_head_rcu(&inode->i_hash, b); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); } @@ -487,7 +487,7 @@ void __remove_inode_hash(struct inode *i { spin_lock(&inode_hash_lock); spin_lock(&inode->i_lock); - hlist_del_init(&inode->i_hash); + hlist_del_init_rcu(&inode->i_hash); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); } @@ -777,15 +777,15 @@ long prune_icache_sb(struct super_block } static void __wait_on_freeing_inode(struct inode *inode); -/* - * Called with the inode lock held. 
- */ -static struct inode *find_inode(struct super_block *sb, + +static struct inode *__find_inode(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), void *data) { - struct inode *inode = NULL; + struct inode *inode; + + lockdep_assert_held(&inode_hash_lock); repeat: hlist_for_each_entry(inode, head, i_hash) { @@ -805,14 +805,44 @@ static struct inode *find_inode(struct s return NULL; } +static struct inode *find_inode(struct super_block *sb, + struct hlist_head *head, + int (*test)(struct inode *, void *), + void *data) +{ + struct inode *inode; + + rcu_read_lock(); + hlist_for_each_entry_rcu(inode, head, i_hash) { + if (inode->i_sb != sb) + continue; + if (!test(inode, data)) + continue; + if (atomic_inc_not_zero(&inode->i_count)) + goto out_unlock; + goto slow; + } + inode = NULL; +out_unlock: + rcu_read_unlock(); + return inode; + +slow: + rcu_read_unlock(); + spin_lock(&inode_hash_lock); + inode = __find_inode(sb, head, test, data); + spin_unlock(&inode_hash_lock); + return inode; +} + /* - * find_inode_fast is the fast path version of find_inode, see the comment at + * __find_inode_fast is the fast path version of __find_inode, see the comment at * iget_locked for details. 
*/ -static struct inode *find_inode_fast(struct super_block *sb, - struct hlist_head *head, unsigned long ino) +static struct inode *__find_inode_fast(struct super_block *sb, + struct hlist_head *head, unsigned long ino) { - struct inode *inode = NULL; + struct inode *inode; lockdep_assert_held(&inode_hash_lock); @@ -834,6 +864,34 @@ static struct inode *find_inode_fast(str return NULL; } +static struct inode *find_inode_fast(struct super_block *sb, + struct hlist_head *head, unsigned long ino) +{ + struct inode *inode; + + rcu_read_lock(); + hlist_for_each_entry_rcu(inode, head, i_hash) { + if (inode->i_ino != ino) + continue; + if (inode->i_sb != sb) + continue; + if (atomic_inc_not_zero(&inode->i_count)) + goto out_unlock; + goto slow; + } + inode = NULL; +out_unlock: + rcu_read_unlock(); + return inode; + +slow: + rcu_read_unlock(); + spin_lock(&inode_hash_lock); + inode = __find_inode_fast(sb, head, ino); + spin_unlock(&inode_hash_lock); + return inode; +} + /* * Each cpu owns a range of LAST_INO_BATCH numbers. * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations, @@ -1026,10 +1084,7 @@ struct inode *iget5_locked(struct super_ struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; again: - spin_lock(&inode_hash_lock); inode = find_inode(sb, head, test, data); - spin_unlock(&inode_hash_lock); - if (inode) { wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { @@ -1045,14 +1100,14 @@ struct inode *iget5_locked(struct super_ spin_lock(&inode_hash_lock); /* We released the lock, so.. 
*/ - old = find_inode(sb, head, test, data); + old = __find_inode(sb, head, test, data); if (!old) { if (set(inode, data)) goto set_failed; spin_lock(&inode->i_lock); inode->i_state = I_NEW; - hlist_add_head(&inode->i_hash, head); + hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); inode_sb_list_add(inode); spin_unlock(&inode_hash_lock); @@ -1104,9 +1159,7 @@ struct inode *iget_locked(struct super_b struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; again: - spin_lock(&inode_hash_lock); inode = find_inode_fast(sb, head, ino); - spin_unlock(&inode_hash_lock); if (inode) { wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { @@ -1122,12 +1175,12 @@ struct inode *iget_locked(struct super_b spin_lock(&inode_hash_lock); /* We released the lock, so.. */ - old = find_inode_fast(sb, head, ino); + old = __find_inode_fast(sb, head, ino); if (!old) { inode->i_ino = ino; spin_lock(&inode->i_lock); inode->i_state = I_NEW; - hlist_add_head(&inode->i_hash, head); + hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); inode_sb_list_add(inode); spin_unlock(&inode_hash_lock); @@ -1258,9 +1311,7 @@ struct inode *ilookup5_nowait(struct sup struct hlist_head *head = inode_hashtable + hash(sb, hashval); struct inode *inode; - spin_lock(&inode_hash_lock); inode = find_inode(sb, head, test, data); - spin_unlock(&inode_hash_lock); return inode; } @@ -1313,10 +1364,7 @@ struct inode *ilookup(struct super_block struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; again: - spin_lock(&inode_hash_lock); inode = find_inode_fast(sb, head, ino); - spin_unlock(&inode_hash_lock); - if (inode) { wait_on_inode(inode); if (unlikely(inode_unhashed(inode))) { @@ -1345,7 +1393,7 @@ EXPORT_SYMBOL(ilookup); * the inode_hash_lock spinlock held. 
* * This is a even more generalized version of ilookup5() when the - * function must never block --- find_inode() can block in + * function must never block --- __find_inode() can block in * __wait_on_freeing_inode() --- or when the caller can not increment * the reference count because the resulting iput() might cause an * inode eviction. The tradeoff is that the @match funtion must be @@ -1402,7 +1450,7 @@ int insert_inode_locked(struct inode *in if (likely(!old)) { spin_lock(&inode->i_lock); inode->i_state |= I_NEW; - hlist_add_head(&inode->i_hash, head); + hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); return 0; @@ -1445,7 +1493,7 @@ int insert_inode_locked4(struct inode *i if (likely(!old)) { spin_lock(&inode->i_lock); inode->i_state |= I_NEW; - hlist_add_head(&inode->i_hash, head); + hlist_add_head_rcu(&inode->i_hash, head); spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); return 0;