[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAGudoHETiJ8G8WeyFYJ6EZ4oxcmqxV3yztZDOxL8PUBGobW_xQ@mail.gmail.com>
Date: Wed, 15 Oct 2025 13:50:25 +0200
From: Mateusz Guzik <mjguzik@...il.com>
To: brauner@...nel.org
Cc: viro@...iv.linux.org.uk, jack@...e.cz, linux-kernel@...r.kernel.org,
linux-fsdevel@...r.kernel.org
Subject: Re: [PATCH] fs: rework I_NEW handling to operate without fences
Can I get some flames on this?
On Sat, Oct 11, 2025 at 12:17 AM Mateusz Guzik <mjguzik@...il.com> wrote:
>
> In the inode hash code grab the state while ->i_lock is held. If found
> to be set, synchronize the sleep once more with the lock held.
>
> In the real world the flag is not set most of the time.
>
> Apart from being simpler to reason about, it comes with a minor speed up
> as now clearing the flag does not require the smp_mb() fence.
>
> While here rename wait_on_inode() to wait_on_new_inode() to line it up
> with __wait_on_freeing_inode().
>
> Signed-off-by: Mateusz Guzik <mjguzik@...il.com>
> ---
>
> This temporarily duplicates the sleep code from inode_wait_for_lru_isolating().
> This is going to get deduped later.
>
> There is high repetition of:
> if (unlikely(isnew)) {
>         wait_on_new_inode(old);
>         if (unlikely(inode_unhashed(old))) {
>                 iput(old);
>                 goto again;
>         }
> }
>
> I expect this is going to go away after I post a patch to sanitize the
> current APIs for the hash.
>
>
> fs/afs/dir.c | 4 +-
> fs/dcache.c | 10 ----
> fs/gfs2/glock.c | 2 +-
> fs/inode.c | 146 +++++++++++++++++++++++++++------------------
> include/linux/fs.h | 12 +---
> 5 files changed, 93 insertions(+), 81 deletions(-)
>
> diff --git a/fs/afs/dir.c b/fs/afs/dir.c
> index 89d36e3e5c79..f4e9e12373ac 100644
> --- a/fs/afs/dir.c
> +++ b/fs/afs/dir.c
> @@ -779,7 +779,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
> struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
> struct inode *inode = NULL, *ti;
> afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
> - bool supports_ibulk;
> + bool supports_ibulk, isnew;
> long ret;
> int i;
>
> @@ -850,7 +850,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
> * callback counters.
> */
> ti = ilookup5_nowait(dir->i_sb, vp->fid.vnode,
> - afs_ilookup5_test_by_fid, &vp->fid);
> + afs_ilookup5_test_by_fid, &vp->fid, &isnew);
> if (!IS_ERR_OR_NULL(ti)) {
> vnode = AFS_FS_I(ti);
> vp->dv_before = vnode->status.data_version;
> diff --git a/fs/dcache.c b/fs/dcache.c
> index 78ffa7b7e824..25131f105a60 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -1981,17 +1981,7 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
> spin_lock(&inode->i_lock);
> __d_instantiate(entry, inode);
> WARN_ON(!(inode_state_read(inode) & I_NEW));
> - /*
> - * Pairs with smp_rmb in wait_on_inode().
> - */
> - smp_wmb();
> inode_state_clear(inode, I_NEW | I_CREATING);
> - /*
> - * Pairs with the barrier in prepare_to_wait_event() to make sure
> - * ___wait_var_event() either sees the bit cleared or
> - * waitqueue_active() check in wake_up_var() sees the waiter.
> - */
> - smp_mb();
> inode_wake_up_bit(inode, __I_NEW);
> spin_unlock(&inode->i_lock);
> }
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index b677c0e6b9ab..c9712235e7a0 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -957,7 +957,7 @@ static struct gfs2_inode *gfs2_grab_existing_inode(struct gfs2_glock *gl)
> ip = NULL;
> spin_unlock(&gl->gl_lockref.lock);
> if (ip) {
> - wait_on_inode(&ip->i_inode);
> + wait_on_new_inode(&ip->i_inode);
> if (is_bad_inode(&ip->i_inode)) {
> iput(&ip->i_inode);
> ip = NULL;
> diff --git a/fs/inode.c b/fs/inode.c
> index 3153d725859c..1396f79b2551 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -558,6 +558,32 @@ struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
> }
> EXPORT_SYMBOL(inode_bit_waitqueue);
>
> +void wait_on_new_inode(struct inode *inode)
> +{
> + struct wait_bit_queue_entry wqe;
> + struct wait_queue_head *wq_head;
> +
> + spin_lock(&inode->i_lock);
> + if (!(inode_state_read(inode) & I_NEW)) {
> + spin_unlock(&inode->i_lock);
> + return;
> + }
> +
> + wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
> + for (;;) {
> + prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
> + if (!(inode_state_read(inode) & I_NEW))
> + break;
> + spin_unlock(&inode->i_lock);
> + schedule();
> + spin_lock(&inode->i_lock);
> + }
> + finish_wait(wq_head, &wqe.wq_entry);
> + WARN_ON(inode_state_read(inode) & I_NEW);
> + spin_unlock(&inode->i_lock);
> +}
> +EXPORT_SYMBOL(wait_on_new_inode);
> +
> /*
> * Add inode to LRU if needed (inode is unused and clean).
> *
> @@ -1008,7 +1034,8 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
> static struct inode *find_inode(struct super_block *sb,
> struct hlist_head *head,
> int (*test)(struct inode *, void *),
> - void *data, bool is_inode_hash_locked)
> + void *data, bool is_inode_hash_locked,
> + bool *isnew)
> {
> struct inode *inode = NULL;
>
> @@ -1035,6 +1062,7 @@ static struct inode *find_inode(struct super_block *sb,
> return ERR_PTR(-ESTALE);
> }
> __iget(inode);
> + *isnew = !!(inode_state_read(inode) & I_NEW);
> spin_unlock(&inode->i_lock);
> rcu_read_unlock();
> return inode;
> @@ -1049,7 +1077,7 @@ static struct inode *find_inode(struct super_block *sb,
> */
> static struct inode *find_inode_fast(struct super_block *sb,
> struct hlist_head *head, unsigned long ino,
> - bool is_inode_hash_locked)
> + bool is_inode_hash_locked, bool *isnew)
> {
> struct inode *inode = NULL;
>
> @@ -1076,6 +1104,7 @@ static struct inode *find_inode_fast(struct super_block *sb,
> return ERR_PTR(-ESTALE);
> }
> __iget(inode);
> + *isnew = !!(inode_state_read(inode) & I_NEW);
> spin_unlock(&inode->i_lock);
> rcu_read_unlock();
> return inode;
> @@ -1181,17 +1210,7 @@ void unlock_new_inode(struct inode *inode)
> lockdep_annotate_inode_mutex_key(inode);
> spin_lock(&inode->i_lock);
> WARN_ON(!(inode_state_read(inode) & I_NEW));
> - /*
> - * Pairs with smp_rmb in wait_on_inode().
> - */
> - smp_wmb();
> inode_state_clear(inode, I_NEW | I_CREATING);
> - /*
> - * Pairs with the barrier in prepare_to_wait_event() to make sure
> - * ___wait_var_event() either sees the bit cleared or
> - * waitqueue_active() check in wake_up_var() sees the waiter.
> - */
> - smp_mb();
> inode_wake_up_bit(inode, __I_NEW);
> spin_unlock(&inode->i_lock);
> }
> @@ -1202,17 +1221,7 @@ void discard_new_inode(struct inode *inode)
> lockdep_annotate_inode_mutex_key(inode);
> spin_lock(&inode->i_lock);
> WARN_ON(!(inode_state_read(inode) & I_NEW));
> - /*
> - * Pairs with smp_rmb in wait_on_inode().
> - */
> - smp_wmb();
> inode_state_clear(inode, I_NEW);
> - /*
> - * Pairs with the barrier in prepare_to_wait_event() to make sure
> - * ___wait_var_event() either sees the bit cleared or
> - * waitqueue_active() check in wake_up_var() sees the waiter.
> - */
> - smp_mb();
> inode_wake_up_bit(inode, __I_NEW);
> spin_unlock(&inode->i_lock);
> iput(inode);
> @@ -1286,12 +1295,13 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
> {
> struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
> struct inode *old;
> + bool isnew;
>
> might_sleep();
>
> again:
> spin_lock(&inode_hash_lock);
> - old = find_inode(inode->i_sb, head, test, data, true);
> + old = find_inode(inode->i_sb, head, test, data, true, &isnew);
> if (unlikely(old)) {
> /*
> * Uhhuh, somebody else created the same inode under us.
> @@ -1300,10 +1310,12 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
> spin_unlock(&inode_hash_lock);
> if (IS_ERR(old))
> return NULL;
> - wait_on_inode(old);
> - if (unlikely(inode_unhashed(old))) {
> - iput(old);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(old);
> + if (unlikely(inode_unhashed(old))) {
> + iput(old);
> + goto again;
> + }
> }
> return old;
> }
> @@ -1391,18 +1403,21 @@ struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval,
> {
> struct hlist_head *head = inode_hashtable + hash(sb, hashval);
> struct inode *inode, *new;
> + bool isnew;
>
> might_sleep();
>
> again:
> - inode = find_inode(sb, head, test, data, false);
> + inode = find_inode(sb, head, test, data, false, &isnew);
> if (inode) {
> if (IS_ERR(inode))
> return NULL;
> - wait_on_inode(inode);
> - if (unlikely(inode_unhashed(inode))) {
> - iput(inode);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(inode);
> + if (unlikely(inode_unhashed(inode))) {
> + iput(inode);
> + goto again;
> + }
> }
> return inode;
> }
> @@ -1434,18 +1449,21 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
> {
> struct hlist_head *head = inode_hashtable + hash(sb, ino);
> struct inode *inode;
> + bool isnew;
>
> might_sleep();
>
> again:
> - inode = find_inode_fast(sb, head, ino, false);
> + inode = find_inode_fast(sb, head, ino, false, &isnew);
> if (inode) {
> if (IS_ERR(inode))
> return NULL;
> - wait_on_inode(inode);
> - if (unlikely(inode_unhashed(inode))) {
> - iput(inode);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(inode);
> + if (unlikely(inode_unhashed(inode))) {
> + iput(inode);
> + goto again;
> + }
> }
> return inode;
> }
> @@ -1456,7 +1474,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
>
> spin_lock(&inode_hash_lock);
> /* We released the lock, so.. */
> - old = find_inode_fast(sb, head, ino, true);
> + old = find_inode_fast(sb, head, ino, true, &isnew);
> if (!old) {
> inode->i_ino = ino;
> spin_lock(&inode->i_lock);
> @@ -1482,10 +1500,12 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
> if (IS_ERR(old))
> return NULL;
> inode = old;
> - wait_on_inode(inode);
> - if (unlikely(inode_unhashed(inode))) {
> - iput(inode);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(inode);
> + if (unlikely(inode_unhashed(inode))) {
> + iput(inode);
> + goto again;
> + }
> }
> }
> return inode;
> @@ -1586,13 +1606,13 @@ EXPORT_SYMBOL(igrab);
> * Note2: @test is called with the inode_hash_lock held, so can't sleep.
> */
> struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
> - int (*test)(struct inode *, void *), void *data)
> + int (*test)(struct inode *, void *), void *data, bool *isnew)
> {
> struct hlist_head *head = inode_hashtable + hash(sb, hashval);
> struct inode *inode;
>
> spin_lock(&inode_hash_lock);
> - inode = find_inode(sb, head, test, data, true);
> + inode = find_inode(sb, head, test, data, true, isnew);
> spin_unlock(&inode_hash_lock);
>
> return IS_ERR(inode) ? NULL : inode;
> @@ -1620,16 +1640,19 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
> int (*test)(struct inode *, void *), void *data)
> {
> struct inode *inode;
> + bool isnew;
>
> might_sleep();
>
> again:
> - inode = ilookup5_nowait(sb, hashval, test, data);
> + inode = ilookup5_nowait(sb, hashval, test, data, &isnew);
> if (inode) {
> - wait_on_inode(inode);
> - if (unlikely(inode_unhashed(inode))) {
> - iput(inode);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(inode);
> + if (unlikely(inode_unhashed(inode))) {
> + iput(inode);
> + goto again;
> + }
> }
> }
> return inode;
> @@ -1648,19 +1671,22 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
> {
> struct hlist_head *head = inode_hashtable + hash(sb, ino);
> struct inode *inode;
> + bool isnew;
>
> might_sleep();
>
> again:
> - inode = find_inode_fast(sb, head, ino, false);
> + inode = find_inode_fast(sb, head, ino, false, &isnew);
>
> if (inode) {
> if (IS_ERR(inode))
> return NULL;
> - wait_on_inode(inode);
> - if (unlikely(inode_unhashed(inode))) {
> - iput(inode);
> - goto again;
> + if (unlikely(isnew)) {
> + wait_on_new_inode(inode);
> + if (unlikely(inode_unhashed(inode))) {
> + iput(inode);
> + goto again;
> + }
> }
> }
> return inode;
> @@ -1800,6 +1826,7 @@ int insert_inode_locked(struct inode *inode)
> struct super_block *sb = inode->i_sb;
> ino_t ino = inode->i_ino;
> struct hlist_head *head = inode_hashtable + hash(sb, ino);
> + bool isnew;
>
> might_sleep();
>
> @@ -1832,12 +1859,15 @@ int insert_inode_locked(struct inode *inode)
> return -EBUSY;
> }
> __iget(old);
> + isnew = !!(inode_state_read(old) & I_NEW);
> spin_unlock(&old->i_lock);
> spin_unlock(&inode_hash_lock);
> - wait_on_inode(old);
> - if (unlikely(!inode_unhashed(old))) {
> - iput(old);
> - return -EBUSY;
> + if (isnew) {
> + wait_on_new_inode(old);
> + if (unlikely(!inode_unhashed(old))) {
> + iput(old);
> + return -EBUSY;
> + }
> }
> iput(old);
> }
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 21c73df3ce75..a813abdcf218 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1030,15 +1030,7 @@ static inline void inode_fake_hash(struct inode *inode)
> hlist_add_fake(&inode->i_hash);
> }
>
> -static inline void wait_on_inode(struct inode *inode)
> -{
> - wait_var_event(inode_state_wait_address(inode, __I_NEW),
> - !(inode_state_read_once(inode) & I_NEW));
> - /*
> - * Pairs with routines clearing I_NEW.
> - */
> - smp_rmb();
> -}
> +void wait_on_new_inode(struct inode *inode);
>
> /*
> * inode->i_rwsem nesting subclasses for the lock validator:
> @@ -3417,7 +3409,7 @@ extern void d_mark_dontcache(struct inode *inode);
>
> extern struct inode *ilookup5_nowait(struct super_block *sb,
> unsigned long hashval, int (*test)(struct inode *, void *),
> - void *data);
> + void *data, bool *isnew);
> extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
> int (*test)(struct inode *, void *), void *data);
> extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
> --
> 2.34.1
>
Powered by blists - more mailing lists