lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAGudoHETiJ8G8WeyFYJ6EZ4oxcmqxV3yztZDOxL8PUBGobW_xQ@mail.gmail.com>
Date: Wed, 15 Oct 2025 13:50:25 +0200
From: Mateusz Guzik <mjguzik@...il.com>
To: brauner@...nel.org
Cc: viro@...iv.linux.org.uk, jack@...e.cz, linux-kernel@...r.kernel.org, 
	linux-fsdevel@...r.kernel.org
Subject: Re: [PATCH] fs: rework I_NEW handling to operate without fences

can i get some flames on this?

On Sat, Oct 11, 2025 at 12:17 AM Mateusz Guzik <mjguzik@...il.com> wrote:
>
> In the inode hash code grab the state while ->i_lock is held. If found
> to be set, synchronize the sleep once more with the lock held.
>
> In the real world the flag is not set most of the time.
>
> Apart from being simpler to reason about, it comes with a minor speed up
> as now clearing the flag does not require the smp_mb() fence.
>
> While here rename wait_on_inode() to wait_on_new_inode() to line it up
> with __wait_on_freeing_inode().
>
> Signed-off-by: Mateusz Guzik <mjguzik@...il.com>
> ---
>
> This temporarily duplicated sleep code from inode_wait_for_lru_isolating().
> This is going to get dedupped later.
>
> There is high repetition of:
>         if (unlikely(isnew)) {
>                 wait_on_new_inode(old);
>                 if (unlikely(inode_unhashed(old))) {
>                         iput(old);
>                         goto again;
>                 }
>
> I expect this is going to go away after I post a patch to sanitize the
> current APIs for the hash.
>
>
>  fs/afs/dir.c       |   4 +-
>  fs/dcache.c        |  10 ----
>  fs/gfs2/glock.c    |   2 +-
>  fs/inode.c         | 146 +++++++++++++++++++++++++++------------------
>  include/linux/fs.h |  12 +---
>  5 files changed, 93 insertions(+), 81 deletions(-)
>
> diff --git a/fs/afs/dir.c b/fs/afs/dir.c
> index 89d36e3e5c79..f4e9e12373ac 100644
> --- a/fs/afs/dir.c
> +++ b/fs/afs/dir.c
> @@ -779,7 +779,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
>         struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
>         struct inode *inode = NULL, *ti;
>         afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
> -       bool supports_ibulk;
> +       bool supports_ibulk, isnew;
>         long ret;
>         int i;
>
> @@ -850,7 +850,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry)
>                          * callback counters.
>                          */
>                         ti = ilookup5_nowait(dir->i_sb, vp->fid.vnode,
> -                                            afs_ilookup5_test_by_fid, &vp->fid);
> +                                            afs_ilookup5_test_by_fid, &vp->fid, &isnew);
>                         if (!IS_ERR_OR_NULL(ti)) {
>                                 vnode = AFS_FS_I(ti);
>                                 vp->dv_before = vnode->status.data_version;
> diff --git a/fs/dcache.c b/fs/dcache.c
> index 78ffa7b7e824..25131f105a60 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -1981,17 +1981,7 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
>         spin_lock(&inode->i_lock);
>         __d_instantiate(entry, inode);
>         WARN_ON(!(inode_state_read(inode) & I_NEW));
> -       /*
> -        * Pairs with smp_rmb in wait_on_inode().
> -        */
> -       smp_wmb();
>         inode_state_clear(inode, I_NEW | I_CREATING);
> -       /*
> -        * Pairs with the barrier in prepare_to_wait_event() to make sure
> -        * ___wait_var_event() either sees the bit cleared or
> -        * waitqueue_active() check in wake_up_var() sees the waiter.
> -        */
> -       smp_mb();
>         inode_wake_up_bit(inode, __I_NEW);
>         spin_unlock(&inode->i_lock);
>  }
> diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
> index b677c0e6b9ab..c9712235e7a0 100644
> --- a/fs/gfs2/glock.c
> +++ b/fs/gfs2/glock.c
> @@ -957,7 +957,7 @@ static struct gfs2_inode *gfs2_grab_existing_inode(struct gfs2_glock *gl)
>                 ip = NULL;
>         spin_unlock(&gl->gl_lockref.lock);
>         if (ip) {
> -               wait_on_inode(&ip->i_inode);
> +               wait_on_new_inode(&ip->i_inode);
>                 if (is_bad_inode(&ip->i_inode)) {
>                         iput(&ip->i_inode);
>                         ip = NULL;
> diff --git a/fs/inode.c b/fs/inode.c
> index 3153d725859c..1396f79b2551 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -558,6 +558,32 @@ struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
>  }
>  EXPORT_SYMBOL(inode_bit_waitqueue);
>
> +void wait_on_new_inode(struct inode *inode)
> +{
> +       struct wait_bit_queue_entry wqe;
> +       struct wait_queue_head *wq_head;
> +
> +       spin_lock(&inode->i_lock);
> +       if (!(inode_state_read(inode) & I_NEW)) {
> +               spin_unlock(&inode->i_lock);
> +               return;
> +       }
> +
> +       wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
> +       for (;;) {
> +               prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
> +               if (!(inode_state_read(inode) & I_NEW))
> +                       break;
> +               spin_unlock(&inode->i_lock);
> +               schedule();
> +               spin_lock(&inode->i_lock);
> +       }
> +       finish_wait(wq_head, &wqe.wq_entry);
> +       WARN_ON(inode_state_read(inode) & I_NEW);
> +       spin_unlock(&inode->i_lock);
> +}
> +EXPORT_SYMBOL(wait_on_new_inode);
> +
>  /*
>   * Add inode to LRU if needed (inode is unused and clean).
>   *
> @@ -1008,7 +1034,8 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
>  static struct inode *find_inode(struct super_block *sb,
>                                 struct hlist_head *head,
>                                 int (*test)(struct inode *, void *),
> -                               void *data, bool is_inode_hash_locked)
> +                               void *data, bool is_inode_hash_locked,
> +                               bool *isnew)
>  {
>         struct inode *inode = NULL;
>
> @@ -1035,6 +1062,7 @@ static struct inode *find_inode(struct super_block *sb,
>                         return ERR_PTR(-ESTALE);
>                 }
>                 __iget(inode);
> +               *isnew = !!(inode_state_read(inode) & I_NEW);
>                 spin_unlock(&inode->i_lock);
>                 rcu_read_unlock();
>                 return inode;
> @@ -1049,7 +1077,7 @@ static struct inode *find_inode(struct super_block *sb,
>   */
>  static struct inode *find_inode_fast(struct super_block *sb,
>                                 struct hlist_head *head, unsigned long ino,
> -                               bool is_inode_hash_locked)
> +                               bool is_inode_hash_locked, bool *isnew)
>  {
>         struct inode *inode = NULL;
>
> @@ -1076,6 +1104,7 @@ static struct inode *find_inode_fast(struct super_block *sb,
>                         return ERR_PTR(-ESTALE);
>                 }
>                 __iget(inode);
> +               *isnew = !!(inode_state_read(inode) & I_NEW);
>                 spin_unlock(&inode->i_lock);
>                 rcu_read_unlock();
>                 return inode;
> @@ -1181,17 +1210,7 @@ void unlock_new_inode(struct inode *inode)
>         lockdep_annotate_inode_mutex_key(inode);
>         spin_lock(&inode->i_lock);
>         WARN_ON(!(inode_state_read(inode) & I_NEW));
> -       /*
> -        * Pairs with smp_rmb in wait_on_inode().
> -        */
> -       smp_wmb();
>         inode_state_clear(inode, I_NEW | I_CREATING);
> -       /*
> -        * Pairs with the barrier in prepare_to_wait_event() to make sure
> -        * ___wait_var_event() either sees the bit cleared or
> -        * waitqueue_active() check in wake_up_var() sees the waiter.
> -        */
> -       smp_mb();
>         inode_wake_up_bit(inode, __I_NEW);
>         spin_unlock(&inode->i_lock);
>  }
> @@ -1202,17 +1221,7 @@ void discard_new_inode(struct inode *inode)
>         lockdep_annotate_inode_mutex_key(inode);
>         spin_lock(&inode->i_lock);
>         WARN_ON(!(inode_state_read(inode) & I_NEW));
> -       /*
> -        * Pairs with smp_rmb in wait_on_inode().
> -        */
> -       smp_wmb();
>         inode_state_clear(inode, I_NEW);
> -       /*
> -        * Pairs with the barrier in prepare_to_wait_event() to make sure
> -        * ___wait_var_event() either sees the bit cleared or
> -        * waitqueue_active() check in wake_up_var() sees the waiter.
> -        */
> -       smp_mb();
>         inode_wake_up_bit(inode, __I_NEW);
>         spin_unlock(&inode->i_lock);
>         iput(inode);
> @@ -1286,12 +1295,13 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
>  {
>         struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
>         struct inode *old;
> +       bool isnew;
>
>         might_sleep();
>
>  again:
>         spin_lock(&inode_hash_lock);
> -       old = find_inode(inode->i_sb, head, test, data, true);
> +       old = find_inode(inode->i_sb, head, test, data, true, &isnew);
>         if (unlikely(old)) {
>                 /*
>                  * Uhhuh, somebody else created the same inode under us.
> @@ -1300,10 +1310,12 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval,
>                 spin_unlock(&inode_hash_lock);
>                 if (IS_ERR(old))
>                         return NULL;
> -               wait_on_inode(old);
> -               if (unlikely(inode_unhashed(old))) {
> -                       iput(old);
> -                       goto again;
> +               if (unlikely(isnew)) {
> +                       wait_on_new_inode(old);
> +                       if (unlikely(inode_unhashed(old))) {
> +                               iput(old);
> +                               goto again;
> +                       }
>                 }
>                 return old;
>         }
> @@ -1391,18 +1403,21 @@ struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval,
>  {
>         struct hlist_head *head = inode_hashtable + hash(sb, hashval);
>         struct inode *inode, *new;
> +       bool isnew;
>
>         might_sleep();
>
>  again:
> -       inode = find_inode(sb, head, test, data, false);
> +       inode = find_inode(sb, head, test, data, false, &isnew);
>         if (inode) {
>                 if (IS_ERR(inode))
>                         return NULL;
> -               wait_on_inode(inode);
> -               if (unlikely(inode_unhashed(inode))) {
> -                       iput(inode);
> -                       goto again;
> +               if (unlikely(isnew)) {
> +                       wait_on_new_inode(inode);
> +                       if (unlikely(inode_unhashed(inode))) {
> +                               iput(inode);
> +                               goto again;
> +                       }
>                 }
>                 return inode;
>         }
> @@ -1434,18 +1449,21 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
>  {
>         struct hlist_head *head = inode_hashtable + hash(sb, ino);
>         struct inode *inode;
> +       bool isnew;
>
>         might_sleep();
>
>  again:
> -       inode = find_inode_fast(sb, head, ino, false);
> +       inode = find_inode_fast(sb, head, ino, false, &isnew);
>         if (inode) {
>                 if (IS_ERR(inode))
>                         return NULL;
> -               wait_on_inode(inode);
> -               if (unlikely(inode_unhashed(inode))) {
> -                       iput(inode);
> -                       goto again;
> +               if (unlikely(isnew)) {
> +                       wait_on_new_inode(inode);
> +                       if (unlikely(inode_unhashed(inode))) {
> +                               iput(inode);
> +                               goto again;
> +                       }
>                 }
>                 return inode;
>         }
> @@ -1456,7 +1474,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
>
>                 spin_lock(&inode_hash_lock);
>                 /* We released the lock, so.. */
> -               old = find_inode_fast(sb, head, ino, true);
> +               old = find_inode_fast(sb, head, ino, true, &isnew);
>                 if (!old) {
>                         inode->i_ino = ino;
>                         spin_lock(&inode->i_lock);
> @@ -1482,10 +1500,12 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino)
>                 if (IS_ERR(old))
>                         return NULL;
>                 inode = old;
> -               wait_on_inode(inode);
> -               if (unlikely(inode_unhashed(inode))) {
> -                       iput(inode);
> -                       goto again;
> +               if (unlikely(isnew)) {
> +                       wait_on_new_inode(inode);
> +                       if (unlikely(inode_unhashed(inode))) {
> +                               iput(inode);
> +                               goto again;
> +                       }
>                 }
>         }
>         return inode;
> @@ -1586,13 +1606,13 @@ EXPORT_SYMBOL(igrab);
>   * Note2: @test is called with the inode_hash_lock held, so can't sleep.
>   */
>  struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
> -               int (*test)(struct inode *, void *), void *data)
> +               int (*test)(struct inode *, void *), void *data, bool *isnew)
>  {
>         struct hlist_head *head = inode_hashtable + hash(sb, hashval);
>         struct inode *inode;
>
>         spin_lock(&inode_hash_lock);
> -       inode = find_inode(sb, head, test, data, true);
> +       inode = find_inode(sb, head, test, data, true, isnew);
>         spin_unlock(&inode_hash_lock);
>
>         return IS_ERR(inode) ? NULL : inode;
> @@ -1620,16 +1640,19 @@ struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
>                 int (*test)(struct inode *, void *), void *data)
>  {
>         struct inode *inode;
> +       bool isnew;
>
>         might_sleep();
>
>  again:
> -       inode = ilookup5_nowait(sb, hashval, test, data);
> +       inode = ilookup5_nowait(sb, hashval, test, data, &isnew);
>         if (inode) {
> -               wait_on_inode(inode);
> -               if (unlikely(inode_unhashed(inode))) {
> -                       iput(inode);
> -                       goto again;
> +               if (unlikely(isnew)) {
> +                       wait_on_new_inode(inode);
> +                       if (unlikely(inode_unhashed(inode))) {
> +                               iput(inode);
> +                               goto again;
> +                       }
>                 }
>         }
>         return inode;
> @@ -1648,19 +1671,22 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
>  {
>         struct hlist_head *head = inode_hashtable + hash(sb, ino);
>         struct inode *inode;
> +       bool isnew;
>
>         might_sleep();
>
>  again:
> -       inode = find_inode_fast(sb, head, ino, false);
> +       inode = find_inode_fast(sb, head, ino, false, &isnew);
>
>         if (inode) {
>                 if (IS_ERR(inode))
>                         return NULL;
> -               wait_on_inode(inode);
> -               if (unlikely(inode_unhashed(inode))) {
> -                       iput(inode);
> -                       goto again;
> +               if (unlikely(isnew)) {
> +                       wait_on_new_inode(inode);
> +                       if (unlikely(inode_unhashed(inode))) {
> +                               iput(inode);
> +                               goto again;
> +                       }
>                 }
>         }
>         return inode;
> @@ -1800,6 +1826,7 @@ int insert_inode_locked(struct inode *inode)
>         struct super_block *sb = inode->i_sb;
>         ino_t ino = inode->i_ino;
>         struct hlist_head *head = inode_hashtable + hash(sb, ino);
> +       bool isnew;
>
>         might_sleep();
>
> @@ -1832,12 +1859,15 @@ int insert_inode_locked(struct inode *inode)
>                         return -EBUSY;
>                 }
>                 __iget(old);
> +               isnew = !!(inode_state_read(old) & I_NEW);
>                 spin_unlock(&old->i_lock);
>                 spin_unlock(&inode_hash_lock);
> -               wait_on_inode(old);
> -               if (unlikely(!inode_unhashed(old))) {
> -                       iput(old);
> -                       return -EBUSY;
> +               if (isnew) {
> +                       wait_on_new_inode(old);
> +                       if (unlikely(!inode_unhashed(old))) {
> +                               iput(old);
> +                               return -EBUSY;
> +                       }
>                 }
>                 iput(old);
>         }
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 21c73df3ce75..a813abdcf218 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1030,15 +1030,7 @@ static inline void inode_fake_hash(struct inode *inode)
>         hlist_add_fake(&inode->i_hash);
>  }
>
> -static inline void wait_on_inode(struct inode *inode)
> -{
> -       wait_var_event(inode_state_wait_address(inode, __I_NEW),
> -                      !(inode_state_read_once(inode) & I_NEW));
> -       /*
> -        * Pairs with routines clearing I_NEW.
> -        */
> -       smp_rmb();
> -}
> +void wait_on_new_inode(struct inode *inode);
>
>  /*
>   * inode->i_rwsem nesting subclasses for the lock validator:
> @@ -3417,7 +3409,7 @@ extern void d_mark_dontcache(struct inode *inode);
>
>  extern struct inode *ilookup5_nowait(struct super_block *sb,
>                 unsigned long hashval, int (*test)(struct inode *, void *),
> -               void *data);
> +               void *data, bool *isnew);
>  extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
>                 int (*test)(struct inode *, void *), void *data);
>  extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
> --
> 2.34.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ