[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250828-aufbau-abblendlicht-a9cf118d33e8@brauner>
Date: Thu, 28 Aug 2025 13:02:31 +0200
From: Christian Brauner <brauner@...nel.org>
To: Josef Bacik <josef@...icpanda.com>
Cc: linux-fsdevel@...r.kernel.org, linux-btrfs@...r.kernel.org,
kernel-team@...com, linux-ext4@...r.kernel.org, linux-xfs@...r.kernel.org,
viro@...iv.linux.org.uk, amir73il@...il.com
Subject: Re: [PATCH v2 20/54] fs: disallow 0 reference count inodes
On Tue, Aug 26, 2025 at 11:39:20AM -0400, Josef Bacik wrote:
> Now that we take a full reference for inodes on the LRU, move the logic
> to add the inode to the LRU to before we drop our last reference. This
> allows us to ensure that if the inode has a reference count it can be
> used, and we no longer hold onto inodes that have a 0 reference count.
>
> Signed-off-by: Josef Bacik <josef@...icpanda.com>
> ---
> fs/inode.c | 61 ++++++++++++++++++++++++++++++++++++------------------
> 1 file changed, 41 insertions(+), 20 deletions(-)
>
> diff --git a/fs/inode.c b/fs/inode.c
> index 9001f809add0..d1668f7fb73e 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -598,7 +598,7 @@ static void __inode_add_lru(struct inode *inode, bool rotate)
>
> if (inode->i_state & (I_FREEING | I_WILL_FREE))
> return;
> - if (icount_read(inode))
> + if (icount_read(inode) != 1)
> return;
> if (inode->__i_nlink == 0)
> return;
> @@ -1950,28 +1950,11 @@ EXPORT_SYMBOL(generic_delete_inode);
> * in cache if fs is alive, sync and evict if fs is
> * shutting down.
> */
> -static void iput_final(struct inode *inode, bool skip_lru)
> +static void iput_final(struct inode *inode, bool drop)
> {
> - struct super_block *sb = inode->i_sb;
> - const struct super_operations *op = inode->i_sb->s_op;
> unsigned long state;
> - int drop;
>
> WARN_ON(inode->i_state & I_NEW);
> -
> - if (op->drop_inode)
> - drop = op->drop_inode(inode);
> - else
> - drop = generic_drop_inode(inode);
> -
> - if (!drop && !skip_lru &&
> - !(inode->i_state & I_DONTCACHE) &&
> - (sb->s_flags & SB_ACTIVE)) {
> - __inode_add_lru(inode, true);
> - spin_unlock(&inode->i_lock);
> - return;
> - }
> -
> WARN_ON(!list_empty(&inode->i_lru));
>
> state = inode->i_state;
> @@ -1993,8 +1976,37 @@ static void iput_final(struct inode *inode, bool skip_lru)
> evict(inode);
> }
>
> +static bool maybe_add_lru(struct inode *inode, bool skip_lru)
> +{
> + const struct super_operations *op = inode->i_sb->s_op;
> + const struct super_block *sb = inode->i_sb;
> + bool drop = false;
> +
> + if (op->drop_inode)
> + drop = op->drop_inode(inode);
> + else
> + drop = generic_drop_inode(inode);
> +
> + if (drop)
> + return drop;
> +
> + if (skip_lru)
> + return drop;
> +
> + if (inode->i_state & I_DONTCACHE)
> + return drop;
> +
> + if (!(sb->s_flags & SB_ACTIVE))
> + return drop;
> +
> + __inode_add_lru(inode, true);
> + return drop;
> +}
> +
> static void __iput(struct inode *inode, bool skip_lru)
> {
> + bool drop;
> +
> if (!inode)
> return;
> BUG_ON(inode->i_state & I_CLEAR);
> @@ -2010,9 +2022,18 @@ static void __iput(struct inode *inode, bool skip_lru)
> }
>
> spin_lock(&inode->i_lock);
> +
> + /*
> + * If we want to keep the inode around on an LRU we will grab a ref to
> + * the inode when we add it to the LRU list, so we can safely drop the
> + * callers reference after this. If we didn't add the inode to the LRU
> + * then the refcount will still be 1 and we can do the final iput.
> + */
> + drop = maybe_add_lru(inode, skip_lru);
So before, we only put the inode on an LRU when we knew this was the
last reference. Now we're putting it on the LRU before we know that for
sure.
While __inode_add_lru() now checks whether this is potentially the last
reference we're going to put, someone could grab another full reference
between the check, putting it on the LRU, and atomic_dec_and_test().
So we are left with an inode on the LRU that previously would not have
ended up there. And then later we need to remove it again. I guess the
arguments are:
(1) It's not a big deal because if the shrinker runs we'll just toss that
inode from the LRU again.
(2) If it ended up being put on the cached LRU it'll stay there for at
least as long as the inode is referenced? I guess that's ok too.
(3) The race is not that common?
Anyway, again it would be nice to have some comments noting this
behavior and arguing why that's ok.
> +
> if (atomic_dec_and_test(&inode->i_count)) {
> /* iput_final() drops i_lock */
> - iput_final(inode, skip_lru);
> + iput_final(inode, drop);
> } else {
> spin_unlock(&inode->i_lock);
> }
> --
> 2.49.0
>
Powered by blists - more mailing lists