[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <8a925e3f-bd27-ac32-841a-a79690d971d7@google.com>
Date: Sun, 14 Dec 2025 23:38:28 -0800 (PST)
From: Hugh Dickins <hughd@...gle.com>
To: Al Viro <viro@...iv.linux.org.uk>
cc: Hugh Dickins <hughd@...gle.com>, Miklos Szeredi <miklos@...redi.hu>,
Christian Brauner <brauner@...nel.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Baolin Wang <baolin.wang@...ux.alibaba.com>, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org, linux-mm@...ck.org,
Linus Torvalds <torvalds@...ux-foundation.org>,
Chuck Lever <chuck.lever@...cle.com>
Subject: Re: [RFC][PATCH 2/2] shmem: fix recovery on rename failures
On Sun, 14 Dec 2025, Al Viro wrote:
> maple_tree insertions can fail if we are seriously short on memory;
> simple_offset_rename() does not recover well if it runs into that.
> The same goes for simple_offset_rename_exchange().
>
> Moreover, shmem_whiteout() expects that if it succeeds, the caller will
> progress to d_move(), i.e. that shmem_rename2() won't fail past the
> successful call of shmem_whiteout().
>
> Not hard to fix, fortunately - mtree_store() can't fail if the index we
> are trying to store into is already present in the tree as a singleton.
>
> For simple_offset_rename_exchange() that's enough - we just need to be
> careful about the order of operations.
>
> For simple_offset_rename() solution is to preinsert the target into the
> tree for new_dir; the rest can be done without any potentially failing
> operations.
>
> That preinsertion has to be done in shmem_rename2() rather than in
> simple_offset_rename() itself - otherwise we'd need to deal with the
> possibility of failure after successful shmem_whiteout().
>
> Fixes: a2e459555c5f ("shmem: stable directory offsets")
> Signed-off-by: Al Viro <viro@...iv.linux.org.uk>
Well, what you say above, and what you've done below, make sense to me;
and neither I nor xfstests noticed anything wrong (aside from one
trivium - I'd prefer "bool had_offset = false" to "int ...";
maybe placed one line up to look prettier).
But please don't expect proper engagement from me on this one,
it's above my head, and I'll trust you and Chuck on it.
Thanks,
Hugh
> ---
> fs/libfs.c | 50 +++++++++++++++++++---------------------------
> include/linux/fs.h | 2 +-
> mm/shmem.c | 18 ++++++++++++-----
> 3 files changed, 35 insertions(+), 35 deletions(-)
>
> diff --git a/fs/libfs.c b/fs/libfs.c
> index 9264523be85c..591eb649ebba 100644
> --- a/fs/libfs.c
> +++ b/fs/libfs.c
> @@ -346,22 +346,22 @@ void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
> * User space expects the directory offset value of the replaced
> * (new) directory entry to be unchanged after a rename.
> *
> - * Returns zero on success, a negative errno value on failure.
> + * Caller must have grabbed a slot for new_dentry in the maple_tree
> + * associated with new_dir, even if dentry is negative.
> */
> -int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
> - struct inode *new_dir, struct dentry *new_dentry)
> +void simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
> + struct inode *new_dir, struct dentry *new_dentry)
> {
> struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
> struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
> long new_offset = dentry2offset(new_dentry);
>
> - simple_offset_remove(old_ctx, old_dentry);
> + if (WARN_ON(!new_offset))
> + return;
>
> - if (new_offset) {
> - offset_set(new_dentry, 0);
> - return simple_offset_replace(new_ctx, old_dentry, new_offset);
> - }
> - return simple_offset_add(new_ctx, old_dentry);
> + simple_offset_remove(old_ctx, old_dentry);
> + offset_set(new_dentry, 0);
> + WARN_ON(simple_offset_replace(new_ctx, old_dentry, new_offset));
> }
>
> /**
> @@ -388,31 +388,23 @@ int simple_offset_rename_exchange(struct inode *old_dir,
> long new_index = dentry2offset(new_dentry);
> int ret;
>
> - simple_offset_remove(old_ctx, old_dentry);
> - simple_offset_remove(new_ctx, new_dentry);
> + if (WARN_ON(!old_index || !new_index))
> + return -EINVAL;
>
> - ret = simple_offset_replace(new_ctx, old_dentry, new_index);
> - if (ret)
> - goto out_restore;
> + ret = mtree_store(&new_ctx->mt, new_index, old_dentry, GFP_KERNEL);
> + if (WARN_ON(ret))
> + return ret;
>
> - ret = simple_offset_replace(old_ctx, new_dentry, old_index);
> - if (ret) {
> - simple_offset_remove(new_ctx, old_dentry);
> - goto out_restore;
> + ret = mtree_store(&old_ctx->mt, old_index, new_dentry, GFP_KERNEL);
> + if (WARN_ON(ret)) {
> + mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL);
> + return ret;
> }
>
> - ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
> - if (ret) {
> - simple_offset_remove(new_ctx, old_dentry);
> - simple_offset_remove(old_ctx, new_dentry);
> - goto out_restore;
> - }
> + offset_set(old_dentry, new_index);
> + offset_set(new_dentry, old_index);
> + simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
> return 0;
> -
> -out_restore:
> - (void)simple_offset_replace(old_ctx, old_dentry, old_index);
> - (void)simple_offset_replace(new_ctx, new_dentry, new_index);
> - return ret;
> }
>
> /**
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index 04ceeca12a0d..f5c9cf28c4dc 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -3247,7 +3247,7 @@ struct offset_ctx {
> void simple_offset_init(struct offset_ctx *octx);
> int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
> void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
> -int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
> +void simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
> struct inode *new_dir, struct dentry *new_dentry);
> int simple_offset_rename_exchange(struct inode *old_dir,
> struct dentry *old_dentry,
> diff --git a/mm/shmem.c b/mm/shmem.c
> index d3edc809e2e7..4232f8a39a43 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -4039,6 +4039,7 @@ static int shmem_rename2(struct mnt_idmap *idmap,
> struct inode *inode = d_inode(old_dentry);
> int they_are_dirs = S_ISDIR(inode->i_mode);
> int error;
> + int had_offset = false;
>
> if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
> return -EINVAL;
> @@ -4050,16 +4051,23 @@ static int shmem_rename2(struct mnt_idmap *idmap,
> if (!simple_empty(new_dentry))
> return -ENOTEMPTY;
>
> + error = simple_offset_add(shmem_get_offset_ctx(new_dir), new_dentry);
> + if (error == -EBUSY)
> + had_offset = true;
> + else if (unlikely(error))
> + return error;
> +
> if (flags & RENAME_WHITEOUT) {
> error = shmem_whiteout(idmap, old_dir, old_dentry);
> - if (error)
> + if (error) {
> + if (!had_offset)
> + simple_offset_remove(shmem_get_offset_ctx(new_dir),
> + new_dentry);
> return error;
> + }
> }
>
> - error = simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry);
> - if (error)
> - return error;
> -
> + simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry);
> if (d_really_is_positive(new_dentry)) {
> (void) shmem_unlink(new_dir, new_dentry);
> if (they_are_dirs) {
> --
> 2.47.3
Powered by blists - more mailing lists