[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <86fe3ab7-943d-415f-822d-ffa2e7c18640@linux.alibaba.com>
Date: Wed, 10 Dec 2025 09:55:03 +0800
From: Joseph Qi <joseph.qi@...ux.alibaba.com>
To: Heming Zhao <heming.zhao@...e.com>, mark@...heh.com, jlbec@...lplan.org
Cc: ocfs2-devel@...ts.linux.dev, linux-kernel@...r.kernel.org,
glass.su@...e.com
Subject: Re: [PATCH RESEND v4 2/2] ocfs2: detect released suballocator BG for
fh_to_[dentry|parent]
On 2025/12/2 14:39, Heming Zhao wrote:
> After ocfs2 gained the ability to reclaim free suballocator block
> groups (BGs), a suballocator block group may be released. This change
> causes the xfstests case generic/426 to fail.
>
> generic/426 expects return value -ENOENT or -ESTALE, but the current
> code triggers -EROFS.
>
> Call stack before ocfs2 gained the ability to reclaim bg:
>
> ocfs2_fh_to_dentry //or ocfs2_fh_to_parent
> ocfs2_get_dentry
> + ocfs2_test_inode_bit
> | ocfs2_test_suballoc_bit
> | + ocfs2_read_group_descriptor //Since ocfs2 never releases the bg,
> | | //the bg block was always found.
> | + *res = ocfs2_test_bit //unlink was called, and the bit is zero
> |
> + if (!set) //because the above *res is 0
> status = -ESTALE //the generic/426 expected return value
>
> Current call stack that triggers -EROFS:
>
> ocfs2_get_dentry
> ocfs2_test_inode_bit
> ocfs2_test_suballoc_bit
> ocfs2_read_group_descriptor
> + if reading a released bg, validation fails and triggers -EROFS
>
> How to fix:
> Since the BG being read has already been released, we must avoid triggering -EROFS.
> With this commit, we use ocfs2_read_hint_group_descriptor() to detect
> the released BG block. This approach quietly handles this type of error
> and returns -EINVAL, which triggers the caller's existing conversion
> path to -ESTALE.
>
> Signed-off-by: Heming Zhao <heming.zhao@...e.com>
> Reviewed-by: Su Yue <glass.su@...e.com>
> ---
> fs/ocfs2/suballoc.c | 28 ++++++++++++++++++----------
> 1 file changed, 18 insertions(+), 10 deletions(-)
>
> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
> index de2f09217142..a126d83ddb1c 100644
> --- a/fs/ocfs2/suballoc.c
> +++ b/fs/ocfs2/suballoc.c
> @@ -3152,7 +3152,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
> struct ocfs2_group_desc *group;
> struct buffer_head *group_bh = NULL;
> u64 bg_blkno;
> - int status;
> + int status, quiet = 0, released;
>
> trace_ocfs2_test_suballoc_bit((unsigned long long)blkno,
> (unsigned int)bit);
> @@ -3168,11 +3168,15 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
>
> bg_blkno = group_blkno ? group_blkno :
> ocfs2_which_suballoc_group(blkno, bit);
> - status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
> - &group_bh);
> - if (status < 0) {
> + status = ocfs2_read_hint_group_descriptor(suballoc, alloc_di, bg_blkno,
> + &group_bh, &released);
> + if (released) {
> + quiet = 1;
> + status = -EINVAL;
> + goto bail;
> + } else if (status < 0) {
> mlog(ML_ERROR, "read group %llu failed %d\n",
> - (unsigned long long)bg_blkno, status);
> + (unsigned long long)bg_blkno, status);
> goto bail;
> }
>
> @@ -3182,7 +3186,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
> bail:
> brelse(group_bh);
>
> - if (status)
> + if (status && (!quiet))
> mlog_errno(status);
> return status;
> }
> @@ -3202,7 +3206,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
> */
> int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
> {
> - int status;
> + int status, quiet = 0;
> u64 group_blkno = 0;
> u16 suballoc_bit = 0, suballoc_slot = 0;
> struct inode *inode_alloc_inode;
> @@ -3244,8 +3248,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
>
> status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
> group_blkno, blkno, suballoc_bit, res);
> - if (status < 0)
> - mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
> + if (status < 0) {
> + if (status == -EINVAL)
This doesn't seem right, since there are other cases that will also return -EINVAL.
So how about returning -ESTALE in this case?
Thanks,
Joseph
> + quiet = 1;
> + else
> + mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
> + }
>
> ocfs2_inode_unlock(inode_alloc_inode, 0);
> inode_unlock(inode_alloc_inode);
> @@ -3253,7 +3261,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
> iput(inode_alloc_inode);
> brelse(alloc_bh);
> bail:
> - if (status)
> + if (status && !quiet)
> mlog_errno(status);
> return status;
> }
Powered by blists - more mailing lists