lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <86fe3ab7-943d-415f-822d-ffa2e7c18640@linux.alibaba.com>
Date: Wed, 10 Dec 2025 09:55:03 +0800
From: Joseph Qi <joseph.qi@...ux.alibaba.com>
To: Heming Zhao <heming.zhao@...e.com>, mark@...heh.com, jlbec@...lplan.org
Cc: ocfs2-devel@...ts.linux.dev, linux-kernel@...r.kernel.org,
 glass.su@...e.com
Subject: Re: [PATCH RESEND v4 2/2] ocfs2: detect released suballocator BG for
 fh_to_[dentry|parent]



On 2025/12/2 14:39, Heming Zhao wrote:
> After ocfs2 gained the ability to reclaim free suballocator block
> groups (BGs), a suballocator block group may be released. This change
> causes the xfstest case generic/426 to fail.
> 
> generic/426 expects a return value of -ENOENT or -ESTALE, but the
> current code triggers -EROFS.
> 
> Call stack before ocfs2 gained the ability to reclaim bg:
> 
> ocfs2_fh_to_dentry //or ocfs2_fh_to_parent
>  ocfs2_get_dentry
>   + ocfs2_test_inode_bit
>   |  ocfs2_test_suballoc_bit
>   |   + ocfs2_read_group_descriptor //Since ocfs2 never releases the bg,
>   |   |                             //the bg block was always found.
>   |   + *res = ocfs2_test_bit //unlink was called, and the bit is zero
>   |
>   + if (!set) //because the above *res is 0
>      status = -ESTALE //the generic/426 expected return value
> 
> Current call stack that triggers -EROFS:
> 
> ocfs2_get_dentry
>  ocfs2_test_inode_bit
>   ocfs2_test_suballoc_bit
>    ocfs2_read_group_descriptor
>     + if reading a released bg, validation fails and triggers -EROFS
> 
> How to fix:
> Since the read BG is already released, we must avoid triggering -EROFS.
> With this commit, we use ocfs2_read_hint_group_descriptor() to detect
> the released BG block. This approach quietly handles this type of error
> and returns -EINVAL, which triggers the caller's existing conversion
> path to -ESTALE.
> 
> Signed-off-by: Heming Zhao <heming.zhao@...e.com>
> Reviewed-by: Su Yue <glass.su@...e.com>
> ---
>  fs/ocfs2/suballoc.c | 28 ++++++++++++++++++----------
>  1 file changed, 18 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
> index de2f09217142..a126d83ddb1c 100644
> --- a/fs/ocfs2/suballoc.c
> +++ b/fs/ocfs2/suballoc.c
> @@ -3152,7 +3152,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
>  	struct ocfs2_group_desc *group;
>  	struct buffer_head *group_bh = NULL;
>  	u64 bg_blkno;
> -	int status;
> +	int status, quiet = 0, released;
>  
>  	trace_ocfs2_test_suballoc_bit((unsigned long long)blkno,
>  				      (unsigned int)bit);
> @@ -3168,11 +3168,15 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
>  
>  	bg_blkno = group_blkno ? group_blkno :
>  		   ocfs2_which_suballoc_group(blkno, bit);
> -	status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
> -					     &group_bh);
> -	if (status < 0) {
> +	status = ocfs2_read_hint_group_descriptor(suballoc, alloc_di, bg_blkno,
> +					     &group_bh, &released);
> +	if (released) {
> +		quiet = 1;
> +		status = -EINVAL;
> +		goto bail;
> +	} else if (status < 0) {
>  		mlog(ML_ERROR, "read group %llu failed %d\n",
> -		     (unsigned long long)bg_blkno, status);
> +				(unsigned long long)bg_blkno, status);
>  		goto bail;
>  	}
>  
> @@ -3182,7 +3186,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
>  bail:
>  	brelse(group_bh);
>  
> -	if (status)
> +	if (status && (!quiet))
>  		mlog_errno(status);
>  	return status;
>  }
> @@ -3202,7 +3206,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
>   */
>  int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
>  {
> -	int status;
> +	int status, quiet = 0;
>  	u64 group_blkno = 0;
>  	u16 suballoc_bit = 0, suballoc_slot = 0;
>  	struct inode *inode_alloc_inode;
> @@ -3244,8 +3248,12 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
>  
>  	status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
>  					 group_blkno, blkno, suballoc_bit, res);
> -	if (status < 0)
> -		mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
> +	if (status < 0) {
> +		if (status == -EINVAL)

This doesn't seem right, since there are other cases that will also return
-EINVAL. So how about returning -ESTALE in this case?

Thanks,
Joseph

> +			quiet = 1;
> +		else
> +			mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
> +	}
>  
>  	ocfs2_inode_unlock(inode_alloc_inode, 0);
>  	inode_unlock(inode_alloc_inode);
> @@ -3253,7 +3261,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
>  	iput(inode_alloc_inode);
>  	brelse(alloc_bh);
>  bail:
> -	if (status)
> +	if (status && !quiet)
>  		mlog_errno(status);
>  	return status;
>  }


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ