linux-kernel - Re: [PATCH v6 10/13] xfs: iomap COW-based atomic write support

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <ef315f4e-d7e9-48ee-b975-e0a014d10ba2@oracle.com>
Date: Wed, 19 Mar 2025 10:24:55 +0000
From: John Garry <john.g.garry@...cle.com>
To: Christoph Hellwig <hch@....de>
Cc: brauner@...nel.org, djwong@...nel.org, cem@...nel.org, dchinner@...hat.com,
        linux-xfs@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        linux-kernel@...r.kernel.org, ojaswin@...ux.ibm.com,
        ritesh.list@...il.com, martin.petersen@...cle.com, tytso@....edu,
        linux-ext4@...r.kernel.org
Subject: Re: [PATCH v6 10/13] xfs: iomap COW-based atomic write support

On 19/03/2025 07:30, Christoph Hellwig wrote:
> On Tue, Mar 18, 2025 at 05:44:46PM +0000, John Garry wrote:
>> Please suggest any further modifications to the following attempt. I have
>> XFS_REFLINK_FORCE_COW still being passed to xfs_reflink_fill_cow_hole(),
>> but xfs_reflink_fill_cow_hole() is quite a large function and I am not sure
>> if I want to duplicate lots of it.
> 
> As said I'd do away with the helpers.  Below is my completely
> untested whiteboard coding attempt, based against the series you
> sent out.

it seems to work ok, cheers

Just a query, below

> 
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index 88d86cabb8a1..06ece7070cfd 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -1083,67 +1083,104 @@ xfs_atomic_write_cow_iomap_begin(
>   	struct iomap		*iomap,
>   	struct iomap		*srcmap)
>   {
> -	ASSERT(flags & IOMAP_WRITE);
> -	ASSERT(flags & IOMAP_DIRECT);
> -
>   	struct xfs_inode	*ip = XFS_I(inode);
>   	struct xfs_mount	*mp = ip->i_mount;
> -	struct xfs_bmbt_irec	imap, cmap;
>   	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
>   	xfs_fileoff_t		end_fsb = xfs_iomap_end_fsb(mp, offset, length);
> -	int			nimaps = 1, error;
> -	bool			shared = false;
> -	unsigned int		lockmode = XFS_ILOCK_EXCL;
> +	xfs_filblks_t		count_fsb = end_fsb - offset_fsb;
> +	int			nmaps = 1;
> +	xfs_filblks_t		resaligned;
> +	struct xfs_bmbt_irec	cmap;
> +	struct xfs_iext_cursor	icur;
> +	struct xfs_trans	*tp;
> +	int			error;
>   	u64			seq;
>   
> +	ASSERT(!XFS_IS_REALTIME_INODE(ip));
> +	ASSERT(flags & IOMAP_WRITE);
> +	ASSERT(flags & IOMAP_DIRECT);
> +
>   	if (xfs_is_shutdown(mp))
>   		return -EIO;
>   
> -	if (!xfs_has_reflink(mp))
> +	if (WARN_ON_ONCE(!xfs_has_reflink(mp)))
>   		return -EINVAL;
>   
> -	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
> +	xfs_ilock(ip, XFS_ILOCK_EXCL);
> +
> +	if (!ip->i_cowfp) {
> +		ASSERT(!xfs_is_reflink_inode(ip));
> +		xfs_ifork_init_cow(ip);
> +	}
> +
> +	/*
> +	 * If we don't find an overlapping extent, trim the range we need to
> +	 * allocate to fit the hole we found.
> +	 */
> +	if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
> +		cmap.br_startoff = end_fsb;
> +	if (cmap.br_startoff <= offset_fsb) {
> +		xfs_trim_extent(&cmap, offset_fsb, count_fsb);
> +		goto found;
> +	}
> +
> +	end_fsb = cmap.br_startoff;
> +	count_fsb = end_fsb - offset_fsb;
> +	resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb,
> +			xfs_get_cowextsz_hint(ip));
> +	xfs_iunlock(ip, XFS_ILOCK_EXCL);
> +
> +	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write,
> +			XFS_DIOSTRAT_SPACE_RES(mp, resaligned), 0, false, &tp);
>   	if (error)
>   		return error;
>   
> -	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
> -			&nimaps, 0);
> -	if (error)
> -		goto out_unlock;
> +	if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &cmap))
> +		cmap.br_startoff = end_fsb;

Do we really need this logic?

offset_fsb does not change, and logically cmap.br_startoff == end_fsb 
already, right?

> +	if (cmap.br_startoff <= offset_fsb) {
> +		xfs_trim_extent(&cmap, offset_fsb, count_fsb);
> +		xfs_trans_cancel(tp);
> +		goto found;
> +	}
>   
> -	 /*
> -	  * Use XFS_REFLINK_ALLOC_EXTSZALIGN to hint at aligning new extents
> -	  * according to extszhint, such that there will be a greater chance
> -	  * that future atomic writes to that same range will be aligned (and
> -	  * don't require this COW-based method).
> -	  */
> -	error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
> -			&lockmode, XFS_REFLINK_CONVERT_UNWRITTEN |
> -			XFS_REFLINK_FORCE_COW | XFS_REFLINK_ALLOC_EXTSZALIGN);
>   	/*
> -	 * Don't check @shared. For atomic writes, we should error when
> -	 * we don't get a COW fork extent mapping.
> +	 * Allocate the entire reservation as unwritten blocks.
> +	 *
> +	 * Use XFS_BMAPI_EXTSZALIGN to hint at aligning new extents according to
> +	 * extszhint, such that there will be a greater chance that future
> +	 * atomic writes to that same range will be aligned (and don't require
> +	 * this COW-based method).
>   	 */
> -	if (error)
> +	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
> +			XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC |
> +			XFS_BMAPI_EXTSZALIGN, 0, &cmap, &nmaps);