Message-ID: <20250228011941.GE1124788@frogsfrogsfrogs>
Date: Thu, 27 Feb 2025 17:19:41 -0800
From: "Darrick J. Wong" <djwong@...nel.org>
To: John Garry <john.g.garry@...cle.com>
Cc: brauner@...nel.org, cem@...nel.org, linux-xfs@...r.kernel.org,
linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
ojaswin@...ux.ibm.com, ritesh.list@...il.com,
martin.petersen@...cle.com, tytso@....edu,
linux-ext4@...r.kernel.org
Subject: Re: [PATCH v3 08/12] xfs: Iomap SW-based atomic write support
On Thu, Feb 27, 2025 at 06:08:09PM +0000, John Garry wrote:
> In cases where an atomic write occurs over misaligned or discontiguous disk
> blocks, we will use a CoW-based method to issue the atomic write.
>
> So, for that case, return -EAGAIN to request that the write be issued in
> CoW atomic write mode. The dio write path should detect this, similar to
> how misaligned regular DIO writes are handled.
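(For readers of the thread: the retry that this paragraph describes would
look roughly like the sketch below on the submission side.  This is only an
illustration of the shape, not code from the series -- the function name and
the IOMAP_DIO_ATOMIC_SW dio flag are my own placeholders, and IOCB_NOWAIT
handling is omitted.)

static ssize_t
xfs_file_dio_write_atomic_sketch(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	unsigned int		dio_flags = 0;	/* try HW-based mode first */
	ssize_t			ret;

retry:
	ret = iomap_dio_rw(iocb, from, &xfs_atomic_write_iomap_ops,
			&xfs_dio_write_ops, dio_flags, NULL, 0);

	/*
	 * ->iomap_begin asked for the CoW-based method; retry exactly once
	 * with the (assumed) SW flag set so the atomic write is staged
	 * through the COW fork instead.
	 */
	if (ret == -EAGAIN && !(dio_flags & IOMAP_DIO_ATOMIC_SW)) {
		dio_flags |= IOMAP_DIO_ATOMIC_SW;
		goto retry;
	}
	return ret;
}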
>
> For the normal HW-based mode, when the range that we are atomically writing
> to covers a shared data extent, try to allocate a new CoW fork extent.
> However, if we find that what we allocated does not meet atomic write
> requirements in terms of length and alignment, then fall back to the
> CoW-based mode for the atomic write.
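To make the fallback condition concrete (block numbers below are invented):
for a 16k atomic write on a 4k-block filesystem, i.e. offset_fsb 4 through
end_fsb 8, the validity helper added below rejects mappings like these:

	cmap = { .br_startoff = 4, .br_startblock = 107, .br_blockcount = 4 }
		-> !IS_ALIGNED(107, 4): the start block is not aligned to the
		   extent length, so the device cannot be asked to commit the
		   range as one naturally-aligned atomic write; return -EAGAIN
		   and redo the write in CoW (SW) mode.

	cmap = { .br_startoff = 4, .br_startblock = 108, .br_blockcount = 2 }
		-> the mapping only covers fsb 4..6 of the 4..8 range, so the
		   write would need more than one mapping; same fallback.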
>
> Signed-off-by: John Garry <john.g.garry@...cle.com>
Looks reasonable,
Reviewed-by: "Darrick J. Wong" <djwong@...nel.org>
--D
> ---
> fs/xfs/xfs_iomap.c | 143 ++++++++++++++++++++++++++++++++++++++++++++-
> fs/xfs/xfs_iomap.h | 1 +
> 2 files changed, 142 insertions(+), 2 deletions(-)
>
> diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
> index edfc038bf728..573108cbdc5c 100644
> --- a/fs/xfs/xfs_iomap.c
> +++ b/fs/xfs/xfs_iomap.c
> @@ -795,6 +795,23 @@ imap_spans_range(
> return true;
> }
>
> +static bool
> +xfs_bmap_valid_for_atomic_write(
> + struct xfs_bmbt_irec *imap,
> + xfs_fileoff_t offset_fsb,
> + xfs_fileoff_t end_fsb)
> +{
> + /* Misaligned start block wrt size */
> + if (!IS_ALIGNED(imap->br_startblock, imap->br_blockcount))
> + return false;
> +
> + /* Discontiguous or mixed extents */
> + if (!imap_spans_range(imap, offset_fsb, end_fsb))
> + return false;
> +
> + return true;
> +}
> +
> static int
> xfs_direct_write_iomap_begin(
> struct inode *inode,
> @@ -809,10 +826,13 @@ xfs_direct_write_iomap_begin(
> struct xfs_bmbt_irec imap, cmap;
> xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
> xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length);
> + xfs_fileoff_t orig_end_fsb = end_fsb;
> + bool atomic_hw = flags & IOMAP_ATOMIC_HW;
> int nimaps = 1, error = 0;
> unsigned int reflink_flags = 0;
> bool shared = false;
> u16 iomap_flags = 0;
> + bool needs_alloc;
> unsigned int lockmode;
> u64 seq;
>
> @@ -871,13 +891,37 @@ xfs_direct_write_iomap_begin(
> &lockmode, reflink_flags);
> if (error)
> goto out_unlock;
> - if (shared)
> + if (shared) {
> + if (atomic_hw &&
> + !xfs_bmap_valid_for_atomic_write(&cmap,
> + offset_fsb, end_fsb)) {
> + error = -EAGAIN;
> + goto out_unlock;
> + }
> goto out_found_cow;
> + }
> end_fsb = imap.br_startoff + imap.br_blockcount;
> length = XFS_FSB_TO_B(mp, end_fsb) - offset;
> }
>
> - if (imap_needs_alloc(inode, flags, &imap, nimaps))
> + needs_alloc = imap_needs_alloc(inode, flags, &imap, nimaps);
> +
> + if (atomic_hw) {
> + error = -EAGAIN;
> + /*
> + * Use CoW method for when we need to alloc > 1 block,
> + * otherwise we might allocate less than what we need here and
> + * have multiple mappings.
> + */
> + if (needs_alloc && orig_end_fsb - offset_fsb > 1)
> + goto out_unlock;
> +
> + if (!xfs_bmap_valid_for_atomic_write(&imap, offset_fsb,
> + orig_end_fsb))
> + goto out_unlock;
> + }
> +
> + if (needs_alloc)
> goto allocate_blocks;
>
> /*
> @@ -965,6 +1009,101 @@ const struct iomap_ops xfs_direct_write_iomap_ops = {
> .iomap_begin = xfs_direct_write_iomap_begin,
> };
>
> +static int
> +xfs_atomic_write_sw_iomap_begin(
> + struct inode *inode,
> + loff_t offset,
> + loff_t length,
> + unsigned flags,
> + struct iomap *iomap,
> + struct iomap *srcmap)
> +{
> + struct xfs_inode *ip = XFS_I(inode);
> + struct xfs_mount *mp = ip->i_mount;
> + struct xfs_bmbt_irec imap, cmap;
> + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
> + xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length);
> + int nimaps = 1, error;
> + unsigned int reflink_flags;
> + bool shared = false;
> + u16 iomap_flags = 0;
> + unsigned int lockmode = XFS_ILOCK_EXCL;
> + u64 seq;
> +
> + if (xfs_is_shutdown(mp))
> + return -EIO;
> +
> + reflink_flags = XFS_REFLINK_CONVERT | XFS_REFLINK_ATOMIC_SW;
> +
> + /*
> + * Set IOMAP_F_DIRTY similar to xfs_atomic_write_iomap_begin()
> + */
> + if (offset + length > i_size_read(inode))
> + iomap_flags |= IOMAP_F_DIRTY;
> +
> + error = xfs_ilock_for_iomap(ip, flags, &lockmode);
> + if (error)
> + return error;
> +
> + error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
> + &nimaps, 0);
> + if (error)
> + goto out_unlock;
> +
> + error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
> + &lockmode, reflink_flags);
> + /*
> + * Don't check @shared. For atomic writes, we should error when
> + * we don't get a COW mapping
> + */
> + if (error)
> + goto out_unlock;
> +
> + end_fsb = imap.br_startoff + imap.br_blockcount;
> +
> + length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
> + trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
> + if (imap.br_startblock != HOLESTARTBLOCK) {
> + seq = xfs_iomap_inode_sequence(ip, 0);
> + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
> + if (error)
> + goto out_unlock;
> + }
> + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
> + xfs_iunlock(ip, lockmode);
> + return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq);
> +
> +out_unlock:
> + if (lockmode)
> + xfs_iunlock(ip, lockmode);
> + return error;
> +}
> +
> +static int
> +xfs_atomic_write_iomap_begin(
> + struct inode *inode,
> + loff_t offset,
> + loff_t length,
> + unsigned flags,
> + struct iomap *iomap,
> + struct iomap *srcmap)
> +{
> + ASSERT(flags & IOMAP_WRITE);
> + ASSERT(flags & IOMAP_DIRECT);
> +
> + if (flags & IOMAP_ATOMIC_SW)
> + return xfs_atomic_write_sw_iomap_begin(inode, offset, length,
> + flags, iomap, srcmap);
> +
> + ASSERT(flags & IOMAP_ATOMIC_HW);
> + return xfs_direct_write_iomap_begin(inode, offset, length, flags,
> + iomap, srcmap);
> +}
> +
> +const struct iomap_ops xfs_atomic_write_iomap_ops = {
> + .iomap_begin = xfs_atomic_write_iomap_begin,
> +};
> +
> static int
> xfs_dax_write_iomap_end(
> struct inode *inode,
> diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
> index 8347268af727..b7fbbc909943 100644
> --- a/fs/xfs/xfs_iomap.h
> +++ b/fs/xfs/xfs_iomap.h
> @@ -53,5 +53,6 @@ extern const struct iomap_ops xfs_read_iomap_ops;
> extern const struct iomap_ops xfs_seek_iomap_ops;
> extern const struct iomap_ops xfs_xattr_iomap_ops;
> extern const struct iomap_ops xfs_dax_write_iomap_ops;
> +extern const struct iomap_ops xfs_atomic_write_iomap_ops;
>
> #endif /* __XFS_IOMAP_H__*/
> --
> 2.31.1
>