lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250102140411.14617-5-john.g.garry@oracle.com>
Date: Thu,  2 Jan 2025 14:04:08 +0000
From: John Garry <john.g.garry@...cle.com>
To: brauner@...nel.org, djwong@...nel.org, cem@...nel.org, dchinner@...hat.com,
        hch@....de, ritesh.list@...il.com
Cc: linux-xfs@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        linux-kernel@...r.kernel.org, martin.petersen@...cle.com,
        John Garry <john.g.garry@...cle.com>
Subject: [PATCH v3 4/7] xfs: Add extent zeroing support for atomic writes

An atomic write which spans mixed unwritten and mapped extents would be
rejected. This is one reason why the atomic write unit min and max are
currently fixed at the block size.

To enable large atomic writes, any unwritten extents need to be zeroed
before issuing the atomic write. So call iomap_dio_zero_unwritten() for
this scenario and retry the atomic write.

Whether there are any unwritten extents can be detected by passing
IOMAP_DIO_OVERWRITE_ONLY to the original iomap_dio_rw() call.

After iomap_dio_zero_unwritten() is called, iomap_dio_rw() is retried -
if that fails, then there really is something wrong.

However, keep the same behaviour for writing a single block, i.e. we don't
need to pre-zero.

Signed-off-by: John Garry <john.g.garry@...cle.com>
---
 fs/xfs/xfs_file.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 9a435b1ff264..2c810f75dbbd 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -578,10 +578,47 @@ xfs_dio_write_end_io(
 	return error;
 }
 
+static int
+xfs_dio_write_end_zero_unwritten(
+	struct kiocb		*iocb,
+	ssize_t			size,
+	int			error,
+	unsigned		flags)
+{
+	struct inode		*inode = file_inode(iocb->ki_filp);
+	struct xfs_inode	*ip = XFS_I(inode);
+	loff_t			offset = iocb->ki_pos;
+	unsigned int		nofs_flag;
+
+	trace_xfs_end_io_direct_write(ip, offset, size);
+
+	if (xfs_is_shutdown(ip->i_mount))
+		return -EIO;
+
+	if (error)
+		return error;
+	if (WARN_ON_ONCE(!size))
+		return 0;
+	if (!(flags & IOMAP_DIO_UNWRITTEN))
+		return 0;
+
+	/* NOFS scope for unwritten conversion, as in xfs_dio_write_end_io() */
+	nofs_flag = memalloc_nofs_save();
+
+	error = xfs_iomap_write_unwritten(ip, offset, size, true);
+
+	memalloc_nofs_restore(nofs_flag);
+	return error;
+}
+
 static const struct iomap_dio_ops xfs_dio_write_ops = {
 	.end_io		= xfs_dio_write_end_io,
 };
 
+static const struct iomap_dio_ops xfs_dio_zero_ops = {
+	.end_io		= xfs_dio_write_end_zero_unwritten,
+};
+
 /*
  * Handle block aligned direct I/O writes
  */
@@ -619,6 +656,63 @@ xfs_file_dio_write_aligned(
 	return ret;
 }
 
+static noinline ssize_t
+xfs_file_dio_write_atomic(
+	struct xfs_inode	*ip,
+	struct kiocb		*iocb,
+	struct iov_iter		*from)
+{
+	unsigned int		iolock = XFS_IOLOCK_SHARED;
+	bool			do_zero = false;
+	unsigned int		dio_flags;
+	ssize_t			ret;
+
+	/*
+	 * Only pre-zero unwritten extents for multi-block writes. Pass
+	 * IOMAP_DIO_OVERWRITE_ONLY to detect when zeroing is required, as
+	 * it ensures that a single written mapping is provided.
+	 */
+	if (iov_iter_count(from) > ip->i_mount->m_sb.sb_blocksize)
+		dio_flags = IOMAP_DIO_OVERWRITE_ONLY;
+	else
+		dio_flags = 0;
+
+retry:
+	ret = xfs_ilock_iocb_for_write(iocb, &iolock);
+	if (ret)
+		return ret;
+
+	ret = xfs_file_write_checks(iocb, from, &iolock);
+	if (ret)
+		goto out_unlock;
+
+	if (do_zero) {
+		ret = iomap_dio_zero_unwritten(iocb, from,
+				&xfs_direct_write_iomap_ops,
+				&xfs_dio_zero_ops);
+		if (ret)
+			goto out_unlock;
+	}
+
+	trace_xfs_file_direct_write(iocb, from);
+	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
+			&xfs_dio_write_ops, dio_flags, NULL, 0);
+
+	if (do_zero && ret < 0)
+		goto out_unlock;
+
+	if (ret == -EAGAIN && !(iocb->ki_flags & IOCB_NOWAIT)) {
+		xfs_iunlock(ip, iolock);
+		do_zero = true;
+		goto retry;
+	}
+
+out_unlock:
+	if (iolock)
+		xfs_iunlock(ip, iolock);
+	return ret;
+}
+
 /*
  * Handle block unaligned direct I/O writes
  *
@@ -723,6 +817,8 @@ xfs_file_dio_write(
 		return -EINVAL;
 	if ((iocb->ki_pos | count) & ip->i_mount->m_blockmask)
 		return xfs_file_dio_write_unaligned(ip, iocb, from);
+	if (iocb->ki_flags & IOCB_ATOMIC)
+		return xfs_file_dio_write_atomic(ip, iocb, from);
 	return xfs_file_dio_write_aligned(ip, iocb, from);
 }
 
-- 
2.31.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ