linux-kernel - [PATCH v2 08/14] fs: xfs: iomap: Sub-extent zeroing

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240304130428.13026-9-john.g.garry@oracle.com>
Date: Mon,  4 Mar 2024 13:04:22 +0000
From: John Garry <john.g.garry@...cle.com>
To: djwong@...nel.org, hch@....de, viro@...iv.linux.org.uk, brauner@...nel.org,
        jack@...e.cz, chandan.babu@...cle.com, david@...morbit.com,
        axboe@...nel.dk
Cc: martin.petersen@...cle.com, linux-kernel@...r.kernel.org,
        linux-xfs@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        tytso@....edu, jbongio@...gle.com, ojaswin@...ux.ibm.com,
        ritesh.list@...il.com, linux-block@...r.kernel.org,
        John Garry <john.g.garry@...cle.com>
Subject: [PATCH v2 08/14] fs: xfs: iomap: Sub-extent zeroing

Set iomap->extent_shift when sub-extent zeroing is required.

We treat a sub-extent write same as an unaligned write, so we can leverage
the existing sub-FSblock unaligned write support, i.e. try a shared lock
with IOMAP_DIO_OVERWRITE_ONLY flag, if this fails then try the exclusive
lock.

In xfs_iomap_write_unwritten(), FSB calcs are now based on the extsize.

Signed-off-by: John Garry <john.g.garry@...cle.com>
---
 fs/xfs/xfs_file.c  | 28 +++++++++++++++-------------
 fs/xfs/xfs_iomap.c | 15 +++++++++++++--
 2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e33e5e13b95f..d0bd9d5f596c 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -617,18 +617,19 @@ xfs_file_dio_write_aligned(
  * Handle block unaligned direct I/O writes
  *
  * In most cases direct I/O writes will be done holding IOLOCK_SHARED, allowing
- * them to be done in parallel with reads and other direct I/O writes.  However,
- * if the I/O is not aligned to filesystem blocks, the direct I/O layer may need
- * to do sub-block zeroing and that requires serialisation against other direct
- * I/O to the same block.  In this case we need to serialise the submission of
- * the unaligned I/O so that we don't get racing block zeroing in the dio layer.
- * In the case where sub-block zeroing is not required, we can do concurrent
- * sub-block dios to the same block successfully.
+ * them to be done in parallel with reads and other direct I/O writes.
+ * However if the I/O is not aligned to filesystem blocks/extent, the direct
+ * I/O layer may need to do sub-block/extent zeroing and that requires
+ * serialisation against other direct I/O to the same block/extent.  In this
+ * case we need to serialise the submission of the unaligned I/O so that we
+ * don't get racing block/extent zeroing in the dio layer.
+ * In the case where sub-block/extent zeroing is not required, we can do
+ * concurrent sub-block/extent dios to the same block/extent successfully.
  *
  * Optimistically submit the I/O using the shared lock first, but use the
  * IOMAP_DIO_OVERWRITE_ONLY flag to tell the lower layers to return -EAGAIN
- * if block allocation or partial block zeroing would be required.  In that case
- * we try again with the exclusive lock.
+ * if block/extent allocation or partial block/extent zeroing would be
+ * required.  In that case we try again with the exclusive lock.
  */
 static noinline ssize_t
 xfs_file_dio_write_unaligned(
@@ -643,9 +644,9 @@ xfs_file_dio_write_unaligned(
 	ssize_t			ret;
 
 	/*
-	 * Extending writes need exclusivity because of the sub-block zeroing
-	 * that the DIO code always does for partial tail blocks beyond EOF, so
-	 * don't even bother trying the fast path in this case.
+	 * Extending writes need exclusivity because of the sub-block/extent
+	 * zeroing that the DIO code always does for partial tail blocks
+	 * beyond EOF, so don't even bother trying the fast path in this case.
 	 */
 	if (iocb->ki_pos > isize || iocb->ki_pos + count >= isize) {
 		if (iocb->ki_flags & IOCB_NOWAIT)
@@ -709,13 +710,14 @@ xfs_file_dio_write(
 	struct iov_iter		*from)
 {
 	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
+	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_buftarg      *target = xfs_inode_buftarg(ip);
 	size_t			count = iov_iter_count(from);
 
 	/* direct I/O must be aligned to device logical sector size */
 	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
 		return -EINVAL;
-	if ((iocb->ki_pos | count) & ip->i_mount->m_blockmask)
+	if ((iocb->ki_pos | count) & (XFS_FSB_TO_B(mp, xfs_get_extsz(ip)) - 1))
 		return xfs_file_dio_write_unaligned(ip, iocb, from);
 	return xfs_file_dio_write_aligned(ip, iocb, from);
 }
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 70fe873951f3..88cc20bb19c9 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -98,6 +98,7 @@ xfs_bmbt_to_iomap(
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
+	xfs_extlen_t		extsz = xfs_get_extsz(ip);
 
 	if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
 		return xfs_alert_fsblock_zero(ip, imap);
@@ -134,6 +135,8 @@ xfs_bmbt_to_iomap(
 
 	iomap->validity_cookie = sequence_cookie;
 	iomap->folio_ops = &xfs_iomap_folio_ops;
+	if (extsz > 1)
+		iomap->extent_shift = ffs(extsz) - 1;
 	return 0;
 }
 
@@ -563,11 +566,19 @@ xfs_iomap_write_unwritten(
 	xfs_fsize_t	i_size;
 	uint		resblks;
 	int		error;
+	xfs_extlen_t	extsz = xfs_get_extsz(ip);
 
 	trace_xfs_unwritten_convert(ip, offset, count);
 
-	offset_fsb = XFS_B_TO_FSBT(mp, offset);
-	count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
+	if (extsz > 1) {
+		xfs_extlen_t extsize_bytes = XFS_FSB_TO_B(mp, extsz);
+
+		offset_fsb = XFS_B_TO_FSBT(mp, round_down(offset, extsize_bytes));
+		count_fsb = XFS_B_TO_FSB(mp, round_up(offset + count, extsize_bytes));
+	} else {
+		offset_fsb = XFS_B_TO_FSBT(mp, offset);
+		count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
+	}
 	count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
 
 	/*
-- 
2.31.1