lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250204120127.2396727-4-john.g.garry@oracle.com>
Date: Tue,  4 Feb 2025 12:01:20 +0000
From: John Garry <john.g.garry@...cle.com>
To: brauner@...nel.org, djwong@...nel.org, cem@...nel.org, dchinner@...hat.com,
        hch@....de
Cc: linux-xfs@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        linux-kernel@...r.kernel.org, ojaswin@...ux.ibm.com,
        ritesh.list@...il.com, martin.petersen@...cle.com,
        John Garry <john.g.garry@...cle.com>
Subject: [PATCH RFC 03/10] iomap: Support CoW-based atomic writes

Currently atomic write support requires dedicated HW support. This imposes
a restriction on the filesystem that disk blocks need to be aligned and
contiguously mapped to FS blocks to issue atomic writes.

XFS has no method to guarantee FS block alignment. As such, atomic writes
are currently limited to 1x FS block there.

To allow deal with the scenario that we are issuing an atomic write over
misaligned or discontiguous data blocks larger atomic writes - and raise
the atomic write limit - support a CoW-based software emulated atomic
write mode.

For this special mode, the FS will reserve blocks for that data to be
written and then atomically map that data in once the data has been
commited to disk.

It is the responsibility of the FS to detect discontiguous atomic writes
and switch to IOMAP_DIO_ATOMIC_COW mode and retry the write.

Signed-off-by: John Garry <john.g.garry@...cle.com>
---
 fs/iomap/direct-io.c  | 23 ++++++++++++++++-------
 include/linux/iomap.h |  9 +++++++++
 2 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 3dd883dd77d2..e63b5096bcd8 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -271,7 +271,7 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
  * clearing the WRITE_THROUGH flag in the dio request.
  */
 static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
-		const struct iomap *iomap, bool use_fua, bool atomic)
+		const struct iomap *iomap, bool use_fua, bool atomic_bio)
 {
 	blk_opf_t opflags = REQ_SYNC | REQ_IDLE;
 
@@ -283,7 +283,7 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
 		opflags |= REQ_FUA;
 	else
 		dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
-	if (atomic)
+	if (atomic_bio)
 		opflags |= REQ_ATOMIC;
 
 	return opflags;
@@ -301,13 +301,19 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 	blk_opf_t bio_opf;
 	struct bio *bio;
 	bool need_zeroout = false;
+	bool atomic_bio = false;
 	bool use_fua = false;
 	int nr_pages, ret = 0;
 	size_t copied = 0;
 	size_t orig_count;
 
-	if (atomic && length != iter->len)
-		return -EINVAL;
+	if (atomic) {
+		if (!(iomap->flags & IOMAP_F_ATOMIC_COW)) {
+			if (length != iter->len)
+				return -EINVAL;
+			atomic_bio = true;
+		}
+	}
 
 	if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1) ||
 	    !bdev_iter_is_aligned(iomap->bdev, dio->submit.iter))
@@ -318,7 +324,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 		need_zeroout = true;
 	}
 
-	if (iomap->flags & IOMAP_F_SHARED)
+	if (iomap->flags & (IOMAP_F_SHARED | IOMAP_F_ATOMIC_COW))
 		dio->flags |= IOMAP_DIO_COW;
 
 	if (iomap->flags & IOMAP_F_NEW) {
@@ -383,7 +389,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 			goto out;
 	}
 
-	bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic);
+	bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic_bio);
 
 	nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS);
 	do {
@@ -416,7 +422,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 		}
 
 		n = bio->bi_iter.bi_size;
-		if (WARN_ON_ONCE(atomic && n != length)) {
+		if (WARN_ON_ONCE(atomic_bio && n != length)) {
 			/*
 			 * This bio should have covered the complete length,
 			 * which it doesn't, so error. We may need to zero out
@@ -639,6 +645,9 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		if (iocb->ki_flags & IOCB_DIO_CALLER_COMP)
 			dio->flags |= IOMAP_DIO_CALLER_COMP;
 
+		if (dio_flags & IOMAP_DIO_ATOMIC_COW)
+			iomi.flags |= IOMAP_ATOMIC_COW;
+
 		if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
 			ret = -EAGAIN;
 			if (iomi.pos >= dio->i_size ||
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 75bf54e76f3b..0a0b6798f517 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -56,6 +56,8 @@ struct vm_fault;
  *
  * IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must
  * never be merged with the mapping before it.
+ *
+ * IOMAP_F_ATOMIC_COW indicates that we require atomic CoW end IO handling.
  */
 #define IOMAP_F_NEW		(1U << 0)
 #define IOMAP_F_DIRTY		(1U << 1)
@@ -68,6 +70,7 @@ struct vm_fault;
 #endif /* CONFIG_BUFFER_HEAD */
 #define IOMAP_F_XATTR		(1U << 5)
 #define IOMAP_F_BOUNDARY	(1U << 6)
+#define IOMAP_F_ATOMIC_COW	(1U << 7)
 
 /*
  * Flags set by the core iomap code during operations:
@@ -183,6 +186,7 @@ struct iomap_folio_ops {
 #define IOMAP_DAX		0
 #endif /* CONFIG_FS_DAX */
 #define IOMAP_ATOMIC		(1 << 9)
+#define IOMAP_ATOMIC_COW	(1 << 10)
 
 struct iomap_ops {
 	/*
@@ -434,6 +438,11 @@ struct iomap_dio_ops {
  */
 #define IOMAP_DIO_PARTIAL		(1 << 2)
 
+/*
+ * Use CoW-based software emulated atomic write.
+ */
+#define IOMAP_DIO_ATOMIC_COW		(1 << 3)
+
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
 		unsigned int dio_flags, void *private, size_t done_before);
-- 
2.31.1


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ