[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20250204120127.2396727-4-john.g.garry@oracle.com>
Date: Tue, 4 Feb 2025 12:01:20 +0000
From: John Garry <john.g.garry@...cle.com>
To: brauner@...nel.org, djwong@...nel.org, cem@...nel.org, dchinner@...hat.com,
hch@....de
Cc: linux-xfs@...r.kernel.org, linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org, ojaswin@...ux.ibm.com,
ritesh.list@...il.com, martin.petersen@...cle.com,
John Garry <john.g.garry@...cle.com>
Subject: [PATCH RFC 03/10] iomap: Support CoW-based atomic writes
Currently atomic write support requires dedicated HW support. This imposes
a restriction on the filesystem that disk blocks need to be aligned and
contiguously mapped to FS blocks to issue atomic writes.
XFS has no method to guarantee FS block alignment. As such, atomic writes
are currently limited to 1x FS block there.
To allow deal with the scenario that we are issuing an atomic write over
misaligned or discontiguous data blocks larger atomic writes - and raise
the atomic write limit - support a CoW-based software emulated atomic
write mode.
For this special mode, the FS will reserve blocks for that data to be
written and then atomically map that data in once the data has been
commited to disk.
It is the responsibility of the FS to detect discontiguous atomic writes
and switch to IOMAP_DIO_ATOMIC_COW mode and retry the write.
Signed-off-by: John Garry <john.g.garry@...cle.com>
---
fs/iomap/direct-io.c | 23 ++++++++++++++++-------
include/linux/iomap.h | 9 +++++++++
2 files changed, 25 insertions(+), 7 deletions(-)
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 3dd883dd77d2..e63b5096bcd8 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -271,7 +271,7 @@ static int iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
* clearing the WRITE_THROUGH flag in the dio request.
*/
static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
- const struct iomap *iomap, bool use_fua, bool atomic)
+ const struct iomap *iomap, bool use_fua, bool atomic_bio)
{
blk_opf_t opflags = REQ_SYNC | REQ_IDLE;
@@ -283,7 +283,7 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
opflags |= REQ_FUA;
else
dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
- if (atomic)
+ if (atomic_bio)
opflags |= REQ_ATOMIC;
return opflags;
@@ -301,13 +301,19 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
blk_opf_t bio_opf;
struct bio *bio;
bool need_zeroout = false;
+ bool atomic_bio = false;
bool use_fua = false;
int nr_pages, ret = 0;
size_t copied = 0;
size_t orig_count;
- if (atomic && length != iter->len)
- return -EINVAL;
+ if (atomic) {
+ if (!(iomap->flags & IOMAP_F_ATOMIC_COW)) {
+ if (length != iter->len)
+ return -EINVAL;
+ atomic_bio = true;
+ }
+ }
if ((pos | length) & (bdev_logical_block_size(iomap->bdev) - 1) ||
!bdev_iter_is_aligned(iomap->bdev, dio->submit.iter))
@@ -318,7 +324,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
need_zeroout = true;
}
- if (iomap->flags & IOMAP_F_SHARED)
+ if (iomap->flags & (IOMAP_F_SHARED | IOMAP_F_ATOMIC_COW))
dio->flags |= IOMAP_DIO_COW;
if (iomap->flags & IOMAP_F_NEW) {
@@ -383,7 +389,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
goto out;
}
- bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic);
+ bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic_bio);
nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS);
do {
@@ -416,7 +422,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
}
n = bio->bi_iter.bi_size;
- if (WARN_ON_ONCE(atomic && n != length)) {
+ if (WARN_ON_ONCE(atomic_bio && n != length)) {
/*
* This bio should have covered the complete length,
* which it doesn't, so error. We may need to zero out
@@ -639,6 +645,9 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
if (iocb->ki_flags & IOCB_DIO_CALLER_COMP)
dio->flags |= IOMAP_DIO_CALLER_COMP;
+ if (dio_flags & IOMAP_DIO_ATOMIC_COW)
+ iomi.flags |= IOMAP_ATOMIC_COW;
+
if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
ret = -EAGAIN;
if (iomi.pos >= dio->i_size ||
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 75bf54e76f3b..0a0b6798f517 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -56,6 +56,8 @@ struct vm_fault;
*
* IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must
* never be merged with the mapping before it.
+ *
+ * IOMAP_F_ATOMIC_COW indicates that we require atomic CoW end IO handling.
*/
#define IOMAP_F_NEW (1U << 0)
#define IOMAP_F_DIRTY (1U << 1)
@@ -68,6 +70,7 @@ struct vm_fault;
#endif /* CONFIG_BUFFER_HEAD */
#define IOMAP_F_XATTR (1U << 5)
#define IOMAP_F_BOUNDARY (1U << 6)
+#define IOMAP_F_ATOMIC_COW (1U << 7)
/*
* Flags set by the core iomap code during operations:
@@ -183,6 +186,7 @@ struct iomap_folio_ops {
#define IOMAP_DAX 0
#endif /* CONFIG_FS_DAX */
#define IOMAP_ATOMIC (1 << 9)
+#define IOMAP_ATOMIC_COW (1 << 10)
struct iomap_ops {
/*
@@ -434,6 +438,11 @@ struct iomap_dio_ops {
*/
#define IOMAP_DIO_PARTIAL (1 << 2)
+/*
+ * Use CoW-based software emulated atomic write.
+ */
+#define IOMAP_DIO_ATOMIC_COW (1 << 3)
+
ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
const struct iomap_ops *ops, const struct iomap_dio_ops *dops,
unsigned int dio_flags, void *private, size_t done_before);
--
2.31.1
Powered by blists - more mailing lists