[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260203062523.3869120-18-yi.zhang@huawei.com>
Date: Tue, 3 Feb 2026 14:25:17 +0800
From: Zhang Yi <yi.zhang@...wei.com>
To: linux-ext4@...r.kernel.org
Cc: linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org,
tytso@....edu,
adilger.kernel@...ger.ca,
jack@...e.cz,
ojaswin@...ux.ibm.com,
ritesh.list@...il.com,
hch@...radead.org,
djwong@...nel.org,
yi.zhang@...wei.com,
yi.zhang@...weicloud.com,
yizhang089@...il.com,
libaokun1@...wei.com,
yangerkun@...wei.com,
yukuai@...as.com
Subject: [PATCH -next v2 17/22] ext4: implement partial block zero range iomap path
Introduce a new iomap_ops instance, ext4_iomap_zero_ops, along with
ext4_iomap_block_zero_range() to implement partial block zeroing through
iomap for ext4. ext4_iomap_block_zero_range() invokes iomap_zero_range()
and passes ext4_iomap_zero_ops, whose ext4_iomap_zero_begin() callback
locates the mapped partial block, or the dirty, unwritten partial block,
that is to be zeroed.
Note that zeroing out under an active handle can cause deadlock since
the order of acquiring the folio lock and starting a handle is
inconsistent with the iomap iteration procedure. Therefore,
ext4_iomap_block_zero_range() cannot be called under an active handle.
Signed-off-by: Zhang Yi <yi.zhang@...wei.com>
---
fs/ext4/inode.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 85 insertions(+)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0d2852159fa3..c59f3adba0f3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4107,6 +4107,50 @@ static int ext4_iomap_buffered_da_write_end(struct inode *inode, loff_t offset,
return 0;
}
+/*
+ * ->iomap_begin callback of ext4_iomap_zero_ops.
+ *
+ * Looks up the mapping for the range starting at @offset with
+ * ext4_iomap_map_blocks() and reports it through @iomap. For an
+ * unwritten mapping, iomap_fill_dirty_folios() is called on the mapped
+ * range and the mapping is trimmed to where that lookup stopped.
+ * @srcmap is not used.
+ *
+ * Returns 0 on success, the non-zero result of ext4_emergency_state(),
+ * -EINVAL (with a one-time warning) if @flags lacks IOMAP_ZERO, or the
+ * negative error from ext4_iomap_map_blocks().
+ */
+static int ext4_iomap_zero_begin(struct inode *inode,
+				 loff_t offset, loff_t length, unsigned int flags,
+				 struct iomap *iomap, struct iomap *srcmap)
+{
+	/* Relies on @iomap being the iomap member of a struct iomap_iter. */
+	struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap);
+	struct ext4_map_blocks map;
+	u8 blkbits = inode->i_blkbits;
+	unsigned int iomap_flags = 0;
+	int ret;
+
+	ret = ext4_emergency_state(inode->i_sb);
+	if (unlikely(ret))
+		return ret;
+
+	if (WARN_ON_ONCE(!(flags & IOMAP_ZERO)))
+		return -EINVAL;
+
+	ret = ext4_iomap_map_blocks(inode, offset, length, NULL, &map);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Look up dirty folios for unwritten mappings within EOF. Providing
+	 * this bypasses the flush iomap uses to trigger extent conversion
+	 * when unwritten mappings have dirty pagecache in need of zeroing.
+	 */
+	if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+		/*
+		 * Use a dedicated cursor rather than shadowing @offset: the
+		 * parameter is still needed by ext4_set_iomap() below.
+		 */
+		loff_t pos = ((loff_t)map.m_lblk) << blkbits;
+		loff_t end = ((loff_t)map.m_lblk + map.m_len) << blkbits;
+
+		iomap_fill_dirty_folios(iter, &pos, end, &iomap_flags);
+		/* Trim the mapping if the lookup stopped short of its end. */
+		if ((pos >> blkbits) < map.m_lblk + map.m_len)
+			map.m_len = (pos >> blkbits) - map.m_lblk;
+	}
+
+	ext4_set_iomap(inode, iomap, &map, offset, length, flags);
+	/* Propagate any flags set by iomap_fill_dirty_folios(). */
+	iomap->flags |= iomap_flags;
+
+	return 0;
+}
+
+/*
+ * iomap operations handed to iomap_zero_range() by
+ * ext4_iomap_block_zero_range(); only ->iomap_begin is provided.
+ */
+const struct iomap_ops ext4_iomap_zero_ops = {
+ .iomap_begin = ext4_iomap_zero_begin,
+};
const struct iomap_ops ext4_iomap_buffered_write_ops = {
.iomap_begin = ext4_iomap_buffered_write_begin,
@@ -4622,6 +4666,32 @@ static int ext4_journalled_block_zero_range(struct inode *inode, loff_t from,
return err;
}
+/*
+ * Zero @length bytes starting at @from, all within one block of @inode,
+ * by calling iomap_zero_range() with ext4_iomap_zero_ops.
+ *
+ * Must not be called with a valid journal handle active, and the range
+ * must not cross a block boundary; either violation triggers a one-time
+ * warning and returns -EINVAL. Otherwise returns the result of
+ * iomap_zero_range(), which is also given @did_zero.
+ */
+static int ext4_iomap_block_zero_range(struct inode *inode, loff_t from,
+				       loff_t length, bool *did_zero)
+{
+	struct address_space *mapping = inode->i_mapping;
+	unsigned int blkbits = inode->i_blkbits;
+	bool check_locks;
+
+	/*
+	 * The iomap writeback procedure takes the folio lock and starts a
+	 * handle in an order inconsistent with zeroing under an already
+	 * active handle, so doing that here could deadlock.
+	 */
+	if (WARN_ON_ONCE(ext4_handle_valid(journal_current_handle())))
+		return -EINVAL;
+
+	/* The first and last byte to zero must fall in the same block. */
+	if (WARN_ON_ONCE((from >> blkbits) !=
+			 ((from + length - 1) >> blkbits)))
+		return -EINVAL;
+
+	/*
+	 * Skip the lock assertion when the superblock has EXT4_ORPHAN_FS
+	 * set or the inode is I_NEW / I_FREEING; otherwise expect either
+	 * the inode lock or the mapping's invalidate_lock to be held.
+	 */
+	check_locks = !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS) &&
+		      !(inode_state_read_once(inode) & (I_NEW | I_FREEING));
+	if (check_locks)
+		WARN_ON_ONCE(!(inode_is_locked(inode) ||
+			       rwsem_is_locked(&mapping->invalidate_lock)));
+
+	return iomap_zero_range(inode, from, length, did_zero,
+				&ext4_iomap_zero_ops,
+				&ext4_iomap_write_ops, NULL);
+}
+
+
/*
* ext4_block_zero_page_range() zeros out a mapping of length 'length'
* starting from file offset 'from'. The range to be zero'd must
@@ -4650,6 +4720,9 @@ static int ext4_block_zero_page_range(struct address_space *mapping,
} else if (ext4_should_journal_data(inode)) {
return ext4_journalled_block_zero_range(inode, from,
length, did_zero);
+ } else if (ext4_inode_buffered_iomap(inode)) {
+ return ext4_iomap_block_zero_range(inode, from, length,
+ did_zero);
}
return ext4_block_zero_range(inode, from, length, did_zero);
}
@@ -5063,6 +5136,18 @@ int ext4_truncate(struct inode *inode)
err = zero_len;
goto out_trace;
}
+ /*
+ * Inodes using the iomap buffered I/O path do not use ordered
+ * data mode, so the zeroed data must be written out before the
+ * transaction that updates i_disksize is committed.
+ */
+ if (zero_len > 0 && ext4_inode_buffered_iomap(inode)) {
+ err = filemap_write_and_wait_range(mapping,
+ inode->i_size,
+ inode->i_size + zero_len - 1);
+ if (err)
+ return err;
+ }
}
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
--
2.52.0
Powered by blists - more mailing lists