lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260203062523.3869120-18-yi.zhang@huawei.com>
Date: Tue,  3 Feb 2026 14:25:17 +0800
From: Zhang Yi <yi.zhang@...wei.com>
To: linux-ext4@...r.kernel.org
Cc: linux-fsdevel@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	tytso@....edu,
	adilger.kernel@...ger.ca,
	jack@...e.cz,
	ojaswin@...ux.ibm.com,
	ritesh.list@...il.com,
	hch@...radead.org,
	djwong@...nel.org,
	yi.zhang@...wei.com,
	yi.zhang@...weicloud.com,
	yizhang089@...il.com,
	libaokun1@...wei.com,
	yangerkun@...wei.com,
	yukuai@...as.com
Subject: [PATCH -next v2 17/22] ext4: implement partial block zero range iomap path

Introduce a new iomap_ops instance, ext4_iomap_zero_ops, along with
ext4_iomap_block_zero_range(), to implement iomap-based partial block
zeroing for ext4. ext4_iomap_block_zero_range() invokes
iomap_zero_range() with ext4_iomap_zero_ops, whose ->iomap_begin
callback, ext4_iomap_zero_begin(), locates and zeroes out a mapped
partial block or a dirty, unwritten partial block.

Note that zeroing out under an active journal handle can cause a
deadlock, since the order of acquiring the folio lock and starting a
handle is inconsistent with the iomap iteration procedure. Therefore,
ext4_iomap_block_zero_range() must not be called under an active handle.

Signed-off-by: Zhang Yi <yi.zhang@...wei.com>
---
 fs/ext4/inode.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0d2852159fa3..c59f3adba0f3 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4107,6 +4107,50 @@ static int ext4_iomap_buffered_da_write_end(struct inode *inode, loff_t offset,
 	return 0;
 }
 
+static int ext4_iomap_zero_begin(struct inode *inode,
+		loff_t offset, loff_t length, unsigned int flags,
+		struct iomap *iomap, struct iomap *srcmap)
+{
+	struct iomap_iter *iter = container_of(iomap, struct iomap_iter, iomap);
+	struct ext4_map_blocks map;
+	u8 blkbits = inode->i_blkbits;
+	unsigned int iomap_flags = 0;
+	int ret;
+
+	ret = ext4_emergency_state(inode->i_sb);
+	if (unlikely(ret))
+		return ret;
+
+	if (WARN_ON_ONCE(!(flags & IOMAP_ZERO)))
+		return -EINVAL;
+
+	ret = ext4_iomap_map_blocks(inode, offset, length, NULL, &map);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Look up dirty folios for unwritten mappings within EOF. Providing
+	 * this bypasses the flush iomap uses to trigger extent conversion
+	 * when unwritten mappings have dirty pagecache in need of zeroing.
+	 */
+	if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+		loff_t offset = ((loff_t)map.m_lblk) << blkbits;
+		loff_t end = ((loff_t)map.m_lblk + map.m_len) << blkbits;
+
+		iomap_fill_dirty_folios(iter, &offset, end, &iomap_flags);
+		if ((offset >> blkbits) < map.m_lblk + map.m_len)
+			map.m_len = (offset >> blkbits) - map.m_lblk;
+	}
+
+	ext4_set_iomap(inode, iomap, &map, offset, length, flags);
+	iomap->flags |= iomap_flags;
+
+	return 0;
+}
+
+const struct iomap_ops ext4_iomap_zero_ops = {
+	.iomap_begin = ext4_iomap_zero_begin,
+};
 
 const struct iomap_ops ext4_iomap_buffered_write_ops = {
 	.iomap_begin = ext4_iomap_buffered_write_begin,
@@ -4622,6 +4666,32 @@ static int ext4_journalled_block_zero_range(struct inode *inode, loff_t from,
 	return err;
 }
 
+static int ext4_iomap_block_zero_range(struct inode *inode, loff_t from,
+				       loff_t length, bool *did_zero)
+{
+	/*
+	 * Zeroing out under an active handle can cause deadlock since
+	 * the order of acquiring the folio lock and starting a handle is
+	 * inconsistent with the iomap writeback procedure.
+	 */
+	if (WARN_ON_ONCE(ext4_handle_valid(journal_current_handle())))
+		return -EINVAL;
+
+	/* The zeroing scope should not extend across a block. */
+	if (WARN_ON_ONCE((from >> inode->i_blkbits) !=
+			 ((from + length - 1) >> inode->i_blkbits)))
+		return -EINVAL;
+
+	if (!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS) &&
+	    !(inode_state_read_once(inode) & (I_NEW | I_FREEING)))
+		WARN_ON_ONCE(!inode_is_locked(inode) &&
+			!rwsem_is_locked(&inode->i_mapping->invalidate_lock));
+
+	return iomap_zero_range(inode, from, length, did_zero,
+				&ext4_iomap_zero_ops,
+				&ext4_iomap_write_ops, NULL);
+}
+
 /*
  * ext4_block_zero_page_range() zeros out a mapping of length 'length'
  * starting from file offset 'from'.  The range to be zero'd must
@@ -4650,6 +4720,9 @@ static int ext4_block_zero_page_range(struct address_space *mapping,
 	} else if (ext4_should_journal_data(inode)) {
 		return ext4_journalled_block_zero_range(inode, from,
 							length, did_zero);
+	} else if (ext4_inode_buffered_iomap(inode)) {
+		return ext4_iomap_block_zero_range(inode, from, length,
+						   did_zero);
 	}
 	return ext4_block_zero_range(inode, from, length, did_zero);
 }
@@ -5063,6 +5136,18 @@ int ext4_truncate(struct inode *inode)
 			err = zero_len;
 			goto out_trace;
 		}
+		/*
+		 * inodes using the iomap buffered I/O path do not use the
+		 * ordered data mode, it is necessary to write out zeroed data
+		 * before the updating i_disksize transaction is committed.
+		 */
+		if (zero_len > 0 && ext4_inode_buffered_iomap(inode)) {
+			err = filemap_write_and_wait_range(mapping,
+					inode->i_size,
+					inode->i_size + zero_len - 1);
+			if (err)
+				return err;
+		}
 	}
 
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-- 
2.52.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ