lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Message-ID: <20250604020850.1304633-11-yi.zhang@huaweicloud.com> Date: Wed, 4 Jun 2025 10:08:50 +0800 From: Zhang Yi <yi.zhang@...weicloud.com> To: linux-fsdevel@...r.kernel.org, linux-ext4@...r.kernel.org, linux-block@...r.kernel.org, dm-devel@...ts.linux.dev, linux-nvme@...ts.infradead.org, linux-scsi@...r.kernel.org Cc: linux-xfs@...r.kernel.org, linux-kernel@...r.kernel.org, hch@....de, tytso@....edu, djwong@...nel.org, john.g.garry@...cle.com, bmarzins@...hat.com, chaitanyak@...dia.com, shinichiro.kawasaki@....com, brauner@...nel.org, martin.petersen@...cle.com, yi.zhang@...wei.com, yi.zhang@...weicloud.com, chengzhihao1@...wei.com, yukuai3@...wei.com, yangerkun@...wei.com Subject: [PATCH 10/10] ext4: add FALLOC_FL_WRITE_ZEROES support From: Zhang Yi <yi.zhang@...wei.com> Add support for FALLOC_FL_WRITE_ZEROES if the underlying device enables the unmap write zeroes operation. This first allocates blocks as unwritten, then issues a zero command outside of the running journal handle, and finally converts them to a written state. Signed-off-by: Zhang Yi <yi.zhang@...wei.com> --- fs/ext4/extents.c | 66 ++++++++++++++++++++++++++++++------- include/trace/events/ext4.h | 3 +- 2 files changed, 57 insertions(+), 12 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index b543a46fc809..29ce9f6287d0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4501,6 +4501,8 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, struct ext4_map_blocks map; unsigned int credits; loff_t epos, old_size = i_size_read(inode); + unsigned int blkbits = inode->i_blkbits; + bool alloc_zero = false; BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)); map.m_lblk = offset; @@ -4513,6 +4515,17 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, if (len <= EXT_UNWRITTEN_MAX_LEN) flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; + /* + * Do the actual write zero during a running journal transaction + * costs a lot. 
First allocate an unwritten extent and then + * convert it to written after zeroing it out. + */ + if (flags & EXT4_GET_BLOCKS_ZERO) { + flags &= ~EXT4_GET_BLOCKS_ZERO; + flags |= EXT4_GET_BLOCKS_UNWRIT_EXT; + alloc_zero = true; + } + /* * credits to insert 1 extent into extent tree */ @@ -4549,9 +4562,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, * allow a full retry cycle for any remaining allocations */ retries = 0; - map.m_lblk += ret; - map.m_len = len = len - ret; - epos = (loff_t)map.m_lblk << inode->i_blkbits; + epos = (loff_t)(map.m_lblk + ret) << blkbits; inode_set_ctime_current(inode); if (new_size) { if (epos > new_size) @@ -4571,6 +4582,21 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, ret2 = ret3 ? ret3 : ret2; if (unlikely(ret2)) break; + + if (alloc_zero && + (map.m_flags & (EXT4_MAP_MAPPED | EXT4_MAP_UNWRITTEN))) { + ret2 = ext4_issue_zeroout(inode, map.m_lblk, map.m_pblk, + map.m_len); + if (likely(!ret2)) + ret2 = ext4_convert_unwritten_extents(NULL, + inode, (loff_t)map.m_lblk << blkbits, + (loff_t)map.m_len << blkbits); + if (ret2) + break; + } + + map.m_lblk += ret; + map.m_len = len = len - ret; } if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; @@ -4636,7 +4662,11 @@ static long ext4_zero_range(struct file *file, loff_t offset, if (end_lblk > start_lblk) { ext4_lblk_t zero_blks = end_lblk - start_lblk; - flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | EXT4_EX_NOCACHE); + if (mode & FALLOC_FL_WRITE_ZEROES) + flags = EXT4_GET_BLOCKS_CREATE_ZERO | EXT4_EX_NOCACHE; + else + flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | + EXT4_EX_NOCACHE); ret = ext4_alloc_file_blocks(file, start_lblk, zero_blks, new_size, flags); if (ret) @@ -4745,11 +4775,18 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (IS_ENCRYPTED(inode) && (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; + /* + * Don't allow writing 
zeroes if the underlying device does not + * enable the unmap write zeroes operation. + */ + if (!bdev_write_zeroes_unmap(inode->i_sb->s_bdev) && + (mode & FALLOC_FL_WRITE_ZEROES)) + return -EOPNOTSUPP; /* Return error if mode is not supported */ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | - FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | - FALLOC_FL_INSERT_RANGE)) + FALLOC_FL_ZERO_RANGE | FALLOC_FL_COLLAPSE_RANGE | + FALLOC_FL_INSERT_RANGE | FALLOC_FL_WRITE_ZEROES)) return -EOPNOTSUPP; inode_lock(inode); @@ -4780,16 +4817,23 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) if (ret) goto out_invalidate_lock; - if (mode & FALLOC_FL_PUNCH_HOLE) + switch (mode & FALLOC_FL_MODE_MASK) { + case FALLOC_FL_PUNCH_HOLE: ret = ext4_punch_hole(file, offset, len); - else if (mode & FALLOC_FL_COLLAPSE_RANGE) + break; + case FALLOC_FL_COLLAPSE_RANGE: ret = ext4_collapse_range(file, offset, len); - else if (mode & FALLOC_FL_INSERT_RANGE) + break; + case FALLOC_FL_INSERT_RANGE: ret = ext4_insert_range(file, offset, len); - else if (mode & FALLOC_FL_ZERO_RANGE) + break; + case FALLOC_FL_ZERO_RANGE: + case FALLOC_FL_WRITE_ZEROES: ret = ext4_zero_range(file, offset, len, mode); - else + break; + default: ret = -EOPNOTSUPP; + } out_invalidate_lock: filemap_invalidate_unlock(mapping); diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 156908641e68..6f9cf2811733 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -92,7 +92,8 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B); { FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \ { FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \ { FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \ - { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) + { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}, \ + { FALLOC_FL_WRITE_ZEROES, "WRITE_ZEROES"}) TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR); TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME); -- 2.46.1
Powered by blists - more mailing lists