lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <175573713856.21970.12879323697743523642.stgit@frogsfrogsfrogs>
Date: Wed, 20 Aug 2025 18:17:32 -0700
From: "Darrick J. Wong" <djwong@...nel.org>
To: tytso@....edu
Cc: John@...ves.net, bernd@...ernd.com, linux-fsdevel@...r.kernel.org,
 linux-ext4@...r.kernel.org, miklos@...redi.hu, joannelkoong@...il.com,
 neal@...pa.dev
Subject: [PATCH 07/19] fuse2fs: implement direct write support

From: Darrick J. Wong <djwong@...nel.org>

Wire up an iomap_begin method that can allocate into holes so that we
can do directio writes.

Signed-off-by: "Darrick J. Wong" <djwong@...nel.org>
---
 misc/fuse2fs.c |  470 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 misc/fuse4fs.c |  473 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 937 insertions(+), 6 deletions(-)


diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index 4a9fda62f99bc2..e8e9056a661e71 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -5442,12 +5442,103 @@ static int fuse2fs_iomap_begin_read(struct fuse2fs *ff, ext2_ino_t ino,
 					    opflags, read);
 }
 
+static int fuse2fs_iomap_write_allocate(struct fuse2fs *ff, ext2_ino_t ino,
+				     struct ext2_inode_large *inode, off_t pos,
+				     uint64_t count, uint32_t opflags,
+				     struct fuse_file_iomap *read, bool *dirty)
+{
+	ext2_filsys fs = ff->fs;
+	blk64_t startoff = FUSE2FS_B_TO_FSBT(ff, pos);
+	blk64_t stopoff = FUSE2FS_B_TO_FSB(ff, pos + count);
+	blk64_t old_iblocks;
+	errcode_t err;
+	int ret;
+
+	dbg_printf(ff, "%s: write_alloc ino=%u startoff 0x%llx blockcount 0x%llx\n",
+		   __func__, ino, startoff, stopoff - startoff);
+
+	if (!fs_can_allocate(ff, stopoff - startoff))
+		return -ENOSPC;
+
+	old_iblocks = ext2fs_get_stat_i_blocks(fs, EXT2_INODE(inode));
+	err = ext2fs_fallocate(fs, EXT2_FALLOCATE_FORCE_UNINIT, ino,
+			       EXT2_INODE(inode), ~0ULL, startoff,
+			       stopoff - startoff);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	/*
+	 * New allocations for file data blocks on indirect mapped files are
+	 * zeroed through the IO manager so we have to flush it to disk.
+	 */
+	if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
+	    old_iblocks != ext2fs_get_stat_i_blocks(fs, EXT2_INODE(inode))) {
+		err = io_channel_flush(fs->io);
+		if (err)
+			return translate_error(fs, ino, err);
+	}
+
+	/* pick up the newly allocated mapping */
+	ret = fuse2fs_iomap_begin_read(ff, ino, inode, pos, count, opflags,
+				       read);
+	if (ret)
+		return ret;
+
+	read->flags |= FUSE_IOMAP_F_DIRTY;
+	*dirty = true;
+	return 0;
+}
+
+static off_t fuse2fs_max_file_size(const struct fuse2fs *ff,
+				   const struct ext2_inode_large *inode)
+{
+	ext2_filsys fs = ff->fs;
+	blk64_t addr_per_block, max_map_block;
+
+	if (inode->i_flags & EXT4_EXTENTS_FL) {
+		max_map_block = (1ULL << 32) - 1;
+	} else {
+		addr_per_block = fs->blocksize >> 2;
+		max_map_block = addr_per_block;
+		max_map_block += addr_per_block * addr_per_block;
+		max_map_block += addr_per_block * addr_per_block * addr_per_block;
+		max_map_block += 12;
+	}
+
+	return FUSE2FS_FSB_TO_B(ff, max_map_block) + (fs->blocksize - 1);
+}
+
 static int fuse2fs_iomap_begin_write(struct fuse2fs *ff, ext2_ino_t ino,
 				     struct ext2_inode_large *inode, off_t pos,
 				     uint64_t count, uint32_t opflags,
-				     struct fuse_file_iomap *read)
+				     struct fuse_file_iomap *read,
+				     bool *dirty)
 {
-	return -ENOSYS;
+	off_t max_size = fuse2fs_max_file_size(ff, inode);
+	int ret;
+
+	if (!(opflags & FUSE_IOMAP_OP_DIRECT))
+		return -ENOSYS;
+
+	if (pos >= max_size)
+		return -EFBIG;
+	/* pos < max_size here, so max_size - pos cannot wrap for any count */
+	if (count > (uint64_t)(max_size - pos))
+		count = max_size - pos;
+	/* Look up the current mapping; allocate below only if it needs it. */
+	ret = fuse2fs_iomap_begin_read(ff, ino, inode, pos, count, opflags,
+				       read);
+	if (ret)
+		return ret;
+
+	if (fuse_iomap_need_write_allocate(opflags, read)) {
+		ret = fuse2fs_iomap_write_allocate(ff, ino, inode, pos, count,
+						   opflags, read, dirty);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 static int op_iomap_begin(const char *path, uint64_t nodeid, uint64_t attr_ino,
@@ -5459,6 +5550,7 @@ static int op_iomap_begin(const char *path, uint64_t nodeid, uint64_t attr_ino,
 	struct ext2_inode_large inode;
 	ext2_filsys fs;
 	errcode_t err;
+	bool dirty = false;
 	int ret = 0;
 
 	FUSE2FS_CHECK_CONTEXT(ff);
@@ -5484,7 +5576,7 @@ static int op_iomap_begin(const char *path, uint64_t nodeid, uint64_t attr_ino,
 						 count, opflags, read);
 	else if (fuse_iomap_is_write(opflags))
 		ret = fuse2fs_iomap_begin_write(ff, attr_ino, &inode, pos,
-						count, opflags, read);
+						count, opflags, read, &dirty);
 	else
 		ret = fuse2fs_iomap_begin_read(ff, attr_ino, &inode, pos,
 					       count, opflags, read);
@@ -5506,6 +5598,14 @@ static int op_iomap_begin(const char *path, uint64_t nodeid, uint64_t attr_ino,
 			  read->offset + read->length <= pos))
 		fuse2fs_dump_extents(ff, attr_ino, &inode, "BAD DATA");
 
+	if (dirty) {
+		err = fuse2fs_write_inode(fs, attr_ino, &inode);
+		if (err) {
+			ret = translate_error(fs, attr_ino, err);
+			goto out_unlock;
+		}
+	}
+
 out_unlock:
 	fuse2fs_finish(ff, ret);
 	return ret;
@@ -5643,6 +5743,369 @@ static int op_iomap_config(uint64_t flags, off_t maxbytes,
 	if (ret)
 		goto out_unlock;
 
+out_unlock:
+	fuse2fs_finish(ff, ret);
+	return ret;
+}
+
+static inline bool fuse2fs_can_merge_mappings(const struct ext2fs_extent *left,
+					      const struct ext2fs_extent *right)
+{
+	uint64_t max_len = (left->e_flags & EXT2_EXTENT_FLAGS_UNINIT) ?
+				EXT_UNINIT_MAX_LEN : EXT_INIT_MAX_LEN;
+
+	return left->e_lblk + left->e_len == right->e_lblk &&
+	       left->e_pblk + left->e_len == right->e_pblk &&
+	       (left->e_flags & EXT2_EXTENT_FLAGS_UNINIT) ==
+	        (right->e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
+	       (uint64_t)left->e_len + right->e_len <= max_len;
+}
+
+static int fuse2fs_try_merge_mappings(struct fuse2fs *ff, ext2_ino_t ino,
+				      ext2_extent_handle_t handle,
+				      blk64_t startoff)
+{
+	ext2_filsys fs = ff->fs;
+	struct ext2fs_extent left, right;
+	errcode_t err;
+
+	/* Look up the mappings before startoff */
+	err = fuse2fs_get_mapping_at(ff, handle, startoff - 1, &left);
+	if (err == EXT2_ET_EXTENT_NOT_FOUND)
+		return 0;
+	if (err)
+		return translate_error(fs, ino, err);
+
+	/* Look up the mapping at startoff */
+	err = fuse2fs_get_mapping_at(ff, handle, startoff, &right);
+	if (err == EXT2_ET_EXTENT_NOT_FOUND)
+		return 0;
+	if (err)
+		return translate_error(fs, ino, err);
+
+	/* Can we combine them? */
+	if (!fuse2fs_can_merge_mappings(&left, &right))
+		return 0;
+
+	/*
+	 * Delete the mapping after startoff because libext2fs cannot handle
+	 * overlapping mappings.
+	 */
+	err = ext2fs_extent_delete(handle, 0);
+	DUMP_EXTENT(ff, "remover", startoff, err, &right);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	err = ext2fs_extent_fix_parents(handle);
+	DUMP_EXTENT(ff, "fixremover", startoff, err, &right);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	/* Move back and lengthen the mapping before startoff */
+	err = ext2fs_extent_goto(handle, left.e_lblk);
+	DUMP_EXTENT(ff, "movel", startoff - 1, err, &left);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	left.e_len += right.e_len;
+	err = ext2fs_extent_replace(handle, 0, &left);
+	DUMP_EXTENT(ff, "replacel", startoff - 1, err, &left);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	err = ext2fs_extent_fix_parents(handle);
+	DUMP_EXTENT(ff, "fixreplacel", startoff - 1, err, &left);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	return 0;
+}
+
+static int fuse2fs_convert_unwritten_mapping(struct fuse2fs *ff,
+					     ext2_ino_t ino,
+					     struct ext2_inode_large *inode,
+					     ext2_extent_handle_t handle,
+					     blk64_t *cursor, blk64_t stopoff)
+{
+	ext2_filsys fs = ff->fs;
+	struct ext2fs_extent extent;
+	blk64_t startoff = *cursor;
+	errcode_t err;
+
+	/*
+	 * Find the mapping at startoff.  Note that we can find holes because
+	 * the mapping data can change due to racing writes.
+	 */
+	err = fuse2fs_get_mapping_at(ff, handle, startoff, &extent);
+	if (err == EXT2_ET_EXTENT_NOT_FOUND) {
+		/*
+		 * If we didn't find any mappings at all then the file is
+		 * completely sparse.  There's nothing to convert.
+		 */
+		*cursor = stopoff;
+		return 0;
+	}
+	if (err)
+		return translate_error(fs, ino, err);
+
+	/*
+	 * The mapping is completely to the left of the range that we want.
+	 * Let's see what's in the next extent, if there is one.
+	 */
+	if (startoff >= extent.e_lblk + extent.e_len) {
+		/*
+		 * Mapping ends to the left of the current position.  Try to
+		 * find the next mapping.  If there is no next mapping, then
+		 * we're done.
+		 */
+		err = fuse2fs_get_next_mapping(ff, handle, startoff, &extent);
+		if (err == EXT2_ET_EXTENT_NOT_FOUND) {
+			*cursor = stopoff;
+			return 0;
+		}
+		if (err)
+			return translate_error(fs, ino, err);
+	}
+
+	/*
+	 * The mapping is completely to the right of the range that we want,
+	 * so we're done.
+	 */
+	if (extent.e_lblk >= stopoff) {
+		*cursor = stopoff;
+		return 0;
+	}
+
+	/*
+	 * At this point, we have a mapping that overlaps [startoff, stopoff).
+	 * If the mapping is already written, move on to the next one.
+	 */
+	if (!(extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT))
+		goto next;
+
+	if (startoff > extent.e_lblk) {
+		struct ext2fs_extent newex = extent;
+
+		/*
+		 * Unwritten mapping starts before startoff.  Shorten
+		 * the previous mapping...
+		 */
+		newex.e_len = startoff - extent.e_lblk;
+		err = ext2fs_extent_replace(handle, 0, &newex);
+		DUMP_EXTENT(ff, "shortenp", startoff, err, &newex);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		err = ext2fs_extent_fix_parents(handle);
+		DUMP_EXTENT(ff, "fixshortenp", startoff, err, &newex);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		/* ...and create new written mapping at startoff. */
+		extent.e_len -= newex.e_len;
+		extent.e_lblk += newex.e_len;
+		extent.e_pblk += newex.e_len;
+		extent.e_flags = newex.e_flags & ~EXT2_EXTENT_FLAGS_UNINIT;
+
+		err = ext2fs_extent_insert(handle,
+					   EXT2_EXTENT_INSERT_AFTER,
+					   &extent);
+		DUMP_EXTENT(ff, "insertx", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		err = ext2fs_extent_fix_parents(handle);
+		DUMP_EXTENT(ff, "fixinsertx", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+	}
+
+	if (extent.e_lblk + extent.e_len > stopoff) {
+		struct ext2fs_extent newex = extent;
+
+		/*
+		 * Unwritten mapping ends after stopoff.  Shorten the current
+		 * mapping...
+		 */
+		extent.e_len = stopoff - extent.e_lblk;
+		extent.e_flags &= ~EXT2_EXTENT_FLAGS_UNINIT;
+
+		err = ext2fs_extent_replace(handle, 0, &extent);
+		DUMP_EXTENT(ff, "shortenn", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		err = ext2fs_extent_fix_parents(handle);
+		DUMP_EXTENT(ff, "fixshortenn", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		/* ..and create a new unwritten mapping at stopoff. */
+		newex.e_pblk += extent.e_len;
+		newex.e_lblk += extent.e_len;
+		newex.e_len -= extent.e_len;
+		newex.e_flags |= EXT2_EXTENT_FLAGS_UNINIT;
+
+		err = ext2fs_extent_insert(handle,
+					   EXT2_EXTENT_INSERT_AFTER,
+					   &newex);
+		DUMP_EXTENT(ff, "insertn", startoff, err, &newex);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		err = ext2fs_extent_fix_parents(handle);
+		DUMP_EXTENT(ff, "fixinsertn", startoff, err, &newex);
+		if (err)
+			return translate_error(fs, ino, err);
+	}
+
+	/* Still unwritten?  Update the state. */
+	if (extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) {
+		extent.e_flags &= ~EXT2_EXTENT_FLAGS_UNINIT;
+
+		err = ext2fs_extent_replace(handle, 0, &extent);
+		DUMP_EXTENT(ff, "replacex", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		err = ext2fs_extent_fix_parents(handle);
+		DUMP_EXTENT(ff, "fixreplacex", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+	}
+
+next:
+	/* Try to merge with the previous extent; helper returns an errno */
+	if (startoff > 0) {
+		err = fuse2fs_try_merge_mappings(ff, ino, handle, startoff);
+		if (err)
+			return err;
+	}
+
+	*cursor = extent.e_lblk + extent.e_len;
+	return 0;
+}
+
+static int fuse2fs_convert_unwritten_mappings(struct fuse2fs *ff,
+					      ext2_ino_t ino,
+					      struct ext2_inode_large *inode,
+					      off_t pos, size_t written)
+{
+	ext2_extent_handle_t handle;
+	ext2_filsys fs = ff->fs;
+	blk64_t startoff = FUSE2FS_B_TO_FSBT(ff, pos);
+	const blk64_t stopoff = FUSE2FS_B_TO_FSB(ff, pos + written);
+	errcode_t err;
+	int ret;
+
+	err = ext2fs_extent_open2(fs, ino, EXT2_INODE(inode), &handle);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	/* Walk every mapping in the range, converting them. */
+	while (startoff < stopoff) {
+		blk64_t old_startoff = startoff;
+
+		ret = fuse2fs_convert_unwritten_mapping(ff, ino, inode, handle,
+							&startoff, stopoff);
+		if (ret)
+			goto out_handle;
+		if (startoff <= old_startoff) {
+			/* Do not go backwards. */
+			ret = translate_error(fs, ino, EXT2_ET_INODE_CORRUPTED);
+			goto out_handle;
+		}
+	}
+
+	/* Try to merge the right edge */
+	ret = fuse2fs_try_merge_mappings(ff, ino, handle, stopoff);
+out_handle:
+	ext2fs_extent_free(handle);
+	return ret;
+}
+
+static int op_iomap_ioend(const char *path, uint64_t nodeid, uint64_t attr_ino,
+			  off_t pos, size_t written, uint32_t ioendflags,
+			  int error, uint64_t new_addr)
+{
+	struct fuse2fs *ff = fuse2fs_get();
+	struct ext2_inode_large inode;
+	ext2_filsys fs;
+	errcode_t err;
+	bool dirty = false;
+	int ret = 0;
+
+	FUSE2FS_CHECK_CONTEXT(ff);
+
+	dbg_printf(ff,
+ "%s: path=%s nodeid=%llu attr_ino=%llu pos=0x%llx written=0x%zx ioendflags=0x%x error=%d new_addr=%llu\n",
+		   __func__, path,
+		   (unsigned long long)nodeid,
+		   (unsigned long long)attr_ino,
+		   (unsigned long long)pos,
+		   written,
+		   ioendflags,
+		   error,
+		   (unsigned long long)new_addr);
+
+	fs = fuse2fs_start(ff);
+	if (error) {
+		ret = error;
+		goto out_unlock;
+	}
+
+	/* should never see these ioend types */
+	if (ioendflags & FUSE_IOMAP_IOEND_SHARED) {
+		ret = translate_error(fs, attr_ino,
+				      EXT2_ET_FILESYSTEM_CORRUPTED);
+		goto out_unlock;
+	}
+
+	err = fuse2fs_read_inode(fs, attr_ino, &inode);
+	if (err) {
+		ret = translate_error(fs, attr_ino, err);
+		goto out_unlock;
+	}
+
+	if (ioendflags & FUSE_IOMAP_IOEND_UNWRITTEN) {
+		/* unwritten extents are only supported on extents files */
+		if (!(inode.i_flags & EXT4_EXTENTS_FL)) {
+			ret = translate_error(fs, attr_ino,
+					      EXT2_ET_FILESYSTEM_CORRUPTED);
+			goto out_unlock;
+		}
+
+		ret = fuse2fs_convert_unwritten_mappings(ff, attr_ino, &inode,
+							 pos, written);
+		if (ret)
+			goto out_unlock;
+
+		dirty = true;
+	}
+
+	if (ioendflags & FUSE_IOMAP_IOEND_APPEND) {
+		ext2_off64_t isize = EXT2_I_SIZE(&inode);
+
+		if (pos + written > isize) {
+			err = ext2fs_inode_size_set(fs, EXT2_INODE(&inode),
+						    pos + written);
+			if (err) {
+				ret = translate_error(fs, attr_ino, err);
+				goto out_unlock;
+			}
+
+			dirty = true;
+		}
+	}
+
+	if (dirty) {
+		err = fuse2fs_write_inode(fs, attr_ino, &inode);
+		if (err) {
+			ret = translate_error(fs, attr_ino, err);
+			goto out_unlock;
+		}
+	}
+
 out_unlock:
 	fuse2fs_finish(ff, ret);
 	return ret;
@@ -5713,6 +6176,7 @@ static struct fuse_operations fs_ops = {
 	.iomap_begin = op_iomap_begin,
 	.iomap_end = op_iomap_end,
 	.iomap_config = op_iomap_config,
+	.iomap_ioend = op_iomap_ioend,
 #endif /* HAVE_FUSE_IOMAP */
 };
 
diff --git a/misc/fuse4fs.c b/misc/fuse4fs.c
index 0ac5de90498dac..ff50182b929974 100644
--- a/misc/fuse4fs.c
+++ b/misc/fuse4fs.c
@@ -5850,12 +5850,106 @@ static int fuse4fs_iomap_begin_read(struct fuse4fs *ff, ext2_ino_t ino,
 					    opflags, read);
 }
 
+static int fuse4fs_iomap_write_allocate(struct fuse4fs *ff, ext2_ino_t ino,
+					struct ext2_inode_large *inode,
+					off_t pos, uint64_t count,
+					uint32_t opflags,
+					struct fuse_file_iomap *read,
+					bool *dirty)
+{
+	ext2_filsys fs = ff->fs;
+	blk64_t startoff = FUSE4FS_B_TO_FSBT(ff, pos);
+	blk64_t stopoff = FUSE4FS_B_TO_FSB(ff, pos + count);
+	blk64_t old_iblocks;
+	errcode_t err;
+	int ret;
+
+	dbg_printf(ff,
+ "%s: ino=%d startoff 0x%llx blockcount 0x%llx\n",
+		   __func__, ino, startoff, stopoff - startoff);
+
+	if (!fuse4fs_can_allocate(ff, stopoff - startoff))
+		return -ENOSPC;
+
+	old_iblocks = ext2fs_get_stat_i_blocks(fs, EXT2_INODE(inode));
+	err = ext2fs_fallocate(fs, EXT2_FALLOCATE_FORCE_UNINIT, ino,
+			       EXT2_INODE(inode), ~0ULL, startoff,
+			       stopoff - startoff);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	/*
+	 * New allocations for file data blocks on indirect mapped files are
+	 * zeroed through the IO manager so we have to flush it to disk.
+	 */
+	if (!(inode->i_flags & EXT4_EXTENTS_FL) &&
+	    old_iblocks != ext2fs_get_stat_i_blocks(fs, EXT2_INODE(inode))) {
+		err = io_channel_flush(fs->io);
+		if (err)
+			return translate_error(fs, ino, err);
+	}
+
+	/* pick up the newly allocated mapping */
+	ret = fuse4fs_iomap_begin_read(ff, ino, inode, pos, count, opflags,
+				       read);
+	if (ret)
+		return ret;
+
+	read->flags |= FUSE_IOMAP_F_DIRTY;
+	*dirty = true;
+	return 0;
+}
+
+static off_t fuse4fs_max_file_size(const struct fuse4fs *ff,
+				   const struct ext2_inode_large *inode)
+{
+	ext2_filsys fs = ff->fs;
+	blk64_t addr_per_block, max_map_block;
+
+	if (inode->i_flags & EXT4_EXTENTS_FL) {
+		max_map_block = (1ULL << 32) - 1;
+	} else {
+		addr_per_block = fs->blocksize >> 2;
+		max_map_block = addr_per_block;
+		max_map_block += addr_per_block * addr_per_block;
+		max_map_block += addr_per_block * addr_per_block * addr_per_block;
+		max_map_block += 12;
+	}
+
+	return FUSE4FS_FSB_TO_B(ff, max_map_block) + (fs->blocksize - 1);
+}
+
 static int fuse4fs_iomap_begin_write(struct fuse4fs *ff, ext2_ino_t ino,
 				     struct ext2_inode_large *inode, off_t pos,
 				     uint64_t count, uint32_t opflags,
-				     struct fuse_file_iomap *read)
+				     struct fuse_file_iomap *read,
+				     bool *dirty)
 {
-	return -ENOSYS;
+	off_t max_size = fuse4fs_max_file_size(ff, inode);
+	int ret;
+
+	if (!(opflags & FUSE_IOMAP_OP_DIRECT))
+		return -ENOSYS;
+
+	if (pos >= max_size)
+		return -EFBIG;
+	/* pos < max_size here, so max_size - pos cannot wrap for any count */
+	if (count > (uint64_t)(max_size - pos))
+		count = max_size - pos;
+	/* Look up the current mapping; allocate below only if it needs it. */
+	ret = fuse4fs_iomap_begin_read(ff, ino, inode, pos, count, opflags,
+				       read);
+	if (ret)
+		return ret;
+
+	if (fuse_iomap_need_write_allocate(opflags, read)) {
+		ret = fuse4fs_iomap_write_allocate(ff, ino, inode, pos, count,
+						   opflags, read, dirty);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 static void op_iomap_begin(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
@@ -5867,6 +5961,7 @@ static void op_iomap_begin(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
 	ext2_filsys fs;
 	ext2_ino_t ino;
 	errcode_t err;
+	bool dirty = false;
 	int ret = 0;
 
 	FUSE4FS_CHECK_CONTEXT(req);
@@ -5890,7 +5985,7 @@ static void op_iomap_begin(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
 						 opflags, &read);
 	else if (fuse_iomap_is_write(opflags))
 		ret = fuse4fs_iomap_begin_write(ff, ino, &inode, pos, count,
-						opflags, &read);
+						opflags, &read, &dirty);
 	else
 		ret = fuse4fs_iomap_begin_read(ff, ino, &inode, pos, count,
 					       opflags, &read);
@@ -5912,6 +6007,14 @@ static void op_iomap_begin(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
 			  read.offset + read.length <= pos))
 		fuse4fs_dump_extents(ff, ino, &inode, "BAD DATA");
 
+	if (dirty) {
+		err = fuse4fs_write_inode(fs, ino, &inode);
+		if (err) {
+			ret = translate_error(fs, ino, err);
+			goto out_unlock;
+		}
+	}
+
 out_unlock:
 	fuse4fs_finish(ff, ret);
 	if (ret)
@@ -6059,6 +6162,369 @@ static void op_iomap_config(fuse_req_t req, uint64_t flags, uint64_t maxbytes)
 	else
 		fuse_reply_iomap_config(req, &cfg);
 }
+
+static inline bool fuse4fs_can_merge_mappings(const struct ext2fs_extent *left,
+					      const struct ext2fs_extent *right)
+{
+	uint64_t max_len = (left->e_flags & EXT2_EXTENT_FLAGS_UNINIT) ?
+				EXT_UNINIT_MAX_LEN : EXT_INIT_MAX_LEN;
+
+	return left->e_lblk + left->e_len == right->e_lblk &&
+	       left->e_pblk + left->e_len == right->e_pblk &&
+	       (left->e_flags & EXT2_EXTENT_FLAGS_UNINIT) ==
+	        (right->e_flags & EXT2_EXTENT_FLAGS_UNINIT) &&
+	       (uint64_t)left->e_len + right->e_len <= max_len;
+}
+
+static int fuse4fs_try_merge_mappings(struct fuse4fs *ff, ext2_ino_t ino,
+				      ext2_extent_handle_t handle,
+				      blk64_t startoff)
+{
+	ext2_filsys fs = ff->fs;
+	struct ext2fs_extent left, right;
+	errcode_t err;
+
+	/* Look up the mappings before startoff */
+	err = fuse4fs_get_mapping_at(ff, handle, startoff - 1, &left);
+	if (err == EXT2_ET_EXTENT_NOT_FOUND)
+		return 0;
+	if (err)
+		return translate_error(fs, ino, err);
+
+	/* Look up the mapping at startoff */
+	err = fuse4fs_get_mapping_at(ff, handle, startoff, &right);
+	if (err == EXT2_ET_EXTENT_NOT_FOUND)
+		return 0;
+	if (err)
+		return translate_error(fs, ino, err);
+
+	/* Can we combine them? */
+	if (!fuse4fs_can_merge_mappings(&left, &right))
+		return 0;
+
+	/*
+	 * Delete the mapping after startoff because libext2fs cannot handle
+	 * overlapping mappings.
+	 */
+	err = ext2fs_extent_delete(handle, 0);
+	DUMP_EXTENT(ff, "remover", startoff, err, &right);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	err = ext2fs_extent_fix_parents(handle);
+	DUMP_EXTENT(ff, "fixremover", startoff, err, &right);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	/* Move back and lengthen the mapping before startoff */
+	err = ext2fs_extent_goto(handle, left.e_lblk);
+	DUMP_EXTENT(ff, "movel", startoff - 1, err, &left);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	left.e_len += right.e_len;
+	err = ext2fs_extent_replace(handle, 0, &left);
+	DUMP_EXTENT(ff, "replacel", startoff - 1, err, &left);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	err = ext2fs_extent_fix_parents(handle);
+	DUMP_EXTENT(ff, "fixreplacel", startoff - 1, err, &left);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	return 0;
+}
+
+static int fuse4fs_convert_unwritten_mapping(struct fuse4fs *ff,
+					     ext2_ino_t ino,
+					     struct ext2_inode_large *inode,
+					     ext2_extent_handle_t handle,
+					     blk64_t *cursor, blk64_t stopoff)
+{
+	ext2_filsys fs = ff->fs;
+	struct ext2fs_extent extent;
+	blk64_t startoff = *cursor;
+	errcode_t err;
+
+	/*
+	 * Find the mapping at startoff.  Note that we can find holes because
+	 * the mapping data can change due to racing writes.
+	 */
+	err = fuse4fs_get_mapping_at(ff, handle, startoff, &extent);
+	if (err == EXT2_ET_EXTENT_NOT_FOUND) {
+		/*
+		 * If we didn't find any mappings at all then the file is
+		 * completely sparse.  There's nothing to convert.
+		 */
+		*cursor = stopoff;
+		return 0;
+	}
+	if (err)
+		return translate_error(fs, ino, err);
+
+	/*
+	 * The mapping is completely to the left of the range that we want.
+	 * Let's see what's in the next extent, if there is one.
+	 */
+	if (startoff >= extent.e_lblk + extent.e_len) {
+		/*
+		 * Mapping ends to the left of the current position.  Try to
+		 * find the next mapping.  If there is no next mapping, then
+		 * we're done.
+		 */
+		err = fuse4fs_get_next_mapping(ff, handle, startoff, &extent);
+		if (err == EXT2_ET_EXTENT_NOT_FOUND) {
+			*cursor = stopoff;
+			return 0;
+		}
+		if (err)
+			return translate_error(fs, ino, err);
+	}
+
+	/*
+	 * The mapping is completely to the right of the range that we want,
+	 * so we're done.
+	 */
+	if (extent.e_lblk >= stopoff) {
+		*cursor = stopoff;
+		return 0;
+	}
+
+	/*
+	 * At this point, we have a mapping that overlaps [startoff, stopoff).
+	 * If the mapping is already written, move on to the next one.
+	 */
+	if (!(extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT))
+		goto next;
+
+	if (startoff > extent.e_lblk) {
+		struct ext2fs_extent newex = extent;
+
+		/*
+		 * Unwritten mapping starts before startoff.  Shorten
+		 * the previous mapping...
+		 */
+		newex.e_len = startoff - extent.e_lblk;
+		err = ext2fs_extent_replace(handle, 0, &newex);
+		DUMP_EXTENT(ff, "shortenp", startoff, err, &newex);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		err = ext2fs_extent_fix_parents(handle);
+		DUMP_EXTENT(ff, "fixshortenp", startoff, err, &newex);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		/* ...and create new written mapping at startoff. */
+		extent.e_len -= newex.e_len;
+		extent.e_lblk += newex.e_len;
+		extent.e_pblk += newex.e_len;
+		extent.e_flags = newex.e_flags & ~EXT2_EXTENT_FLAGS_UNINIT;
+
+		err = ext2fs_extent_insert(handle,
+					   EXT2_EXTENT_INSERT_AFTER,
+					   &extent);
+		DUMP_EXTENT(ff, "insertx", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		err = ext2fs_extent_fix_parents(handle);
+		DUMP_EXTENT(ff, "fixinsertx", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+	}
+
+	if (extent.e_lblk + extent.e_len > stopoff) {
+		struct ext2fs_extent newex = extent;
+
+		/*
+		 * Unwritten mapping ends after stopoff.  Shorten the current
+		 * mapping...
+		 */
+		extent.e_len = stopoff - extent.e_lblk;
+		extent.e_flags &= ~EXT2_EXTENT_FLAGS_UNINIT;
+
+		err = ext2fs_extent_replace(handle, 0, &extent);
+		DUMP_EXTENT(ff, "shortenn", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		err = ext2fs_extent_fix_parents(handle);
+		DUMP_EXTENT(ff, "fixshortenn", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		/* ..and create a new unwritten mapping at stopoff. */
+		newex.e_pblk += extent.e_len;
+		newex.e_lblk += extent.e_len;
+		newex.e_len -= extent.e_len;
+		newex.e_flags |= EXT2_EXTENT_FLAGS_UNINIT;
+
+		err = ext2fs_extent_insert(handle,
+					   EXT2_EXTENT_INSERT_AFTER,
+					   &newex);
+		DUMP_EXTENT(ff, "insertn", startoff, err, &newex);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		err = ext2fs_extent_fix_parents(handle);
+		DUMP_EXTENT(ff, "fixinsertn", startoff, err, &newex);
+		if (err)
+			return translate_error(fs, ino, err);
+	}
+
+	/* Still unwritten?  Update the state. */
+	if (extent.e_flags & EXT2_EXTENT_FLAGS_UNINIT) {
+		extent.e_flags &= ~EXT2_EXTENT_FLAGS_UNINIT;
+
+		err = ext2fs_extent_replace(handle, 0, &extent);
+		DUMP_EXTENT(ff, "replacex", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+
+		err = ext2fs_extent_fix_parents(handle);
+		DUMP_EXTENT(ff, "fixreplacex", startoff, err, &extent);
+		if (err)
+			return translate_error(fs, ino, err);
+	}
+
+next:
+	/* Try to merge with the previous extent; helper returns an errno */
+	if (startoff > 0) {
+		err = fuse4fs_try_merge_mappings(ff, ino, handle, startoff);
+		if (err)
+			return err;
+	}
+
+	*cursor = extent.e_lblk + extent.e_len;
+	return 0;
+}
+
+static int fuse4fs_convert_unwritten_mappings(struct fuse4fs *ff,
+					      ext2_ino_t ino,
+					      struct ext2_inode_large *inode,
+					      off_t pos, size_t written)
+{
+	ext2_extent_handle_t handle;
+	ext2_filsys fs = ff->fs;
+	blk64_t startoff = FUSE4FS_B_TO_FSBT(ff, pos);
+	const blk64_t stopoff = FUSE4FS_B_TO_FSB(ff, pos + written);
+	errcode_t err;
+	int ret;
+
+	err = ext2fs_extent_open2(fs, ino, EXT2_INODE(inode), &handle);
+	if (err)
+		return translate_error(fs, ino, err);
+
+	/* Walk every mapping in the range, converting them. */
+	while (startoff < stopoff) {
+		blk64_t old_startoff = startoff;
+
+		ret = fuse4fs_convert_unwritten_mapping(ff, ino, inode, handle,
+							&startoff, stopoff);
+		if (ret)
+			goto out_handle;
+		if (startoff <= old_startoff) {
+			/* Do not go backwards. */
+			ret = translate_error(fs, ino, EXT2_ET_INODE_CORRUPTED);
+			goto out_handle;
+		}
+	}
+
+	/* Try to merge the right edge */
+	ret = fuse4fs_try_merge_mappings(ff, ino, handle, stopoff);
+out_handle:
+	ext2fs_extent_free(handle);
+	return ret;
+}
+
+static void op_iomap_ioend(fuse_req_t req, fuse_ino_t fino, uint64_t dontcare,
+			   off_t pos, size_t written, uint32_t ioendflags,
+			   int error, uint64_t new_addr)
+{
+	struct fuse4fs *ff = fuse4fs_get(req);
+	struct ext2_inode_large inode;
+	ext2_filsys fs;
+	ext2_ino_t ino;
+	errcode_t err;
+	bool dirty = false;
+	int ret = 0;
+
+	FUSE4FS_CHECK_CONTEXT(req);
+	FUSE4FS_CONVERT_FINO(req, &ino, fino);
+
+	dbg_printf(ff,
+ "%s: ino=%d pos=0x%llx written=0x%zx ioendflags=0x%x error=%d new_addr=0x%llx\n",
+		   __func__, ino,
+		   (unsigned long long)pos,
+		   written,
+		   ioendflags,
+		   error,
+		   (unsigned long long)new_addr);
+
+	if (error) {
+		fuse_reply_err(req, -error);
+		return;
+	}
+
+	fs = fuse4fs_start(ff);
+
+	/* should never see these ioend types */
+	if (ioendflags & FUSE_IOMAP_IOEND_SHARED) {
+		ret = translate_error(fs, ino, EXT2_ET_FILESYSTEM_CORRUPTED);
+		goto out_unlock;
+	}
+
+	err = fuse4fs_read_inode(fs, ino, &inode);
+	if (err) {
+		ret = translate_error(fs, ino, err);
+		goto out_unlock;
+	}
+
+	if (ioendflags & FUSE_IOMAP_IOEND_UNWRITTEN) {
+		/* unwritten extents are only supported on extents files */
+		if (!(inode.i_flags & EXT4_EXTENTS_FL)) {
+			ret = translate_error(fs, ino,
+					      EXT2_ET_FILESYSTEM_CORRUPTED);
+			goto out_unlock;
+		}
+
+		ret = fuse4fs_convert_unwritten_mappings(ff, ino, &inode,
+							 pos, written);
+		if (ret)
+			goto out_unlock;
+
+		dirty = true;
+	}
+
+	if (ioendflags & FUSE_IOMAP_IOEND_APPEND) {
+		ext2_off64_t isize = EXT2_I_SIZE(&inode);
+
+		if (pos + written > isize) {
+			err = ext2fs_inode_size_set(fs, EXT2_INODE(&inode),
+						    pos + written);
+			if (err) {
+				ret = translate_error(fs, ino, err);
+				goto out_unlock;
+			}
+
+			dirty = true;
+		}
+	}
+
+	if (dirty) {
+		err = fuse4fs_write_inode(fs, ino, &inode);
+		if (err) {
+			ret = translate_error(fs, ino, err);
+			goto out_unlock;
+		}
+	}
+
+out_unlock:
+	fuse4fs_finish(ff, ret);
+	fuse_reply_err(req, -ret);
+}
 #endif /* HAVE_FUSE_IOMAP */
 
 static struct fuse_lowlevel_ops fs_ops = {
@@ -6108,6 +6574,7 @@ static struct fuse_lowlevel_ops fs_ops = {
 	.iomap_begin = op_iomap_begin,
 	.iomap_end = op_iomap_end,
 	.iomap_config = op_iomap_config,
+	.iomap_ioend = op_iomap_ioend,
 #endif /* HAVE_FUSE_IOMAP */
 };
 


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ