[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <175573713910.21970.597991894155936504.stgit@frogsfrogsfrogs>
Date: Wed, 20 Aug 2025 18:18:18 -0700
From: "Darrick J. Wong" <djwong@...nel.org>
To: tytso@....edu
Cc: John@...ves.net, bernd@...ernd.com, linux-fsdevel@...r.kernel.org,
linux-ext4@...r.kernel.org, miklos@...redi.hu, joannelkoong@...il.com,
neal@...pa.dev
Subject: [PATCH 10/19] fuse2fs: don't do file data block IO when iomap is
enabled
From: Darrick J. Wong <djwong@...nel.org>
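When iomap is in use for the page cache, the kernel will take care of
all the file data block IO for us, including zeroing of punched ranges
and post-EOF bytes. fuse2fs only needs to do IO for inline data.
Therefore, set the NOBLOCKIO ext2_file flag so that libext2fs will not
do any regular file IO to or from disk blocks at all. fuse2fs makes
this decision per inode, because iomap must never be turned on for
hardlinked files there; fuse4fs controls its own nodeids and therefore
sets the flag whenever iomap is enabled.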
Signed-off-by: "Darrick J. Wong" <djwong@...nel.org>
---
misc/fuse2fs.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
misc/fuse4fs.c | 11 ++++++++-
2 files changed, 81 insertions(+), 2 deletions(-)
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index dcf002f380b843..588b0053f43c95 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -3158,15 +3158,72 @@ static int fuse2fs_punch_posteof(struct fuse2fs *ff, ext2_ino_t ino,
return 0;
}
+/*
+ * Decide if file IO for this inode can use iomap.
+ *
+ * It turns out that libfuse creates internal node ids that have nothing to do
+ * with the ext2_ino_t that we give it. These internal node ids are what
+ * actually gets igetted in the kernel, which means that there can be multiple
+ * fuse_inode objects in the kernel for a single hardlinked ondisk ext2 inode.
+ *
+ * What this means, horrifyingly, is that on a fuse filesystem that supports
+ * hard links, the in-kernel i_rwsem does not protect against concurrent writes
+ * between files that point to the same inode. That in turn means that the
+ * file mode and size can get desynchronized between the multiple fuse_inode
+ * objects. This also means that we cannot cache iomaps in the kernel AT ALL
+ * because the caches will get out of sync, leading to WARN_ONs from the iomap
+ * zeroing code and probably data corruption after that.
+ *
+ * Therefore, libfuse won't let us create hardlinks of iomap files, and we must
+ * never turn on iomap for existing hardlinked files. Long term it means we
+ * have to find a way around this loss of functionality. fuse4fs gets around
+ * this by being a low level fuse driver and controlling the nodeids itself.
+ *
+ * Returns 0 for no, 1 for yes, or a negative errno.
+ */
+#ifdef HAVE_FUSE_IOMAP
+static int fuse2fs_file_uses_iomap(struct fuse2fs *ff, ext2_ino_t ino)
+{
+ struct stat statbuf;
+ int ret;
+
+ if (!fuse2fs_iomap_enabled(ff))
+ return 0;
+
+ ret = stat_inode(ff->fs, ino, &statbuf);
+ if (ret)
+ return ret;
+
+ /* the inode's stat data decides eligibility (presumably nlink; confirm) */
+ return fuse_fs_can_enable_iomap(&statbuf);
+}
+#else /* !HAVE_FUSE_IOMAP */
+# define fuse2fs_file_uses_iomap(...) (0)
+#endif /* HAVE_FUSE_IOMAP */
+
static int fuse2fs_truncate(struct fuse2fs *ff, ext2_ino_t ino, off_t new_size)
{
ext2_filsys fs = ff->fs;
ext2_file_t file;
__u64 old_isize;
errcode_t err;
+ int flags = EXT2_FILE_WRITE;
int ret = 0;
- err = ext2fs_file_open(fs, ino, EXT2_FILE_WRITE, &file);
+ /* the kernel handles all eof zeroing for us in iomap mode */
+ ret = fuse2fs_file_uses_iomap(ff, ino);
+ switch (ret) {
+ case 0:
+ break;
+ case 1:
+ flags |= EXT2_FILE_NOBLOCKIO;
+ ret = 0;
+ break;
+ default:
+ return ret;
+ }
+
+ err = ext2fs_file_open(fs, ino, flags, &file);
if (err)
return translate_error(fs, ino, err);
@@ -3324,6 +3381,19 @@ static int __op_open(struct fuse2fs *ff, const char *path,
goto out;
}
+ /* the kernel handles all block IO for us in iomap mode */
+ ret = fuse2fs_file_uses_iomap(ff, file->ino);
+ switch (ret) {
+ case 0:
+ break;
+ case 1:
+ file->open_flags |= EXT2_FILE_NOBLOCKIO;
+ ret = 0;
+ break;
+ default:
+ goto out;
+ }
+
if (fp->flags & O_TRUNC) {
ret = fuse2fs_truncate(ff, file->ino, 0);
if (ret)
diff --git a/misc/fuse4fs.c b/misc/fuse4fs.c
index 3082c23e398adf..e08c5af5abfd27 100644
--- a/misc/fuse4fs.c
+++ b/misc/fuse4fs.c
@@ -3375,9 +3375,14 @@ static int fuse4fs_truncate(struct fuse4fs *ff, ext2_ino_t ino, off_t new_size)
ext2_file_t file;
__u64 old_isize;
errcode_t err;
+ int flags = EXT2_FILE_WRITE;
int ret = 0;
- err = ext2fs_file_open(fs, ino, EXT2_FILE_WRITE, &file);
+ /* the kernel handles all eof zeroing for us in iomap mode */
+ if (fuse4fs_iomap_enabled(ff))
+ flags |= EXT2_FILE_NOBLOCKIO;
+
+ err = ext2fs_file_open(fs, ino, flags, &file);
if (err)
return translate_error(fs, ino, err);
@@ -3472,6 +3477,10 @@ static int fuse4fs_open_file(struct fuse4fs *ff, const struct fuse_ctx *ctxt,
if (linked)
check |= L_OK;
+ /* the kernel handles all block IO for us in iomap mode */
+ if (fuse4fs_iomap_enabled(ff))
+ file->open_flags |= EXT2_FILE_NOBLOCKIO;
+
/*
* If the caller wants to truncate the file, we need to ask for full
* write access even if the caller claims to be appending.
Powered by blists - more mailing lists