[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <c7ac23316807de400c2c162f3778bf02178b869a.1742800203.git.ojaswin@linux.ibm.com>
Date: Mon, 24 Mar 2025 13:07:08 +0530
From: Ojaswin Mujoo <ojaswin@...ux.ibm.com>
To: linux-ext4@...r.kernel.org, "Theodore Ts'o" <tytso@....edu>
Cc: John Garry <john.g.garry@...cle.com>, dchinner@...hat.com,
"Darrick J . Wong" <djwong@...nel.org>,
Ritesh Harjani <ritesh.list@...il.com>, linux-kernel@...r.kernel.org
Subject: [RFC v3 10/11] ext4: add support for adding forcealign via SETXATTR ioctl
With forcealign set on an inode, we should always either get an extent
physically aligned to the extsize or we should error out. This is
suitable for hardware accelerated atomic writes since it allows us to
exit early rather than sending the bio and then getting an error from
the device.
This patch adds the SET/GETXATTR ioctl level support to set/get this
flag. Right now, this can only be set if extsize is set on an inode.
Since we are almost out of inode flags, we reuse the unused
EXT4_EOFBLOCKS_FL.
Signed-off-by: Ojaswin Mujoo <ojaswin@...ux.ibm.com>
---
fs/ext4/ext4.h | 5 ++-
fs/ext4/ext4_jbd2.h | 8 +++++
fs/ext4/extents.c | 7 ++++-
fs/ext4/inode.c | 16 ++++++++--
fs/ext4/ioctl.c | 69 +++++++++++++++++++++++++++++++++++++++++
include/uapi/linux/fs.h | 6 ++--
6 files changed, 104 insertions(+), 7 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index a7429797c1d2..690caad50cb6 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -514,6 +514,9 @@ struct flex_groups {
#define EXT4_CASEFOLD_FL 0x40000000 /* Casefolded directory */
#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
+/* Extended flags, can only be set via FS_SETXATTR ioctl */
+#define EXT4_FORCEALIGN_XFL 0x00400000 /* Inode must do aligned allocation */
+
/* User modifiable flags */
#define EXT4_FL_USER_MODIFIABLE (EXT4_SECRM_FL | \
EXT4_UNRM_FL | \
@@ -528,7 +531,6 @@ struct flex_groups {
EXT4_DIRSYNC_FL | \
EXT4_TOPDIR_FL | \
EXT4_EXTENTS_FL | \
- 0x00400000 /* EXT4_EOFBLOCKS_FL */ | \
EXT4_DAX_FL | \
EXT4_PROJINHERIT_FL | \
EXT4_CASEFOLD_FL)
@@ -605,6 +607,7 @@ enum {
EXT4_INODE_VERITY = 20, /* Verity protected inode */
EXT4_INODE_EA_INODE = 21, /* Inode used for large EA */
/* 22 was formerly EXT4_INODE_EOFBLOCKS */
+ EXT4_INODE_FORCEALIGN = 22, /* Inode should do aligned allocation */
EXT4_INODE_DAX = 25, /* Inode is DAX */
EXT4_INODE_INLINE_DATA = 28, /* Data in inode. */
EXT4_INODE_PROJINHERIT = 29, /* Create with parents projid */
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 53b930f6c797..f88149ff0033 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -467,6 +467,14 @@ static inline int ext4_should_use_extsize(struct inode *inode)
return (ext4_inode_get_extsize(EXT4_I(inode)) > 0);
}
+/* Forcealign is only honoured when the inode also uses extsize. */
+static inline int ext4_should_use_forcealign(struct inode *inode)
+{
+	if (ext4_should_use_extsize(inode))
+		return ext4_test_inode_flag(inode, EXT4_INODE_FORCEALIGN);
+	return 0;
+}
+
static inline int ext4_should_use_unwrit_extents(struct inode *inode)
{
return (ext4_should_dioread_nolock(inode) ||
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 25c1368b49bb..1835e18f0eef 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4855,9 +4855,14 @@ static long ext4_do_fallocate(struct file *file, loff_t offset,
}
flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
- if (ext4_should_use_extsize(inode))
+ if (ext4_should_use_extsize(inode)) {
flags |= EXT4_GET_BLOCKS_EXTSIZE;
+ if (ext4_should_use_forcealign(inode)) {
+ flags |= EXT4_GET_BLOCKS_FORCEALIGN;
+ }
+ }
+
ret = ext4_alloc_file_blocks(file, start_lblk, len_lblk, new_size,
flags);
if (ret)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 93ab76cb4818..5b36e62872d6 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -922,7 +922,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
* deallocation both respect extsize. If
* not, something has gone terribly wrong.
*/
- if (WARN_ON((map->m_lblk != extsize_mlblk) ||
+ if (WARN_ON_ONCE((map->m_lblk != extsize_mlblk) ||
(map->m_len != extsize_mlen))) {
ext4_error_adjust_map(map, orig_map);
ext4_warning(
@@ -1138,9 +1138,14 @@ int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
int ret = 0;
int flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
- if (ext4_should_use_extsize(inode))
+ if (ext4_should_use_extsize(inode)) {
flags |= EXT4_GET_BLOCKS_EXTSIZE;
+ if (ext4_should_use_forcealign(inode)) {
+ flags |= EXT4_GET_BLOCKS_FORCEALIGN;
+ }
+ }
+
ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n",
inode->i_ino, create);
ret = _ext4_get_block(inode, iblock, bh_result, flags);
@@ -3720,8 +3725,13 @@ static int ext4_iomap_alloc(struct inode *inode, struct ext4_map_blocks *map,
else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
m_flags = EXT4_GET_BLOCKS_IO_CREATE_EXT;
- if (ext4_should_use_extsize(inode))
+ if (ext4_should_use_extsize(inode)) {
m_flags |= EXT4_GET_BLOCKS_EXTSIZE;
+
+ if (ext4_should_use_forcealign(inode)) {
+ m_flags |= EXT4_GET_BLOCKS_FORCEALIGN;
+ }
+ }
}
if (ext4_should_use_extsize(inode))
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 48f62d7c27e6..5c3cdbe17e2b 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -795,6 +795,67 @@ static int ext4_ioctl_setextsize(struct inode *inode, u32 extsize, u32 xflags)
return err;
}
+/*
+ * ext4_ioctl_setforcealign - set or clear EXT4_INODE_FORCEALIGN on an inode
+ * @inode: inode whose flag is changed
+ * @forcealign: true to set the flag, false to clear it
+ *
+ * Setting the flag requires a nonzero extsize on the inode; otherwise a
+ * warning is logged and -EINVAL is returned. The flag change and the ctime
+ * and iversion updates are done under one journal handle.
+ *
+ * Returns 0 on success or when the flag already has the requested state,
+ * and a negative error code otherwise.
+ */
+static int ext4_ioctl_setforcealign(struct inode *inode, bool forcealign)
+{
+	bool has_forcealign = ext4_test_inode_flag(inode, EXT4_INODE_FORCEALIGN);
+	struct ext4_iloc iloc;
+	handle_t *handle;
+	int err;
+
+	/* Nothing to do if the flag is already in the requested state. */
+	if (forcealign == has_forcealign)
+		return 0;
+
+	/* Forcealign can only be turned on for an inode that has an extsize. */
+	if (forcealign && !ext4_inode_get_extsize(EXT4_I(inode))) {
+		ext4_warning_inode(inode, "%s\n",
+				   "forcealign can't be used without extsize set");
+		return -EINVAL;
+	}
+
+	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	err = ext4_reserve_inode_write(handle, inode, &iloc);
+	if (err < 0)
+		goto out_stop;
+
+	if (forcealign)
+		ext4_set_inode_flag(inode, EXT4_INODE_FORCEALIGN);
+	else
+		ext4_clear_inode_flag(inode, EXT4_INODE_FORCEALIGN);
+
+	/* The flag change is an inode change: update ctime and iversion. */
+	inode_set_ctime_current(inode);
+	inode_inc_iversion(inode);
+
+	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+	if (err < 0)
+		goto out_stop;
+
+	/* A failure to stop the handle is reported; anything else is success. */
+	err = ext4_journal_stop(handle);
+	return err < 0 ? err : 0;
+
+out_stop:
+	/* Keep the first error; the result of stopping the handle is ignored. */
+	ext4_journal_stop(handle);
+	return err;
+}
+
#ifdef CONFIG_QUOTA
static int ext4_ioctl_setproject(struct inode *inode, __u32 projid)
{
@@ -1088,6 +1149,9 @@ int ext4_fileattr_get(struct dentry *dentry, struct fileattr *fa)
fa->fsx_xflags |= FS_XFLAG_EXTSIZE;
}
+ if (ext4_test_inode_flag(inode, EXT4_INODE_FORCEALIGN))
+ fa->fsx_xflags |= FS_XFLAG_FORCEALIGN;
+
return 0;
}
@@ -1144,6 +1208,11 @@ int ext4_fileattr_set(struct mnt_idmap *idmap,
goto out;
fa->fsx_xflags = 0;
}
+
+ err = ext4_ioctl_setforcealign(inode,
+ (fa->fsx_xflags & FS_XFLAG_FORCEALIGN));
+ if (err)
+ goto out;
out:
return err;
}
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index 2bbe00cf1248..944fa77ce18e 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -167,7 +167,9 @@ struct fsxattr {
#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
#define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */
#define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */
-#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
+/* data extent mappings for regular files must be aligned to extent size hint */
+#define FS_XFLAG_FORCEALIGN 0x00020000
+#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
/* the read-only stuff doesn't really belong here, but any other place is
probably as bad and I don't want to create yet another include file. */
@@ -295,7 +297,7 @@ struct fsxattr {
#define FS_EXTENT_FL 0x00080000 /* Extents */
#define FS_VERITY_FL 0x00100000 /* Verity protected inode */
#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
-#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
+/* Was previously FS_EOFBLOCKS_FL (reserved for ext4) */
#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
#define FS_DAX_FL 0x02000000 /* Inode is DAX */
#define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */
--
2.48.1
Powered by blists - more mailing lists