[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d304a337c17ba42092d7475ff1374bc481f72b32.1762945505.git.ojaswin@linux.ibm.com>
Date: Wed, 12 Nov 2025 16:36:06 +0530
From: Ojaswin Mujoo <ojaswin@...ux.ibm.com>
To: Christian Brauner <brauner@...nel.org>, djwong@...nel.org,
ritesh.list@...il.com, john.g.garry@...cle.com, tytso@....edu,
willy@...radead.org, dchinner@...hat.com, hch@....de
Cc: linux-xfs@...r.kernel.org, linux-kernel@...r.kernel.org,
linux-ext4@...r.kernel.org, linux-fsdevel@...r.kernel.org,
linux-mm@...ck.org, jack@...e.cz, nilay@...ux.ibm.com,
martin.petersen@...cle.com, rostedt@...dmis.org, axboe@...nel.dk,
linux-block@...r.kernel.org, linux-trace-kernel@...r.kernel.org
Subject: [RFC PATCH 3/8] fs: Add initial buffered atomic write support info to statx
Extend statx system call to return additional info for buffered atomic
write support for a file. Currently only direct IO is supported.
New flags STATX_WRITE_ATOMIC_BUF and STATX_ATTR_WRITE_ATOMIC_BUF are for
indicating whether the file knows and supports buffered atomic writes.
Structure statx members stx_atomic_write_unit_{min, max, segments_max}
will be reused for buffered atomic writes. Flags STATX_WRITE_ATOMIC_DIO
and STATX_WRITE_ATOMIC_BUF are mutually exclusive. With both flags set,
statx will ignore the request and neither flag will be set in
statx.result_mask.
Also, make sure ext4 and xfs report atomic write unit min and max of 0
when the new flag is passed.
Co-developed-by: John Garry <john.g.garry@...cle.com>
Signed-off-by: John Garry <john.g.garry@...cle.com>
Signed-off-by: Ojaswin Mujoo <ojaswin@...ux.ibm.com>
---
block/bdev.c | 3 +-
fs/ext4/inode.c | 7 +-
fs/stat.c | 33 +++--
fs/xfs/xfs_file.c | 9 +-
fs/xfs/xfs_iops.c | 121 ++++++++++--------
fs/xfs/xfs_iops.h | 6 +-
include/linux/fs.h | 3 +-
include/trace/misc/fs.h | 1 +
include/uapi/linux/stat.h | 2 +
tools/include/uapi/linux/stat.h | 2 +
.../trace/beauty/include/uapi/linux/stat.h | 2 +
11 files changed, 119 insertions(+), 70 deletions(-)
diff --git a/block/bdev.c b/block/bdev.c
index 3bc90d5feb4c..8f0eab0a1ecf 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -1335,8 +1335,7 @@ void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask)
generic_fill_statx_atomic_writes(stat,
queue_atomic_write_unit_min_bytes(bd_queue),
- queue_atomic_write_unit_max_bytes(bd_queue),
- 0);
+ queue_atomic_write_unit_max_bytes(bd_queue), 0, true);
}
stat->blksize = bdev_io_min(bdev);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9555149a8ba6..0d5013993fba 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -6106,8 +6106,11 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path,
awu_max = sbi->s_awu_max;
}
- generic_fill_statx_atomic_writes(stat, awu_min, awu_max, 0);
- }
+ generic_fill_statx_atomic_writes(stat, awu_min, awu_max, 0,
+ true);
+ } else if (request_mask & STATX_WRITE_ATOMIC_BUF)
+ /* Atomic writes for buffered IO not supported yet */
+ generic_fill_statx_atomic_writes(stat, 0, 0, 0, false);
flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
if (flags & EXT4_APPEND_FL)
diff --git a/fs/stat.c b/fs/stat.c
index 7eb2a247ab67..8ba3993dcd09 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -137,20 +137,27 @@ EXPORT_SYMBOL(generic_fill_statx_attr);
* @unit_min: Minimum supported atomic write length in bytes
* @unit_max: Maximum supported atomic write length in bytes
* @unit_max_opt: Optimised maximum supported atomic write length in bytes
+ * @is_dio: Is the stat request for dio
*
- * Fill in the STATX{_ATTR}_WRITE_ATOMIC_DIO flags in the kstat structure from
- * atomic write unit_min and unit_max values.
+ * Fill in the STATX{_ATTR}_WRITE_ATOMIC_{DIO,BUF} flags in the kstat structure
+ * from atomic write unit_min and unit_max values.
*/
void generic_fill_statx_atomic_writes(struct kstat *stat,
unsigned int unit_min,
unsigned int unit_max,
- unsigned int unit_max_opt)
+ unsigned int unit_max_opt,
+ bool is_dio)
{
- /* Confirm that the request type is known */
- stat->result_mask |= STATX_WRITE_ATOMIC_DIO;
+ if (is_dio) {
+ /* Confirm that the request type is known */
+ stat->result_mask |= STATX_WRITE_ATOMIC_DIO;
- /* Confirm that the file attribute type is known */
- stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC_DIO;
+ /* Confirm that the file attribute type is known */
+ stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC_DIO;
+ } else {
+ stat->result_mask |= STATX_WRITE_ATOMIC_BUF;
+ stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC_BUF;
+ }
if (unit_min) {
stat->atomic_write_unit_min = unit_min;
@@ -160,7 +167,10 @@ void generic_fill_statx_atomic_writes(struct kstat *stat,
stat->atomic_write_segments_max = 1;
/* Confirm atomic writes are actually supported */
- stat->attributes |= STATX_ATTR_WRITE_ATOMIC_DIO;
+ if (is_dio)
+ stat->attributes |= STATX_ATTR_WRITE_ATOMIC_DIO;
+ else
+ stat->attributes |= STATX_ATTR_WRITE_ATOMIC_BUF;
}
}
EXPORT_SYMBOL_GPL(generic_fill_statx_atomic_writes);
@@ -206,6 +216,13 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
STATX_ATTR_DAX);
+ if (request_mask & STATX_WRITE_ATOMIC_BUF &&
+ request_mask & STATX_WRITE_ATOMIC_DIO) {
+ /* Both are mutually exclusive, disable them */
+ request_mask &=
+ ~(STATX_WRITE_ATOMIC_BUF | STATX_WRITE_ATOMIC_DIO);
+ }
+
idmap = mnt_idmap(path->mnt);
if (inode->i_op->getattr) {
int ret;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5b9864c8582e..3efa575570ed 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1087,6 +1087,7 @@ xfs_file_write_iter(
struct xfs_inode *ip = XFS_I(inode);
ssize_t ret;
size_t ocount = iov_iter_count(from);
+ bool is_dio = iocb->ki_flags & IOCB_DIRECT;
XFS_STATS_INC(ip->i_mount, xs_write_calls);
@@ -1097,10 +1098,10 @@ xfs_file_write_iter(
return -EIO;
if (iocb->ki_flags & IOCB_ATOMIC) {
- if (ocount < xfs_get_atomic_write_min(ip))
+ if (ocount < xfs_get_atomic_write_min(ip, is_dio))
return -EINVAL;
- if (ocount > xfs_get_atomic_write_max(ip))
+ if (ocount > xfs_get_atomic_write_max(ip, is_dio))
return -EINVAL;
ret = generic_atomic_write_valid(iocb, from);
@@ -1111,7 +1112,7 @@ xfs_file_write_iter(
if (IS_DAX(inode))
return xfs_file_dax_write(iocb, from);
- if (iocb->ki_flags & IOCB_DIRECT) {
+ if (is_dio) {
/*
* Allow a directio write to fall back to a buffered
* write *only* in the case that we're doing a reflink
@@ -1568,7 +1569,7 @@ xfs_file_open(
if (xfs_is_shutdown(XFS_M(inode->i_sb)))
return -EIO;
file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
- if (xfs_get_atomic_write_min(XFS_I(inode)) > 0)
+ if (xfs_get_atomic_write_min(XFS_I(inode), file->f_flags & O_DIRECT) > 0)
file->f_mode |= FMODE_CAN_ATOMIC_WRITE;
return generic_file_open(inode, file);
}
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index f41fcdd3043b..f036c46b19c5 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -601,81 +601,99 @@ xfs_report_dioalign(
unsigned int
xfs_get_atomic_write_min(
- struct xfs_inode *ip)
+ struct xfs_inode *ip,
+ bool is_dio)
{
- struct xfs_mount *mp = ip->i_mount;
+ if (is_dio) {
+ struct xfs_mount *mp = ip->i_mount;
- /*
- * If we can complete an atomic write via atomic out of place writes,
- * then advertise a minimum size of one fsblock. Without this
- * mechanism, we can only guarantee atomic writes up to a single LBA.
- *
- * If out of place writes are not available, we can guarantee an atomic
- * write of exactly one single fsblock if the bdev will make that
- * guarantee for us.
- */
- if (xfs_inode_can_hw_atomic_write(ip) ||
- xfs_inode_can_sw_atomic_write(ip))
- return mp->m_sb.sb_blocksize;
+ /*
+ * If we can complete an atomic write via atomic out of place writes,
+ * then advertise a minimum size of one fsblock. Without this
+ * mechanism, we can only guarantee atomic writes up to a single LBA.
+ *
+ * If out of place writes are not available, we can guarantee an atomic
+ * write of exactly one single fsblock if the bdev will make that
+ * guarantee for us.
+ */
+ if (xfs_inode_can_hw_atomic_write(ip) ||
+ xfs_inode_can_sw_atomic_write(ip))
+ return mp->m_sb.sb_blocksize;
+ }
+ /* buffered IO not supported yet so return 0 right away */
return 0;
}
unsigned int
xfs_get_atomic_write_max(
- struct xfs_inode *ip)
+ struct xfs_inode *ip,
+ bool is_dio)
{
struct xfs_mount *mp = ip->i_mount;
- /*
- * If out of place writes are not available, we can guarantee an atomic
- * write of exactly one single fsblock if the bdev will make that
- * guarantee for us.
- */
- if (!xfs_inode_can_sw_atomic_write(ip)) {
- if (xfs_inode_can_hw_atomic_write(ip))
- return mp->m_sb.sb_blocksize;
- return 0;
+ if (is_dio) {
+ /*
+ * If out of place writes are not available, we can guarantee an atomic
+ * write of exactly one single fsblock if the bdev will make that
+ * guarantee for us.
+ */
+ if (!xfs_inode_can_sw_atomic_write(ip)) {
+ if (xfs_inode_can_hw_atomic_write(ip))
+ return mp->m_sb.sb_blocksize;
+ return 0;
+ }
+
+ /*
+ * If we can complete an atomic write via atomic out of place writes,
+ * then advertise a maximum size of whatever we can complete through
+ * that means. Hardware support is reported via max_opt, not here.
+ */
+ if (XFS_IS_REALTIME_INODE(ip))
+ return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].awu_max);
+ return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_AG].awu_max);
}
- /*
- * If we can complete an atomic write via atomic out of place writes,
- * then advertise a maximum size of whatever we can complete through
- * that means. Hardware support is reported via max_opt, not here.
- */
- if (XFS_IS_REALTIME_INODE(ip))
- return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].awu_max);
- return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_AG].awu_max);
+ /* buffered IO not supported yet so return 0 right away */
+ return 0;
}
unsigned int
xfs_get_atomic_write_max_opt(
- struct xfs_inode *ip)
+ struct xfs_inode *ip,
+ bool is_dio)
{
- unsigned int awu_max = xfs_get_atomic_write_max(ip);
+ if (is_dio) {
+ unsigned int awu_max = xfs_get_atomic_write_max(ip, is_dio);
- /* if the max is 1x block, then just keep behaviour that opt is 0 */
- if (awu_max <= ip->i_mount->m_sb.sb_blocksize)
- return 0;
+ /* if the max is 1x block, then just keep behaviour that opt is 0 */
+ if (awu_max <= ip->i_mount->m_sb.sb_blocksize)
+ return 0;
- /*
- * Advertise the maximum size of an atomic write that we can tell the
- * block device to perform for us. In general the bdev limit will be
- * less than our out of place write limit, but we don't want to exceed
- * the awu_max.
- */
- return min(awu_max, xfs_inode_buftarg(ip)->bt_awu_max);
+ /*
+ * Advertise the maximum size of an atomic write that we can tell the
+ * block device to perform for us. In general the bdev limit will be
+ * less than our out of place write limit, but we don't want to exceed
+ * the awu_max.
+ */
+ return min(awu_max, xfs_inode_buftarg(ip)->bt_awu_max);
+ }
+
+ /* buffered IO not supported yet so return 0 right away */
+ return 0;
}
static void
xfs_report_atomic_write(
struct xfs_inode *ip,
- struct kstat *stat)
+ struct kstat *stat,
+ bool is_dio)
{
generic_fill_statx_atomic_writes(stat,
- xfs_get_atomic_write_min(ip),
- xfs_get_atomic_write_max(ip),
- xfs_get_atomic_write_max_opt(ip));
+ xfs_get_atomic_write_min(ip, is_dio),
+ xfs_get_atomic_write_max(ip, is_dio),
+ xfs_get_atomic_write_max_opt(ip, is_dio),
+ is_dio);
}
STATIC int
@@ -741,8 +759,11 @@ xfs_vn_getattr(
case S_IFREG:
if (request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN))
xfs_report_dioalign(ip, stat);
- if (request_mask & STATX_WRITE_ATOMIC_DIO)
- xfs_report_atomic_write(ip, stat);
+ if (request_mask &
+ (STATX_WRITE_ATOMIC_DIO | STATX_WRITE_ATOMIC_BUF))
+ xfs_report_atomic_write(ip, stat,
+ (request_mask &
+ STATX_WRITE_ATOMIC_DIO));
fallthrough;
default:
stat->blksize = xfs_stat_blksize(ip);
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index 0896f6b8b3b8..09e79263add1 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -19,8 +19,8 @@ int xfs_inode_init_security(struct inode *inode, struct inode *dir,
extern void xfs_setup_inode(struct xfs_inode *ip);
extern void xfs_setup_iops(struct xfs_inode *ip);
extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init);
-unsigned int xfs_get_atomic_write_min(struct xfs_inode *ip);
-unsigned int xfs_get_atomic_write_max(struct xfs_inode *ip);
-unsigned int xfs_get_atomic_write_max_opt(struct xfs_inode *ip);
+unsigned int xfs_get_atomic_write_min(struct xfs_inode *ip, bool is_dio);
+unsigned int xfs_get_atomic_write_max(struct xfs_inode *ip, bool is_dio);
+unsigned int xfs_get_atomic_write_max_opt(struct xfs_inode *ip, bool is_dio);
#endif /* __XFS_IOPS_H__ */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c895146c1444..2dec66913e97 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3563,7 +3563,8 @@ void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
void generic_fill_statx_atomic_writes(struct kstat *stat,
unsigned int unit_min,
unsigned int unit_max,
- unsigned int unit_max_opt);
+ unsigned int unit_max_opt,
+ bool is_dio);
extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
void __inode_add_bytes(struct inode *inode, loff_t bytes);
diff --git a/include/trace/misc/fs.h b/include/trace/misc/fs.h
index 19ea9339b9bd..3b69910a5998 100644
--- a/include/trace/misc/fs.h
+++ b/include/trace/misc/fs.h
@@ -162,4 +162,5 @@
{ STATX_MNT_ID_UNIQUE, "MNT_ID_UNIQUE" }, \
{ STATX_SUBVOL, "SUBVOL" }, \
{ STATX_WRITE_ATOMIC_DIO, "WRITE_ATOMIC_DIO" }, \
+ { STATX_WRITE_ATOMIC_BUF, "WRITE_ATOMIC_BUF" }, \
{ STATX_DIO_READ_ALIGN, "DIO_READ_ALIGN" })
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 57f558be933e..2d77da04df23 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -221,6 +221,7 @@ struct statx {
/* Old name kept for backward compatibility */
#define STATX_WRITE_ATOMIC STATX_WRITE_ATOMIC_DIO
#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */
+#define STATX_WRITE_ATOMIC_BUF 0x00040000U /* Want/got buf-io atomic_write_* fields */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
@@ -259,6 +260,7 @@ struct statx {
#define STATX_ATTR_WRITE_ATOMIC_DIO 0x00400000 /* File supports dio atomic write operations */
/* Old name kept for backward compatibility */
#define STATX_ATTR_WRITE_ATOMIC STATX_ATTR_WRITE_ATOMIC_DIO
+#define STATX_ATTR_WRITE_ATOMIC_BUF 0x00800000 /* File supports buf-io atomic write operations */
#endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 57f558be933e..a7e0036669c2 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -221,6 +221,7 @@ struct statx {
/* Old name kept for backward compatibility */
#define STATX_WRITE_ATOMIC STATX_WRITE_ATOMIC_DIO
#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */
+#define STATX_WRITE_ATOMIC_BUF 0x00040000U /* Want/got buf-io atomic_write_* fields */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
@@ -259,6 +260,7 @@ struct statx {
#define STATX_ATTR_WRITE_ATOMIC_DIO 0x00400000 /* File supports dio atomic write operations */
/* Old name kept for backward compatibility */
#define STATX_ATTR_WRITE_ATOMIC STATX_ATTR_WRITE_ATOMIC_DIO
+#define STATX_ATTR_WRITE_ATOMIC_BUF 0x00800000 /* File supports buf-io atomic write operations */
#endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h
index 57f558be933e..2d77da04df23 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/stat.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h
@@ -221,6 +221,7 @@ struct statx {
/* Old name kept for backward compatibility */
#define STATX_WRITE_ATOMIC STATX_WRITE_ATOMIC_DIO
#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */
+#define STATX_WRITE_ATOMIC_BUF 0x00040000U /* Want/got buf-io atomic_write_* fields */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
@@ -259,6 +260,7 @@ struct statx {
#define STATX_ATTR_WRITE_ATOMIC_DIO 0x00400000 /* File supports dio atomic write operations */
/* Old name kept for backward compatibility */
#define STATX_ATTR_WRITE_ATOMIC STATX_ATTR_WRITE_ATOMIC_DIO
+#define STATX_ATTR_WRITE_ATOMIC_BUF 0x00800000 /* File supports buf-io atomic write operations */
#endif /* _UAPI_LINUX_STAT_H */
--
2.51.0
Powered by blists - more mailing lists