lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d304a337c17ba42092d7475ff1374bc481f72b32.1762945505.git.ojaswin@linux.ibm.com>
Date: Wed, 12 Nov 2025 16:36:06 +0530
From: Ojaswin Mujoo <ojaswin@...ux.ibm.com>
To: Christian Brauner <brauner@...nel.org>, djwong@...nel.org,
        ritesh.list@...il.com, john.g.garry@...cle.com, tytso@....edu,
        willy@...radead.org, dchinner@...hat.com, hch@....de
Cc: linux-xfs@...r.kernel.org, linux-kernel@...r.kernel.org,
        linux-ext4@...r.kernel.org, linux-fsdevel@...r.kernel.org,
        linux-mm@...ck.org, jack@...e.cz, nilay@...ux.ibm.com,
        martin.petersen@...cle.com, rostedt@...dmis.org, axboe@...nel.dk,
        linux-block@...r.kernel.org, linux-trace-kernel@...r.kernel.org
Subject: [RFC PATCH 3/8] fs: Add initial buffered atomic write support info to statx

Extend statx system call to return additional info for buffered atomic
write support for a file. Currently only direct IO is supported.

New flags STATX_WRITE_ATOMIC_BUF and STATX_ATTR_WRITE_ATOMIC_BUF are for
indicating whether the file knows and supports buffered atomic writes.

Structure statx members stx_atomic_write_unit_{min, max, segments_max}
will be reused for buffered atomic writes. Flags STATX_WRITE_ATOMIC_DIO
and STATX_WRITE_ATOMIC_BUF are mutually exclusive. With both flags set,
statx will ignore the request and neither flag will be set in
statx.result_mask.

Also, make sure ext4 and xfs report atomic write unit min and max of 0
when the new flag is passed.

Co-developed-by: John Garry <john.g.garry@...cle.com>
Signed-off-by: John Garry <john.g.garry@...cle.com>
Signed-off-by: Ojaswin Mujoo <ojaswin@...ux.ibm.com>
---
 block/bdev.c                                  |   3 +-
 fs/ext4/inode.c                               |   7 +-
 fs/stat.c                                     |  33 +++--
 fs/xfs/xfs_file.c                             |   9 +-
 fs/xfs/xfs_iops.c                             | 121 ++++++++++--------
 fs/xfs/xfs_iops.h                             |   6 +-
 include/linux/fs.h                            |   3 +-
 include/trace/misc/fs.h                       |   1 +
 include/uapi/linux/stat.h                     |   2 +
 tools/include/uapi/linux/stat.h               |   2 +
 .../trace/beauty/include/uapi/linux/stat.h    |   2 +
 11 files changed, 119 insertions(+), 70 deletions(-)

diff --git a/block/bdev.c b/block/bdev.c
index 3bc90d5feb4c..8f0eab0a1ecf 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -1335,8 +1335,7 @@ void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask)
 
 		generic_fill_statx_atomic_writes(stat,
 			queue_atomic_write_unit_min_bytes(bd_queue),
-			queue_atomic_write_unit_max_bytes(bd_queue),
-			0);
+			queue_atomic_write_unit_max_bytes(bd_queue), 0, true);
 	}
 
 	stat->blksize = bdev_io_min(bdev);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9555149a8ba6..0d5013993fba 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -6106,8 +6106,11 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path,
 			awu_max = sbi->s_awu_max;
 		}
 
-		generic_fill_statx_atomic_writes(stat, awu_min, awu_max, 0);
-	}
+		generic_fill_statx_atomic_writes(stat, awu_min, awu_max, 0,
+						 true);
+	} else if (request_mask & STATX_WRITE_ATOMIC_BUF)
+		/* Atomic writes for buffered IO not supported yet */
+		generic_fill_statx_atomic_writes(stat, 0, 0, 0, false);
 
 	flags = ei->i_flags & EXT4_FL_USER_VISIBLE;
 	if (flags & EXT4_APPEND_FL)
diff --git a/fs/stat.c b/fs/stat.c
index 7eb2a247ab67..8ba3993dcd09 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -137,20 +137,27 @@ EXPORT_SYMBOL(generic_fill_statx_attr);
  * @unit_min:	Minimum supported atomic write length in bytes
  * @unit_max:	Maximum supported atomic write length in bytes
  * @unit_max_opt: Optimised maximum supported atomic write length in bytes
+ * @is_dio:	Is the stat request for dio
  *
- * Fill in the STATX{_ATTR}_WRITE_ATOMIC_DIO flags in the kstat structure from
- * atomic write unit_min and unit_max values.
+ * Fill in the STATX{_ATTR}_WRITE_ATOMIC_{DIO,BUF} flags in the kstat structure
+ * from atomic write unit_min and unit_max values.
  */
 void generic_fill_statx_atomic_writes(struct kstat *stat,
 				      unsigned int unit_min,
 				      unsigned int unit_max,
-				      unsigned int unit_max_opt)
+				      unsigned int unit_max_opt,
+				      bool is_dio)
 {
-	/* Confirm that the request type is known */
-	stat->result_mask |= STATX_WRITE_ATOMIC_DIO;
+	if (is_dio) {
+		/* Confirm that the request type is known */
+		stat->result_mask |= STATX_WRITE_ATOMIC_DIO;
 
-	/* Confirm that the file attribute type is known */
-	stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC_DIO;
+		/* Confirm that the file attribute type is known */
+		stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC_DIO;
+	} else {
+		stat->result_mask |= STATX_WRITE_ATOMIC_BUF;
+		stat->attributes_mask |= STATX_ATTR_WRITE_ATOMIC_BUF;
+	}
 
 	if (unit_min) {
 		stat->atomic_write_unit_min = unit_min;
@@ -160,7 +167,10 @@ void generic_fill_statx_atomic_writes(struct kstat *stat,
 		stat->atomic_write_segments_max = 1;
 
 		/* Confirm atomic writes are actually supported */
-		stat->attributes |= STATX_ATTR_WRITE_ATOMIC_DIO;
+		if (is_dio)
+			stat->attributes |= STATX_ATTR_WRITE_ATOMIC_DIO;
+		else
+			stat->attributes |= STATX_ATTR_WRITE_ATOMIC_BUF;
 	}
 }
 EXPORT_SYMBOL_GPL(generic_fill_statx_atomic_writes);
@@ -206,6 +216,13 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
 	stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
 				  STATX_ATTR_DAX);
 
+	if (request_mask & STATX_WRITE_ATOMIC_BUF &&
+	    request_mask & STATX_WRITE_ATOMIC_DIO) {
+		/* Both are mutually exclusive, disable them */
+		request_mask &=
+			~(STATX_WRITE_ATOMIC_BUF | STATX_WRITE_ATOMIC_DIO);
+	}
+
 	idmap = mnt_idmap(path->mnt);
 	if (inode->i_op->getattr) {
 		int ret;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 5b9864c8582e..3efa575570ed 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1087,6 +1087,7 @@ xfs_file_write_iter(
 	struct xfs_inode	*ip = XFS_I(inode);
 	ssize_t			ret;
 	size_t			ocount = iov_iter_count(from);
+	bool is_dio = iocb->ki_flags & IOCB_DIRECT;
 
 	XFS_STATS_INC(ip->i_mount, xs_write_calls);
 
@@ -1097,10 +1098,10 @@ xfs_file_write_iter(
 		return -EIO;
 
 	if (iocb->ki_flags & IOCB_ATOMIC) {
-		if (ocount < xfs_get_atomic_write_min(ip))
+		if (ocount < xfs_get_atomic_write_min(ip, is_dio))
 			return -EINVAL;
 
-		if (ocount > xfs_get_atomic_write_max(ip))
+		if (ocount > xfs_get_atomic_write_max(ip, is_dio))
 			return -EINVAL;
 
 		ret = generic_atomic_write_valid(iocb, from);
@@ -1111,7 +1112,7 @@ xfs_file_write_iter(
 	if (IS_DAX(inode))
 		return xfs_file_dax_write(iocb, from);
 
-	if (iocb->ki_flags & IOCB_DIRECT) {
+	if (is_dio) {
 		/*
 		 * Allow a directio write to fall back to a buffered
 		 * write *only* in the case that we're doing a reflink
@@ -1568,7 +1569,7 @@ xfs_file_open(
 	if (xfs_is_shutdown(XFS_M(inode->i_sb)))
 		return -EIO;
 	file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
-	if (xfs_get_atomic_write_min(XFS_I(inode)) > 0)
+	if (xfs_get_atomic_write_min(XFS_I(inode), file->f_flags & O_DIRECT) > 0)
 		file->f_mode |= FMODE_CAN_ATOMIC_WRITE;
 	return generic_file_open(inode, file);
 }
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index f41fcdd3043b..f036c46b19c5 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -601,81 +601,99 @@ xfs_report_dioalign(
 
 unsigned int
 xfs_get_atomic_write_min(
-	struct xfs_inode	*ip)
+	struct xfs_inode	*ip,
+	bool			is_dio)
 {
-	struct xfs_mount	*mp = ip->i_mount;
+	if (is_dio) {
+		struct xfs_mount *mp = ip->i_mount;
 
-	/*
-	 * If we can complete an atomic write via atomic out of place writes,
-	 * then advertise a minimum size of one fsblock.  Without this
-	 * mechanism, we can only guarantee atomic writes up to a single LBA.
-	 *
-	 * If out of place writes are not available, we can guarantee an atomic
-	 * write of exactly one single fsblock if the bdev will make that
-	 * guarantee for us.
-	 */
-	if (xfs_inode_can_hw_atomic_write(ip) ||
-	    xfs_inode_can_sw_atomic_write(ip))
-		return mp->m_sb.sb_blocksize;
+		/*
+		 * If we can complete an atomic write via atomic out of place writes,
+		 * then advertise a minimum size of one fsblock.  Without this
+		 * mechanism, we can only guarantee atomic writes up to a single LBA.
+		 *
+		 * If out of place writes are not available, we can guarantee an atomic
+		 * write of exactly one single fsblock if the bdev will make that
+		 * guarantee for us.
+		 */
+		if (xfs_inode_can_hw_atomic_write(ip) ||
+		    xfs_inode_can_sw_atomic_write(ip))
+			return mp->m_sb.sb_blocksize;
+	}
 
+	/* buffered IO not supported yet so return 0 right away */
 	return 0;
 }
 
 unsigned int
 xfs_get_atomic_write_max(
-	struct xfs_inode	*ip)
+	struct xfs_inode	*ip,
+	bool			is_dio)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 
-	/*
-	 * If out of place writes are not available, we can guarantee an atomic
-	 * write of exactly one single fsblock if the bdev will make that
-	 * guarantee for us.
-	 */
-	if (!xfs_inode_can_sw_atomic_write(ip)) {
-		if (xfs_inode_can_hw_atomic_write(ip))
-			return mp->m_sb.sb_blocksize;
-		return 0;
+	if (is_dio) {
+		/*
+		 * If out of place writes are not available, we can guarantee an atomic
+		 * write of exactly one single fsblock if the bdev will make that
+		 * guarantee for us.
+		 */
+		if (!xfs_inode_can_sw_atomic_write(ip)) {
+			if (xfs_inode_can_hw_atomic_write(ip))
+				return mp->m_sb.sb_blocksize;
+			return 0;
+		}
+
+		/*
+		 * If we can complete an atomic write via atomic out of place writes,
+		 * then advertise a maximum size of whatever we can complete through
+		 * that means.  Hardware support is reported via max_opt, not here.
+		 */
+		if (XFS_IS_REALTIME_INODE(ip))
+			return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].awu_max);
+		return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_AG].awu_max);
 	}
 
-	/*
-	 * If we can complete an atomic write via atomic out of place writes,
-	 * then advertise a maximum size of whatever we can complete through
-	 * that means.  Hardware support is reported via max_opt, not here.
-	 */
-	if (XFS_IS_REALTIME_INODE(ip))
-		return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].awu_max);
-	return XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_AG].awu_max);
+	/* buffered IO not supported yet so return 0 right away */
+	return 0;
 }
 
 unsigned int
 xfs_get_atomic_write_max_opt(
-	struct xfs_inode	*ip)
+	struct xfs_inode	*ip,
+	bool			is_dio)
 {
-	unsigned int		awu_max = xfs_get_atomic_write_max(ip);
+	if (is_dio) {
+		unsigned int awu_max = xfs_get_atomic_write_max(ip, is_dio);
 
-	/* if the max is 1x block, then just keep behaviour that opt is 0 */
-	if (awu_max <= ip->i_mount->m_sb.sb_blocksize)
-		return 0;
+		/* if the max is 1x block, then just keep behaviour that opt is 0 */
+		if (awu_max <= ip->i_mount->m_sb.sb_blocksize)
+			return 0;
 
-	/*
-	 * Advertise the maximum size of an atomic write that we can tell the
-	 * block device to perform for us.  In general the bdev limit will be
-	 * less than our out of place write limit, but we don't want to exceed
-	 * the awu_max.
-	 */
-	return min(awu_max, xfs_inode_buftarg(ip)->bt_awu_max);
+		/*
+		 * Advertise the maximum size of an atomic write that we can tell the
+		 * block device to perform for us.  In general the bdev limit will be
+		 * less than our out of place write limit, but we don't want to exceed
+		 * the awu_max.
+		 */
+		return min(awu_max, xfs_inode_buftarg(ip)->bt_awu_max);
+	}
+
+	/* buffered IO not supported yet so return 0 right away */
+	return 0;
 }
 
 static void
 xfs_report_atomic_write(
 	struct xfs_inode	*ip,
-	struct kstat		*stat)
+	struct kstat		*stat,
+	bool			is_dio)
 {
 	generic_fill_statx_atomic_writes(stat,
-			xfs_get_atomic_write_min(ip),
-			xfs_get_atomic_write_max(ip),
-			xfs_get_atomic_write_max_opt(ip));
+					 xfs_get_atomic_write_min(ip, is_dio),
+					 xfs_get_atomic_write_max(ip, is_dio),
+					 xfs_get_atomic_write_max_opt(ip, is_dio),
+					 is_dio);
 }
 
 STATIC int
@@ -741,8 +759,11 @@ xfs_vn_getattr(
 	case S_IFREG:
 		if (request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN))
 			xfs_report_dioalign(ip, stat);
-		if (request_mask & STATX_WRITE_ATOMIC_DIO)
-			xfs_report_atomic_write(ip, stat);
+		if (request_mask &
+		    (STATX_WRITE_ATOMIC_DIO | STATX_WRITE_ATOMIC_BUF))
+			xfs_report_atomic_write(ip, stat,
+						(request_mask &
+						 STATX_WRITE_ATOMIC_DIO));
 		fallthrough;
 	default:
 		stat->blksize = xfs_stat_blksize(ip);
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index 0896f6b8b3b8..09e79263add1 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -19,8 +19,8 @@ int xfs_inode_init_security(struct inode *inode, struct inode *dir,
 extern void xfs_setup_inode(struct xfs_inode *ip);
 extern void xfs_setup_iops(struct xfs_inode *ip);
 extern void xfs_diflags_to_iflags(struct xfs_inode *ip, bool init);
-unsigned int xfs_get_atomic_write_min(struct xfs_inode *ip);
-unsigned int xfs_get_atomic_write_max(struct xfs_inode *ip);
-unsigned int xfs_get_atomic_write_max_opt(struct xfs_inode *ip);
+unsigned int xfs_get_atomic_write_min(struct xfs_inode *ip, bool is_dio);
+unsigned int xfs_get_atomic_write_max(struct xfs_inode *ip, bool is_dio);
+unsigned int xfs_get_atomic_write_max_opt(struct xfs_inode *ip, bool is_dio);
 
 #endif /* __XFS_IOPS_H__ */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c895146c1444..2dec66913e97 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3563,7 +3563,8 @@ void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
 void generic_fill_statx_atomic_writes(struct kstat *stat,
 				      unsigned int unit_min,
 				      unsigned int unit_max,
-				      unsigned int unit_max_opt);
+				      unsigned int unit_max_opt,
+				      bool is_dio);
 extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
 extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
 void __inode_add_bytes(struct inode *inode, loff_t bytes);
diff --git a/include/trace/misc/fs.h b/include/trace/misc/fs.h
index 19ea9339b9bd..3b69910a5998 100644
--- a/include/trace/misc/fs.h
+++ b/include/trace/misc/fs.h
@@ -162,4 +162,5 @@
 		{ STATX_MNT_ID_UNIQUE,	"MNT_ID_UNIQUE" },	\
 		{ STATX_SUBVOL,		"SUBVOL" },		\
 		{ STATX_WRITE_ATOMIC_DIO,	"WRITE_ATOMIC_DIO" },   \
+		{ STATX_WRITE_ATOMIC_BUF,	"WRITE_ATOMIC_BUF" },   \
 		{ STATX_DIO_READ_ALIGN,	"DIO_READ_ALIGN" })
diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h
index 57f558be933e..2d77da04df23 100644
--- a/include/uapi/linux/stat.h
+++ b/include/uapi/linux/stat.h
@@ -221,6 +221,7 @@ struct statx {
 /* Old name kept for backward compatibility */
 #define STATX_WRITE_ATOMIC	STATX_WRITE_ATOMIC_DIO
 #define STATX_DIO_READ_ALIGN	0x00020000U	/* Want/got dio read alignment info */
+#define STATX_WRITE_ATOMIC_BUF	0x00040000U	/* Want/got buf-io atomic_write_* fields */
 
 #define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */
 
@@ -259,6 +260,7 @@ struct statx {
 #define STATX_ATTR_WRITE_ATOMIC_DIO	0x00400000 /* File supports dio atomic write operations */
 /* Old name kept for backward compatibility */
 #define STATX_ATTR_WRITE_ATOMIC	STATX_ATTR_WRITE_ATOMIC_DIO
+#define STATX_ATTR_WRITE_ATOMIC_BUF	0x00800000 /* File supports buf-io atomic write operations */
 
 
 #endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 57f558be933e..a7e0036669c2 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -221,6 +221,7 @@ struct statx {
 /* Old name kept for backward compatibility */
 #define STATX_WRITE_ATOMIC	STATX_WRITE_ATOMIC_DIO
 #define STATX_DIO_READ_ALIGN	0x00020000U	/* Want/got dio read alignment info */
+#define STATX_WRITE_ATOMIC_BUF  0x00040000U	/* Want/got buf-io atomic_write_* fields */
 
 #define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */
 
@@ -259,6 +260,7 @@ struct statx {
 #define STATX_ATTR_WRITE_ATOMIC_DIO	0x00400000 /* File supports dio atomic write operations */
 /* Old name kept for backward compatibility */
 #define STATX_ATTR_WRITE_ATOMIC	STATX_ATTR_WRITE_ATOMIC_DIO
+#define STATX_ATTR_WRITE_ATOMIC_BUF	0x00800000 /* File supports buf-io atomic write operations */
 
 
 #endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h
index 57f558be933e..2d77da04df23 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/stat.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h
@@ -221,6 +221,7 @@ struct statx {
 /* Old name kept for backward compatibility */
 #define STATX_WRITE_ATOMIC	STATX_WRITE_ATOMIC_DIO
 #define STATX_DIO_READ_ALIGN	0x00020000U	/* Want/got dio read alignment info */
+#define STATX_WRITE_ATOMIC_BUF	0x00040000U	/* Want/got buf-io atomic_write_* fields */
 
 #define STATX__RESERVED		0x80000000U	/* Reserved for future struct statx expansion */
 
@@ -259,6 +260,7 @@ struct statx {
 #define STATX_ATTR_WRITE_ATOMIC_DIO	0x00400000 /* File supports dio atomic write operations */
 /* Old name kept for backward compatibility */
 #define STATX_ATTR_WRITE_ATOMIC	STATX_ATTR_WRITE_ATOMIC_DIO
+#define STATX_ATTR_WRITE_ATOMIC_BUF	0x00800000 /* File supports buf-io atomic write operations */
 
 
 #endif /* _UAPI_LINUX_STAT_H */
-- 
2.51.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ