lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Message-Id: <20250506090427.2549456-14-john.g.garry@oracle.com> Date: Tue, 6 May 2025 09:04:23 +0000 From: John Garry <john.g.garry@...cle.com> To: brauner@...nel.org, djwong@...nel.org, hch@....de, viro@...iv.linux.org.uk, jack@...e.cz, cem@...nel.org Cc: linux-fsdevel@...r.kernel.org, dchinner@...hat.com, linux-xfs@...r.kernel.org, linux-kernel@...r.kernel.org, ojaswin@...ux.ibm.com, ritesh.list@...il.com, martin.petersen@...cle.com, linux-ext4@...r.kernel.org, linux-block@...r.kernel.org, catherine.hoang@...cle.com, linux-api@...r.kernel.org, John Garry <john.g.garry@...cle.com> Subject: [PATCH v12 13/17] xfs: commit CoW-based atomic writes atomically When completing a CoW-based write, each extent range mapping update is covered by a separate transaction. For a CoW-based atomic write, all mappings must be changed at once, so change to use a single transaction. Note that there is a limit on the amount of log intent items which can be fit into a single transaction, but this is being ignored for now since the count of items for a typical atomic write would be much less than is typically supported. A typical atomic write would be expected to be 64KB or less, which means only 16 possible extents unmaps, which is quite small. Reviewed-by: Darrick J. Wong <djwong@...nel.org> [djwong: add tr_atomic_ioend] Reviewed-by: Christoph Hellwig <hch@....de> Signed-off-by: Darrick J. Wong <djwong@...nel.org> Signed-off-by: John Garry <john.g.garry@...cle.com> --- fs/xfs/libxfs/xfs_log_rlimit.c | 4 +++ fs/xfs/libxfs/xfs_trans_resv.c | 15 +++++++++ fs/xfs/libxfs/xfs_trans_resv.h | 1 + fs/xfs/xfs_file.c | 5 ++- fs/xfs/xfs_reflink.c | 56 ++++++++++++++++++++++++++++++++++ fs/xfs/xfs_reflink.h | 2 ++ 6 files changed, 82 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c index d3bd6a86c8fe..34bba96d30ca 100644 --- a/fs/xfs/libxfs/xfs_log_rlimit.c +++ b/fs/xfs/libxfs/xfs_log_rlimit.c @@ -91,6 +91,7 @@ xfs_log_calc_trans_resv_for_minlogblocks( */ if (xfs_want_minlogsize_fixes(&mp->m_sb)) { xfs_trans_resv_calc(mp, resv); + resv->tr_atomic_ioend = M_RES(mp)->tr_atomic_ioend; return; } @@ -107,6 +108,9 @@ xfs_log_calc_trans_resv_for_minlogblocks( xfs_trans_resv_calc(mp, resv); + /* Copy the dynamic transaction reservation types from the running fs */ + resv->tr_atomic_ioend = M_RES(mp)->tr_atomic_ioend; + if (xfs_has_reflink(mp)) { /* * In the early days of reflink, typical log operation counts diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 580d00ae2857..a841432abf83 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -1284,6 +1284,15 @@ xfs_calc_namespace_reservations( resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES; } +STATIC void +xfs_calc_default_atomic_ioend_reservation( + struct xfs_mount *mp, + struct xfs_trans_resv *resp) +{ + /* Pick a default that will scale reasonably for the log size. */ + resp->tr_atomic_ioend = resp->tr_itruncate; +} + void xfs_trans_resv_calc( struct xfs_mount *mp, @@ -1378,4 +1387,10 @@ xfs_trans_resv_calc( resp->tr_itruncate.tr_logcount += logcount_adj; resp->tr_write.tr_logcount += logcount_adj; resp->tr_qm_dqalloc.tr_logcount += logcount_adj; + + /* + * Now that we've finished computing the static reservations, we can + * compute the dynamic reservation for atomic writes. + */ + xfs_calc_default_atomic_ioend_reservation(mp, resp); } diff --git a/fs/xfs/libxfs/xfs_trans_resv.h b/fs/xfs/libxfs/xfs_trans_resv.h index d9d0032cbbc5..670045d417a6 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.h +++ b/fs/xfs/libxfs/xfs_trans_resv.h @@ -48,6 +48,7 @@ struct xfs_trans_resv { struct xfs_trans_res tr_qm_dqalloc; /* allocate quota on disk */ struct xfs_trans_res tr_sb; /* modify superblock */ struct xfs_trans_res tr_fsyncts; /* update timestamps on fsync */ + struct xfs_trans_res tr_atomic_ioend; /* untorn write completion */ }; /* shorthand way of accessing reservation structure */ diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index e8acd6ca8f27..32883ec8ca2e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -576,7 +576,10 @@ xfs_dio_write_end_io( nofs_flag = memalloc_nofs_save(); if (flags & IOMAP_DIO_COW) { - error = xfs_reflink_end_cow(ip, offset, size); + if (iocb->ki_flags & IOCB_ATOMIC) + error = xfs_reflink_end_atomic_cow(ip, offset, size); + else + error = xfs_reflink_end_cow(ip, offset, size); if (error) goto out; } diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index f5d338916098..218dee76768b 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -984,6 +984,62 @@ xfs_reflink_end_cow( return error; } +/* + * Fully remap all of the file's data fork at once, which is the critical part + * in achieving atomic behaviour. + * The regular CoW end path does not use function as to keep the block + * reservation per transaction as low as possible. + */ +int +xfs_reflink_end_atomic_cow( + struct xfs_inode *ip, + xfs_off_t offset, + xfs_off_t count) +{ + xfs_fileoff_t offset_fsb; + xfs_fileoff_t end_fsb; + int error = 0; + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + unsigned int resblks; + + trace_xfs_reflink_end_cow(ip, offset, count); + + offset_fsb = XFS_B_TO_FSBT(mp, offset); + end_fsb = XFS_B_TO_FSB(mp, offset + count); + + /* + * Each remapping operation could cause a btree split, so in the worst + * case that's one for each block. + */ + resblks = (end_fsb - offset_fsb) * + XFS_NEXTENTADD_SPACE_RES(mp, 1, XFS_DATA_FORK); + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_atomic_ioend, resblks, 0, + XFS_TRANS_RESERVE, &tp); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, 0); + + while (end_fsb > offset_fsb && !error) { + error = xfs_reflink_end_cow_extent_locked(tp, ip, &offset_fsb, + end_fsb); + } + if (error) { + trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_); + goto out_cancel; + } + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; +out_cancel: + xfs_trans_cancel(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; +} + /* * Free all CoW staging blocks that are still referenced by the ondisk refcount * metadata. The ondisk metadata does not track which inode created the diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 379619f24247..412e9b6f2082 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -45,6 +45,8 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count, bool cancel_real); extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); +int xfs_reflink_end_atomic_cow(struct xfs_inode *ip, xfs_off_t offset, + xfs_off_t count); extern int xfs_reflink_recover_cow(struct xfs_mount *mp); extern loff_t xfs_reflink_remap_range(struct file *file_in, loff_t pos_in, struct file *file_out, loff_t pos_out, loff_t len, -- 2.31.1
Powered by blists - more mailing lists