[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <20241124094253.565643-1-zhenghaoran@buaa.edu.cn>
Date: Sun, 24 Nov 2024 17:42:53 +0800
From: Hao-ran Zheng <zhenghaoran@...a.edu.cn>
To: viro@...iv.linux.org.uk,
brauner@...nel.org,
jack@...e.cz,
linux-fsdevel@...r.kernel.org,
linux-kernel@...r.kernel.org
Cc: baijiaju1990@...il.com,
zhenghaoran@...a.edu.cn,
21371365@...a.edu.cn
Subject: [PATCH v4] fs: Fix data race in inode_set_ctime_to_ts
A data race may occur when the function `inode_set_ctime_to_ts()` and
the function `inode_get_ctime_sec()` are executed concurrently. When
two threads call `aio_read` and `aio_write` respectively, they will
be distributed to the read and write functions of the corresponding
file system respectively. Taking the btrfs file system as an example,
the `btrfs_file_read_iter` and `btrfs_file_write_iter` functions are
finally called. These two functions created a data race when they
finally called `inode_get_ctime_sec()` and `inode_set_ctime_to_ns()`.
The specific call stack that appears during testing is as follows:
============DATA_RACE============
btrfs_delayed_update_inode+0x1f61/0x7ce0 [btrfs]
btrfs_update_inode+0x45e/0xbb0 [btrfs]
btrfs_dirty_inode+0x2b8/0x530 [btrfs]
btrfs_update_time+0x1ad/0x230 [btrfs]
touch_atime+0x211/0x440
filemap_read+0x90f/0xa20
btrfs_file_read_iter+0xeb/0x580 [btrfs]
aio_read+0x275/0x3a0
io_submit_one+0xd22/0x1ce0
__se_sys_io_submit+0xb3/0x250
do_syscall_64+0xc1/0x190
entry_SYSCALL_64_after_hwframe+0x77/0x7f
============OTHER_INFO============
btrfs_write_check+0xa15/0x1390 [btrfs]
btrfs_buffered_write+0x52f/0x29d0 [btrfs]
btrfs_do_write_iter+0x53d/0x1590 [btrfs]
btrfs_file_write_iter+0x41/0x60 [btrfs]
aio_write+0x41e/0x5f0
io_submit_one+0xd42/0x1ce0
__se_sys_io_submit+0xb3/0x250
do_syscall_64+0xc1/0x190
entry_SYSCALL_64_after_hwframe+0x77/0x7f
To address this issue, it is recommended to add WRITE_ONCE
and READ_ONCE when reading or writing the `inode->i_ctime_sec`
and `inode->i_ctime_nsec` variable.
Signed-off-by: Hao-ran Zheng <zhenghaoran@...a.edu.cn>
---
V3 -> V4: Fixed patch for latest code
V2 -> V3: Added READ_ONCE in inode_get_ctime_nsec() and addressed review comments
V1 -> V2: Added READ_ONCE in inode_get_ctime_sec()
---
fs/inode.c | 16 ++++++++--------
fs/stat.c | 2 +-
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/fs/inode.c b/fs/inode.c
index b13b778257ae..bfab370c8622 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -2713,8 +2713,8 @@ struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 t
{
trace_inode_set_ctime_to_ts(inode, &ts);
set_normalized_timespec64(&ts, ts.tv_sec, ts.tv_nsec);
- inode->i_ctime_sec = ts.tv_sec;
- inode->i_ctime_nsec = ts.tv_nsec;
+ WRITE_ONCE(inode->i_ctime_sec, ts.tv_sec);
+ WRITE_ONCE(inode->i_ctime_nsec, ts.tv_nsec);
return ts;
}
EXPORT_SYMBOL(inode_set_ctime_to_ts);
@@ -2788,7 +2788,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
*/
cns = smp_load_acquire(&inode->i_ctime_nsec);
if (cns & I_CTIME_QUERIED) {
- struct timespec64 ctime = { .tv_sec = inode->i_ctime_sec,
+ struct timespec64 ctime = { .tv_sec = READ_ONCE(inode->i_ctime_sec),
.tv_nsec = cns & ~I_CTIME_QUERIED };
if (timespec64_compare(&now, &ctime) <= 0) {
@@ -2809,7 +2809,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
/* Try to swap the nsec value into place. */
if (try_cmpxchg(&inode->i_ctime_nsec, &cur, now.tv_nsec)) {
/* If swap occurred, then we're (mostly) done */
- inode->i_ctime_sec = now.tv_sec;
+ WRITE_ONCE(inode->i_ctime_sec, now.tv_sec);
trace_ctime_ns_xchg(inode, cns, now.tv_nsec, cur);
mgtime_counter_inc(mg_ctime_swaps);
} else {
@@ -2824,7 +2824,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
goto retry;
}
/* Otherwise, keep the existing ctime */
- now.tv_sec = inode->i_ctime_sec;
+ now.tv_sec = READ_ONCE(inode->i_ctime_sec);
now.tv_nsec = cur & ~I_CTIME_QUERIED;
}
out:
@@ -2857,7 +2857,7 @@ struct timespec64 inode_set_ctime_deleg(struct inode *inode, struct timespec64 u
/* pairs with try_cmpxchg below */
cur = smp_load_acquire(&inode->i_ctime_nsec);
cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
- cur_ts.tv_sec = inode->i_ctime_sec;
+ cur_ts.tv_sec = READ_ONCE(inode->i_ctime_sec);
/* If the update is older than the existing value, skip it. */
if (timespec64_compare(&update, &cur_ts) <= 0)
@@ -2883,7 +2883,7 @@ struct timespec64 inode_set_ctime_deleg(struct inode *inode, struct timespec64 u
retry:
old = cur;
if (try_cmpxchg(&inode->i_ctime_nsec, &cur, update.tv_nsec)) {
- inode->i_ctime_sec = update.tv_sec;
+ WRITE_ONCE(inode->i_ctime_sec, update.tv_sec);
mgtime_counter_inc(mg_ctime_swaps);
return update;
}
@@ -2899,7 +2899,7 @@ struct timespec64 inode_set_ctime_deleg(struct inode *inode, struct timespec64 u
goto retry;
/* Otherwise, it was a new timestamp. */
- cur_ts.tv_sec = inode->i_ctime_sec;
+ cur_ts.tv_sec = READ_ONCE(inode->i_ctime_sec);
cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
return cur_ts;
}
diff --git a/fs/stat.c b/fs/stat.c
index 0870e969a8a0..e39c78cd62f3 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -53,7 +53,7 @@ void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode)
}
stat->mtime = inode_get_mtime(inode);
- stat->ctime.tv_sec = inode->i_ctime_sec;
+ stat->ctime.tv_sec = READ_ONCE(inode->i_ctime_sec);
stat->ctime.tv_nsec = (u32)atomic_read(pcn);
if (!(stat->ctime.tv_nsec & I_CTIME_QUERIED))
stat->ctime.tv_nsec = ((u32)atomic_fetch_or(I_CTIME_QUERIED, pcn));
--
2.34.1
Powered by blists - more mailing lists