lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241124174435.GB620578@frogsfrogsfrogs>
Date: Sun, 24 Nov 2024 09:44:35 -0800
From: "Darrick J. Wong" <djwong@...nel.org>
To: Hao-ran Zheng <zhenghaoran@...a.edu.cn>
Cc: viro@...iv.linux.org.uk, brauner@...nel.org, jack@...e.cz,
	linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
	baijiaju1990@...il.com, 21371365@...a.edu.cn
Subject: Re: [PATCH v4] fs: Fix data race in inode_set_ctime_to_ts

On Sun, Nov 24, 2024 at 05:42:53PM +0800, Hao-ran Zheng wrote:
> A data race may occur when the function `inode_set_ctime_to_ts()` and
> the function `inode_get_ctime_sec()` are executed concurrently. When
> two threads call `aio_read` and `aio_write` respectively, they will
> be distributed to the read and write functions of the corresponding
> file system respectively. Taking the btrfs file system as an example,
> the `btrfs_file_read_iter` and `btrfs_file_write_iter` functions are
> finally called. These two functions created a data race when they
> finally called `inode_get_ctime_sec()` and `inode_set_ctime_to_ns()`.
> The specific call stack that appears during testing is as follows:
> 
> ============DATA_RACE============
> btrfs_delayed_update_inode+0x1f61/0x7ce0 [btrfs]
> btrfs_update_inode+0x45e/0xbb0 [btrfs]
> btrfs_dirty_inode+0x2b8/0x530 [btrfs]
> btrfs_update_time+0x1ad/0x230 [btrfs]
> touch_atime+0x211/0x440
> filemap_read+0x90f/0xa20
> btrfs_file_read_iter+0xeb/0x580 [btrfs]
> aio_read+0x275/0x3a0
> io_submit_one+0xd22/0x1ce0
> __se_sys_io_submit+0xb3/0x250
> do_syscall_64+0xc1/0x190
> entry_SYSCALL_64_after_hwframe+0x77/0x7f
> ============OTHER_INFO============
> btrfs_write_check+0xa15/0x1390 [btrfs]
> btrfs_buffered_write+0x52f/0x29d0 [btrfs]
> btrfs_do_write_iter+0x53d/0x1590 [btrfs]
> btrfs_file_write_iter+0x41/0x60 [btrfs]
> aio_write+0x41e/0x5f0
> io_submit_one+0xd42/0x1ce0
> __se_sys_io_submit+0xb3/0x250
> do_syscall_64+0xc1/0x190
> entry_SYSCALL_64_after_hwframe+0x77/0x7f
> 
> To address this issue, it is recommended to add WRITE_ONCE
> and READ_ONCE when reading or writing the `inode->i_ctime_sec`
> and `inode->i_ctime_nsec` variable.

Excuse my ignorance, but how exactly does this annotation fix the race?
Does it prevent reordering of the memory accesses or something?  "it is
recommended" is not enough for dunces such as myself to figure out why
this fixes the problem. :(

--D

> Signed-off-by: Hao-ran Zheng <zhenghaoran@...a.edu.cn>
> ---
> V3 -> V4: Fixed patch for latest code
> V2 -> V3: Added READ_ONCE in inode_get_ctime_nsec() and addressed review comments
> V1 -> V2: Added READ_ONCE in inode_get_ctime_sec()
> ---
>  fs/inode.c | 16 ++++++++--------
>  fs/stat.c  |  2 +-
>  2 files changed, 9 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/inode.c b/fs/inode.c
> index b13b778257ae..bfab370c8622 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -2713,8 +2713,8 @@ struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 t
>  {
>  	trace_inode_set_ctime_to_ts(inode, &ts);
>  	set_normalized_timespec64(&ts, ts.tv_sec, ts.tv_nsec);
> -	inode->i_ctime_sec = ts.tv_sec;
> -	inode->i_ctime_nsec = ts.tv_nsec;
> +	WRITE_ONCE(inode->i_ctime_sec, ts.tv_sec);
> +	WRITE_ONCE(inode->i_ctime_nsec, ts.tv_nsec);
>  	return ts;
>  }
>  EXPORT_SYMBOL(inode_set_ctime_to_ts);
> @@ -2788,7 +2788,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
>  	 */
>  	cns = smp_load_acquire(&inode->i_ctime_nsec);
>  	if (cns & I_CTIME_QUERIED) {
> -		struct timespec64 ctime = { .tv_sec = inode->i_ctime_sec,
> +		struct timespec64 ctime = { .tv_sec = READ_ONCE(inode->i_ctime_sec),
>  					    .tv_nsec = cns & ~I_CTIME_QUERIED };
>  
>  		if (timespec64_compare(&now, &ctime) <= 0) {
> @@ -2809,7 +2809,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
>  	/* Try to swap the nsec value into place. */
>  	if (try_cmpxchg(&inode->i_ctime_nsec, &cur, now.tv_nsec)) {
>  		/* If swap occurred, then we're (mostly) done */
> -		inode->i_ctime_sec = now.tv_sec;
> +		WRITE_ONCE(inode->i_ctime_sec, now.tv_sec);
>  		trace_ctime_ns_xchg(inode, cns, now.tv_nsec, cur);
>  		mgtime_counter_inc(mg_ctime_swaps);
>  	} else {
> @@ -2824,7 +2824,7 @@ struct timespec64 inode_set_ctime_current(struct inode *inode)
>  			goto retry;
>  		}
>  		/* Otherwise, keep the existing ctime */
> -		now.tv_sec = inode->i_ctime_sec;
> +		now.tv_sec = READ_ONCE(inode->i_ctime_sec);
>  		now.tv_nsec = cur & ~I_CTIME_QUERIED;
>  	}
>  out:
> @@ -2857,7 +2857,7 @@ struct timespec64 inode_set_ctime_deleg(struct inode *inode, struct timespec64 u
>  	/* pairs with try_cmpxchg below */
>  	cur = smp_load_acquire(&inode->i_ctime_nsec);
>  	cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
> -	cur_ts.tv_sec = inode->i_ctime_sec;
> +	cur_ts.tv_sec = READ_ONCE(inode->i_ctime_sec);
>  
>  	/* If the update is older than the existing value, skip it. */
>  	if (timespec64_compare(&update, &cur_ts) <= 0)
> @@ -2883,7 +2883,7 @@ struct timespec64 inode_set_ctime_deleg(struct inode *inode, struct timespec64 u
>  retry:
>  	old = cur;
>  	if (try_cmpxchg(&inode->i_ctime_nsec, &cur, update.tv_nsec)) {
> -		inode->i_ctime_sec = update.tv_sec;
> +		WRITE_ONCE(inode->i_ctime_sec, update.tv_sec);
>  		mgtime_counter_inc(mg_ctime_swaps);
>  		return update;
>  	}
> @@ -2899,7 +2899,7 @@ struct timespec64 inode_set_ctime_deleg(struct inode *inode, struct timespec64 u
>  		goto retry;
>  
>  	/* Otherwise, it was a new timestamp. */
> -	cur_ts.tv_sec = inode->i_ctime_sec;
> +	cur_ts.tv_sec = READ_ONCE(inode->i_ctime_sec);
>  	cur_ts.tv_nsec = cur & ~I_CTIME_QUERIED;
>  	return cur_ts;
>  }
> diff --git a/fs/stat.c b/fs/stat.c
> index 0870e969a8a0..e39c78cd62f3 100644
> --- a/fs/stat.c
> +++ b/fs/stat.c
> @@ -53,7 +53,7 @@ void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode)
>  	}
>  
>  	stat->mtime = inode_get_mtime(inode);
> -	stat->ctime.tv_sec = inode->i_ctime_sec;
> +	stat->ctime.tv_sec = READ_ONCE(inode->i_ctime_sec);
>  	stat->ctime.tv_nsec = (u32)atomic_read(pcn);
>  	if (!(stat->ctime.tv_nsec & I_CTIME_QUERIED))
>  		stat->ctime.tv_nsec = ((u32)atomic_fetch_or(I_CTIME_QUERIED, pcn));
> -- 
> 2.34.1
> 
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ