lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260219114645.778338-2-me@linux.beauty>
Date: Thu, 19 Feb 2026 19:46:42 +0800
From: Li Chen <me@...ux.beauty>
To: Theodore Ts'o <tytso@....edu>,
	Jan Kara <jack@...e.cz>,
	Mark Fasheh <mark@...heh.com>,
	linux-ext4@...r.kernel.org,
	ocfs2-devel@...ts.linux.dev,
	Matthew Wilcox <willy@...radead.org>,
	Jan Kara <jack@...e.com>,
	linux-kernel@...r.kernel.org
Cc: Li Chen <me@...ux.beauty>
Subject: [PATCH v2 1/3] jbd2: store jinode dirty range in PAGE_SIZE units

jbd2_inode fields are updated under journal->j_list_lock, but some paths
read them without holding the lock (e.g. fast commit helpers and ordered
truncate helpers).

READ_ONCE() alone is not sufficient for i_dirty_start/end: they are
loff_t (64-bit), so a 32-bit platform may need two loads to read one
value and can observe a torn result. Store the dirty range in PAGE_SIZE
units as pgoff_t (unsigned long, i.e. word-sized) so lockless readers can
take non-torn snapshots.

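Use READ_ONCE() on the read side and WRITE_ONCE() on the write side for
the dirty range and i_flags to match the existing lockless access pattern.
Also use READ_ONCE() for the lockless i_transaction check in
jbd2_journal_begin_ordered_truncate().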

Suggested-by: Jan Kara <jack@...e.cz>
Reviewed-by: Jan Kara <jack@...e.cz>
Signed-off-by: Li Chen <me@...ux.beauty>
---
Changes since v1:
- Store i_dirty_start/end in PAGE_SIZE units (pgoff_t) to avoid torn loads on
  32-bit (pointed out by Matthew, suggested by Jan).
- Use WRITE_ONCE() for i_dirty_* / i_flags updates (per Jan).
- Drop pointless READ_ONCE() on i_vfs_inode in jbd2_wait_inode_data (per Jan).

 fs/jbd2/commit.c      | 65 ++++++++++++++++++++++++++++++++++---------
 fs/jbd2/journal.c     |  3 +-
 fs/jbd2/transaction.c | 20 ++++++++-----
 include/linux/jbd2.h  | 17 +++++++----
 4 files changed, 78 insertions(+), 27 deletions(-)

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 7203d2d2624d..d98f4dbde695 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -180,7 +180,13 @@ static int journal_wait_on_commit_record(journal_t *journal,
 /* Send all the data buffers related to an inode */
 int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
 {
-	if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
+	unsigned long flags;
+
+	if (!jinode)
+		return 0;
+
+	flags = READ_ONCE(jinode->i_flags);
+	if (!(flags & JI_WRITE_DATA))
 		return 0;
 
 	trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
@@ -191,12 +197,35 @@ EXPORT_SYMBOL(jbd2_submit_inode_data);
 
 int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
 {
-	if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) ||
-		!jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping)
+	struct address_space *mapping;
+	struct inode *inode;
+	unsigned long flags;
+	pgoff_t start, end;
+	loff_t start_byte, end_byte;
+
+	if (!jinode)
+		return 0;
+
+	flags = READ_ONCE(jinode->i_flags);
+	if (!(flags & JI_WAIT_DATA))
+		return 0;
+
+	inode = jinode->i_vfs_inode;
+	if (!inode)
+		return 0;
+
+	mapping = inode->i_mapping;
+	start = READ_ONCE(jinode->i_dirty_start);
+	end = READ_ONCE(jinode->i_dirty_end);
+	if (end == JBD2_INODE_DIRTY_RANGE_NONE)
+		return 0;
+	start_byte = (loff_t)start << PAGE_SHIFT;
+	end_byte = ((loff_t)end << PAGE_SHIFT) + PAGE_SIZE - 1;
+
+	if (!mapping)
 		return 0;
 	return filemap_fdatawait_range_keep_errors(
-		jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start,
-		jinode->i_dirty_end);
+		mapping, start_byte, end_byte);
 }
 EXPORT_SYMBOL(jbd2_wait_inode_data);
 
@@ -218,7 +247,8 @@ static int journal_submit_data_buffers(journal_t *journal,
 	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
 		if (!(jinode->i_flags & JI_WRITE_DATA))
 			continue;
-		jinode->i_flags |= JI_COMMIT_RUNNING;
+		WRITE_ONCE(jinode->i_flags,
+			   jinode->i_flags | JI_COMMIT_RUNNING);
 		spin_unlock(&journal->j_list_lock);
 		/* submit the inode data buffers. */
 		trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
@@ -229,7 +259,8 @@ static int journal_submit_data_buffers(journal_t *journal,
 		}
 		spin_lock(&journal->j_list_lock);
 		J_ASSERT(jinode->i_transaction == commit_transaction);
-		jinode->i_flags &= ~JI_COMMIT_RUNNING;
+		WRITE_ONCE(jinode->i_flags,
+			   jinode->i_flags & ~JI_COMMIT_RUNNING);
 		smp_mb();
 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
 	}
@@ -240,10 +271,17 @@ static int journal_submit_data_buffers(journal_t *journal,
 int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
 {
 	struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
+	pgoff_t start = READ_ONCE(jinode->i_dirty_start);
+	pgoff_t end = READ_ONCE(jinode->i_dirty_end);
+	loff_t start_byte, end_byte;
+
+	if (end == JBD2_INODE_DIRTY_RANGE_NONE)
+		return 0;
+	start_byte = (loff_t)start << PAGE_SHIFT;
+	end_byte = ((loff_t)end << PAGE_SHIFT) + PAGE_SIZE - 1;
 
 	return filemap_fdatawait_range_keep_errors(mapping,
-						   jinode->i_dirty_start,
-						   jinode->i_dirty_end);
+						   start_byte, end_byte);
 }
 
 /*
@@ -262,7 +300,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
 	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
 		if (!(jinode->i_flags & JI_WAIT_DATA))
 			continue;
-		jinode->i_flags |= JI_COMMIT_RUNNING;
+		WRITE_ONCE(jinode->i_flags, jinode->i_flags | JI_COMMIT_RUNNING);
 		spin_unlock(&journal->j_list_lock);
 		/* wait for the inode data buffers writeout. */
 		if (journal->j_finish_inode_data_buffers) {
@@ -272,7 +310,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
 		}
 		cond_resched();
 		spin_lock(&journal->j_list_lock);
-		jinode->i_flags &= ~JI_COMMIT_RUNNING;
+		WRITE_ONCE(jinode->i_flags, jinode->i_flags & ~JI_COMMIT_RUNNING);
 		smp_mb();
 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
 	}
@@ -288,8 +326,9 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
 				&jinode->i_transaction->t_inode_list);
 		} else {
 			jinode->i_transaction = NULL;
-			jinode->i_dirty_start = 0;
-			jinode->i_dirty_end = 0;
+			WRITE_ONCE(jinode->i_dirty_start, 0);
+			WRITE_ONCE(jinode->i_dirty_end,
+				   JBD2_INODE_DIRTY_RANGE_NONE);
 		}
 	}
 	spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c973162d5b31..9a7477c54dcb 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -3021,7 +3021,7 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
 	jinode->i_vfs_inode = inode;
 	jinode->i_flags = 0;
 	jinode->i_dirty_start = 0;
-	jinode->i_dirty_end = 0;
+	jinode->i_dirty_end = JBD2_INODE_DIRTY_RANGE_NONE;
 	INIT_LIST_HEAD(&jinode->i_list);
 }
 
@@ -3178,4 +3178,3 @@ MODULE_DESCRIPTION("Generic filesystem journal-writing module");
 MODULE_LICENSE("GPL");
 module_init(journal_init);
 module_exit(journal_exit);
-
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index dca4b5d8aaaa..bbe47be6c73c 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2646,6 +2646,7 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
 {
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal;
+	pgoff_t start, end;
 
 	if (is_handle_aborted(handle))
 		return -EROFS;
@@ -2654,15 +2655,20 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
 	jbd2_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
 			transaction->t_tid);
 
+	start = (pgoff_t)(start_byte >> PAGE_SHIFT);
+	end = (pgoff_t)(end_byte >> PAGE_SHIFT);
+
 	spin_lock(&journal->j_list_lock);
-	jinode->i_flags |= flags;
+	WRITE_ONCE(jinode->i_flags, jinode->i_flags | flags);
 
-	if (jinode->i_dirty_end) {
-		jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
-		jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
+	if (jinode->i_dirty_end != JBD2_INODE_DIRTY_RANGE_NONE) {
+		WRITE_ONCE(jinode->i_dirty_start,
+			   min(jinode->i_dirty_start, start));
+		WRITE_ONCE(jinode->i_dirty_end,
+			   max(jinode->i_dirty_end, end));
 	} else {
-		jinode->i_dirty_start = start_byte;
-		jinode->i_dirty_end = end_byte;
+		WRITE_ONCE(jinode->i_dirty_start, start);
+		WRITE_ONCE(jinode->i_dirty_end, end);
 	}
 
 	/* Is inode already attached where we need it? */
@@ -2739,7 +2745,7 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
 	int ret = 0;
 
 	/* This is a quick check to avoid locking if not necessary */
-	if (!jinode->i_transaction)
+	if (!READ_ONCE(jinode->i_transaction))
 		goto out;
 	/* Locks are here just to force reading of recent values, it is
 	 * enough that the transaction was not committing before we started
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index a53a00d36228..81eb58ddc126 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -390,6 +390,8 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
 /* Wait for outstanding data writes for this inode before commit */
 #define JI_WAIT_DATA (1 << __JI_WAIT_DATA)
 
+#define JBD2_INODE_DIRTY_RANGE_NONE	((pgoff_t)-1)
+
 /**
  * struct jbd2_inode - The jbd_inode type is the structure linking inodes in
  * ordered mode present in a transaction so that we can sync them during commit.
@@ -431,18 +433,23 @@ struct jbd2_inode {
 	/**
 	 * @i_dirty_start:
 	 *
-	 * Offset in bytes where the dirty range for this inode starts.
+	 * Dirty range start in PAGE_SIZE units.
+	 *
+	 * The dirty range is empty if @i_dirty_end is set to
+	 * %JBD2_INODE_DIRTY_RANGE_NONE.
+	 *
 	 * [j_list_lock]
 	 */
-	loff_t i_dirty_start;
+	pgoff_t i_dirty_start;
 
 	/**
 	 * @i_dirty_end:
 	 *
-	 * Inclusive offset in bytes where the dirty range for this inode
-	 * ends. [j_list_lock]
+	 * Dirty range end in PAGE_SIZE units (inclusive).
+	 *
+	 * [j_list_lock]
 	 */
-	loff_t i_dirty_end;
+	pgoff_t i_dirty_end;
 };
 
 struct jbd2_revoke_table_s;
-- 
2.52.0

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ