[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260219114645.778338-2-me@linux.beauty>
Date: Thu, 19 Feb 2026 19:46:42 +0800
From: Li Chen <me@...ux.beauty>
To: Theodore Ts'o <tytso@....edu>,
Jan Kara <jack@...e.cz>,
Mark Fasheh <mark@...heh.com>,
linux-ext4@...r.kernel.org,
ocfs2-devel@...ts.linux.dev,
Matthew Wilcox <willy@...radead.org>,
Jan Kara <jack@...e.com>,
linux-kernel@...r.kernel.org
Cc: Li Chen <me@...ux.beauty>
Subject: [PATCH v2 1/3] jbd2: store jinode dirty range in PAGE_SIZE units
jbd2_inode fields are updated under journal->j_list_lock, but some paths
read them without holding the lock (e.g. fast commit helpers and ordered
truncate helpers).
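READ_ONCE() alone is not sufficient for i_dirty_start/end because they are
loff_t (64-bit), so a lockless load on a 32-bit platform can be torn. Store
the dirty range in PAGE_SIZE units as pgoff_t, which is word-sized, so
lockless readers can take non-torn snapshots. Since page index 0 is a valid
inclusive end, an empty range is now denoted by
i_dirty_end == JBD2_INODE_DIRTY_RANGE_NONE instead of i_dirty_end == 0.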
Use READ_ONCE() on the read side and WRITE_ONCE() on the write side for
the dirty range and i_flags to match the existing lockless access pattern.
Suggested-by: Jan Kara <jack@...e.cz>
Reviewed-by: Jan Kara <jack@...e.cz>
Signed-off-by: Li Chen <me@...ux.beauty>
---
Changes since v1:
- Store i_dirty_start/end in PAGE_SIZE units (pgoff_t) to avoid torn loads on
32-bit (pointed out by Matthew, suggested by Jan).
- Use WRITE_ONCE() for i_dirty_* / i_flags updates (per Jan).
- Drop pointless READ_ONCE() on i_vfs_inode in jbd2_wait_inode_data (per Jan).
fs/jbd2/commit.c | 65 ++++++++++++++++++++++++++++++++++---------
fs/jbd2/journal.c | 3 +-
fs/jbd2/transaction.c | 20 ++++++++-----
include/linux/jbd2.h | 17 +++++++----
4 files changed, 78 insertions(+), 27 deletions(-)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 7203d2d2624d..d98f4dbde695 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -180,7 +180,13 @@ static int journal_wait_on_commit_record(journal_t *journal,
/* Send all the data buffers related to an inode */
int jbd2_submit_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{
- if (!jinode || !(jinode->i_flags & JI_WRITE_DATA))
+ unsigned long flags;
+
+ if (!jinode)
+ return 0;
+
+ flags = READ_ONCE(jinode->i_flags);
+ if (!(flags & JI_WRITE_DATA))
return 0;
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
@@ -191,12 +197,35 @@ EXPORT_SYMBOL(jbd2_submit_inode_data);
int jbd2_wait_inode_data(journal_t *journal, struct jbd2_inode *jinode)
{
- if (!jinode || !(jinode->i_flags & JI_WAIT_DATA) ||
- !jinode->i_vfs_inode || !jinode->i_vfs_inode->i_mapping)
+ struct address_space *mapping;
+ struct inode *inode;
+ unsigned long flags;
+ pgoff_t start, end;
+ loff_t start_byte, end_byte;
+
+ if (!jinode)
+ return 0;
+
+ flags = READ_ONCE(jinode->i_flags);
+ if (!(flags & JI_WAIT_DATA))
+ return 0;
+
+ inode = jinode->i_vfs_inode;
+ if (!inode)
+ return 0;
+
+ mapping = inode->i_mapping;
+ start = READ_ONCE(jinode->i_dirty_start);
+ end = READ_ONCE(jinode->i_dirty_end);
+ if (end == JBD2_INODE_DIRTY_RANGE_NONE)
+ return 0;
+ start_byte = (loff_t)start << PAGE_SHIFT;
+ end_byte = ((loff_t)end << PAGE_SHIFT) + PAGE_SIZE - 1;
+
+ if (!mapping)
return 0;
return filemap_fdatawait_range_keep_errors(
- jinode->i_vfs_inode->i_mapping, jinode->i_dirty_start,
- jinode->i_dirty_end);
+ mapping, start_byte, end_byte);
}
EXPORT_SYMBOL(jbd2_wait_inode_data);
@@ -218,7 +247,8 @@ static int journal_submit_data_buffers(journal_t *journal,
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
if (!(jinode->i_flags & JI_WRITE_DATA))
continue;
- jinode->i_flags |= JI_COMMIT_RUNNING;
+ WRITE_ONCE(jinode->i_flags,
+ jinode->i_flags | JI_COMMIT_RUNNING);
spin_unlock(&journal->j_list_lock);
/* submit the inode data buffers. */
trace_jbd2_submit_inode_data(jinode->i_vfs_inode);
@@ -229,7 +259,8 @@ static int journal_submit_data_buffers(journal_t *journal,
}
spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction);
- jinode->i_flags &= ~JI_COMMIT_RUNNING;
+ WRITE_ONCE(jinode->i_flags,
+ jinode->i_flags & ~JI_COMMIT_RUNNING);
smp_mb();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
@@ -240,10 +271,17 @@ static int journal_submit_data_buffers(journal_t *journal,
int jbd2_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
{
struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
+ pgoff_t start = READ_ONCE(jinode->i_dirty_start);
+ pgoff_t end = READ_ONCE(jinode->i_dirty_end);
+ loff_t start_byte, end_byte;
+
+ if (end == JBD2_INODE_DIRTY_RANGE_NONE)
+ return 0;
+ start_byte = (loff_t)start << PAGE_SHIFT;
+ end_byte = ((loff_t)end << PAGE_SHIFT) + PAGE_SIZE - 1;
return filemap_fdatawait_range_keep_errors(mapping,
- jinode->i_dirty_start,
- jinode->i_dirty_end);
+ start_byte, end_byte);
}
/*
@@ -262,7 +300,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
if (!(jinode->i_flags & JI_WAIT_DATA))
continue;
- jinode->i_flags |= JI_COMMIT_RUNNING;
+ WRITE_ONCE(jinode->i_flags, jinode->i_flags | JI_COMMIT_RUNNING);
spin_unlock(&journal->j_list_lock);
/* wait for the inode data buffers writeout. */
if (journal->j_finish_inode_data_buffers) {
@@ -272,7 +310,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
}
cond_resched();
spin_lock(&journal->j_list_lock);
- jinode->i_flags &= ~JI_COMMIT_RUNNING;
+ WRITE_ONCE(jinode->i_flags, jinode->i_flags & ~JI_COMMIT_RUNNING);
smp_mb();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
@@ -288,8 +326,9 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
&jinode->i_transaction->t_inode_list);
} else {
jinode->i_transaction = NULL;
- jinode->i_dirty_start = 0;
- jinode->i_dirty_end = 0;
+ WRITE_ONCE(jinode->i_dirty_start, 0);
+ WRITE_ONCE(jinode->i_dirty_end,
+ JBD2_INODE_DIRTY_RANGE_NONE);
}
}
spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index c973162d5b31..9a7477c54dcb 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -3021,7 +3021,7 @@ void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode)
jinode->i_vfs_inode = inode;
jinode->i_flags = 0;
jinode->i_dirty_start = 0;
- jinode->i_dirty_end = 0;
+ jinode->i_dirty_end = JBD2_INODE_DIRTY_RANGE_NONE;
INIT_LIST_HEAD(&jinode->i_list);
}
@@ -3178,4 +3178,3 @@ MODULE_DESCRIPTION("Generic filesystem journal-writing module");
MODULE_LICENSE("GPL");
module_init(journal_init);
module_exit(journal_exit);
-
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index dca4b5d8aaaa..bbe47be6c73c 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -2646,6 +2646,7 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal;
+ pgoff_t start, end;
if (is_handle_aborted(handle))
return -EROFS;
@@ -2654,15 +2655,20 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
jbd2_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
transaction->t_tid);
+ start = (pgoff_t)(start_byte >> PAGE_SHIFT);
+ end = (pgoff_t)(end_byte >> PAGE_SHIFT);
+
spin_lock(&journal->j_list_lock);
- jinode->i_flags |= flags;
+ WRITE_ONCE(jinode->i_flags, jinode->i_flags | flags);
- if (jinode->i_dirty_end) {
- jinode->i_dirty_start = min(jinode->i_dirty_start, start_byte);
- jinode->i_dirty_end = max(jinode->i_dirty_end, end_byte);
+ if (jinode->i_dirty_end != JBD2_INODE_DIRTY_RANGE_NONE) {
+ WRITE_ONCE(jinode->i_dirty_start,
+ min(jinode->i_dirty_start, start));
+ WRITE_ONCE(jinode->i_dirty_end,
+ max(jinode->i_dirty_end, end));
} else {
- jinode->i_dirty_start = start_byte;
- jinode->i_dirty_end = end_byte;
+ WRITE_ONCE(jinode->i_dirty_start, start);
+ WRITE_ONCE(jinode->i_dirty_end, end);
}
/* Is inode already attached where we need it? */
@@ -2739,7 +2745,7 @@ int jbd2_journal_begin_ordered_truncate(journal_t *journal,
int ret = 0;
/* This is a quick check to avoid locking if not necessary */
- if (!jinode->i_transaction)
+ if (!READ_ONCE(jinode->i_transaction))
goto out;
/* Locks are here just to force reading of recent values, it is
* enough that the transaction was not committing before we started
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index a53a00d36228..81eb58ddc126 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -390,6 +390,8 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh)
/* Wait for outstanding data writes for this inode before commit */
#define JI_WAIT_DATA (1 << __JI_WAIT_DATA)
+#define JBD2_INODE_DIRTY_RANGE_NONE ((pgoff_t)-1)
+
/**
* struct jbd2_inode - The jbd_inode type is the structure linking inodes in
* ordered mode present in a transaction so that we can sync them during commit.
@@ -431,18 +433,23 @@ struct jbd2_inode {
/**
* @i_dirty_start:
*
- * Offset in bytes where the dirty range for this inode starts.
+ * Dirty range start in PAGE_SIZE units.
+ *
+ * The dirty range is empty if @i_dirty_end is set to
+ * %JBD2_INODE_DIRTY_RANGE_NONE.
+ *
* [j_list_lock]
*/
- loff_t i_dirty_start;
+ pgoff_t i_dirty_start;
/**
* @i_dirty_end:
*
- * Inclusive offset in bytes where the dirty range for this inode
- * ends. [j_list_lock]
+ * Dirty range end in PAGE_SIZE units (inclusive).
+ *
+ * [j_list_lock]
*/
- loff_t i_dirty_end;
+ pgoff_t i_dirty_end;
};
struct jbd2_revoke_table_s;
--
2.52.0
Powered by blists - more mailing lists