>From 2d4018f7fe647a440f488d71cf4ceffccaecb02e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 19 Jan 2011 13:45:04 +0100 Subject: [PATCH 1/2] jbd2: Refine commit writeout logic Currently we write out all journal buffers in WRITE_SYNC mode. This improves performance for fsync-heavy workloads but hinders performance when writes are mostly asynchronous. So add the possibility for callers starting a transaction commit to specify whether they are going to wait for the commit, and submit journal writes in WRITE_SYNC mode only in that case. Signed-off-by: Jan Kara --- fs/ext4/fsync.c | 2 +- fs/ext4/super.c | 2 +- fs/jbd2/checkpoint.c | 2 +- fs/jbd2/commit.c | 4 ++-- fs/jbd2/journal.c | 19 ++++++++++--------- fs/jbd2/transaction.c | 13 ++++++------- fs/ocfs2/aops.c | 2 +- fs/ocfs2/super.c | 2 +- include/linux/jbd2.h | 18 ++++++++++-------- 9 files changed, 33 insertions(+), 31 deletions(-) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 7829b28..19434da 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -198,7 +198,7 @@ int ext4_sync_file(struct file *file, int datasync) return ext4_force_commit(inode->i_sb); commit_tid = datasync ? 
ei->i_datasync_tid : ei->i_sync_tid; - if (jbd2_log_start_commit(journal, commit_tid)) { + if (jbd2_log_start_commit(journal, commit_tid, true)) { /* * When the journal is on a different device than the * fs data disk, we need to issue the barrier in diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f6a318f..bfce0d6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4115,7 +4115,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) trace_ext4_sync_fs(sb, wait); flush_workqueue(sbi->dio_unwritten_wq); - if (jbd2_journal_start_commit(sbi->s_journal, &target)) { + if (jbd2_journal_start_commit(sbi->s_journal, &target, true)) { if (wait) jbd2_log_wait_commit(sbi->s_journal, target); } diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 6a79fd0..3436d53 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -309,7 +309,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, "Waiting for Godot: block %llu\n", journal->j_devname, (unsigned long long) bh->b_blocknr); - jbd2_log_start_commit(journal, tid); + jbd2_log_start_commit(journal, tid, true); jbd2_log_wait_commit(journal, tid); ret = 1; } else if (!buffer_dirty(bh)) { diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f3ad159..19973eb 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -329,7 +329,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) int tag_bytes = journal_tag_bytes(journal); struct buffer_head *cbh = NULL; /* For transactional checksums */ __u32 crc32_sum = ~0; - int write_op = WRITE_SYNC; + int write_op = WRITE; /* * First job: lock down the current transaction and wait for @@ -368,7 +368,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) * we unplug the device. We don't do explicit unplugging in here, * instead we rely on sync_buffer() doing the unplug for us. 
*/ - if (commit_transaction->t_synchronous_commit) + if (tid_geq(journal->j_commit_waited, commit_transaction->t_tid)) write_op = WRITE_SYNC_PLUG; trace_jbd2_commit_locking(journal, commit_transaction); stats.run.rs_wait = commit_transaction->t_max_wait; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 97e7346..04835d6 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -476,7 +476,7 @@ int __jbd2_log_space_left(journal_t *journal) * Called with j_state_lock locked for writing. * Returns true if a transaction commit was started. */ -int __jbd2_log_start_commit(journal_t *journal, tid_t target) +int __jbd2_log_start_commit(journal_t *journal, tid_t target, bool will_wait) { /* * Are we already doing a recent enough commit? @@ -486,7 +486,8 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) * We want a new commit: OK, mark the request and wakeup the * commit thread. We do _not_ do the commit ourselves. */ - + if (will_wait && !tid_geq(journal->j_commit_waited, target)) + journal->j_commit_waited = target; journal->j_commit_request = target; jbd_debug(1, "JBD: requesting commit %d/%d\n", journal->j_commit_request, @@ -497,12 +498,12 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) return 0; } -int jbd2_log_start_commit(journal_t *journal, tid_t tid) +int jbd2_log_start_commit(journal_t *journal, tid_t tid, bool will_wait) { int ret; write_lock(&journal->j_state_lock); - ret = __jbd2_log_start_commit(journal, tid); + ret = __jbd2_log_start_commit(journal, tid, will_wait); write_unlock(&journal->j_state_lock); return ret; } @@ -539,7 +540,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal) tid = transaction->t_tid; read_unlock(&journal->j_state_lock); if (need_to_start) - jbd2_log_start_commit(journal, tid); + jbd2_log_start_commit(journal, tid, true); jbd2_log_wait_commit(journal, tid); return 1; } @@ -549,7 +550,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal) * if a transaction is going to be 
committed (or is currently already * committing), and fills its tid in at *ptid */ -int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) +int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid, bool will_wait) { int ret = 0; @@ -557,7 +558,7 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) if (journal->j_running_transaction) { tid_t tid = journal->j_running_transaction->t_tid; - __jbd2_log_start_commit(journal, tid); + __jbd2_log_start_commit(journal, tid, will_wait); /* There's a running transaction and we've just made sure * it's commit has been scheduled. */ if (ptid) @@ -1564,7 +1565,7 @@ int jbd2_journal_flush(journal_t *journal) /* Force everything buffered to the log... */ if (journal->j_running_transaction) { transaction = journal->j_running_transaction; - __jbd2_log_start_commit(journal, transaction->t_tid); + __jbd2_log_start_commit(journal, transaction->t_tid, true); } else if (journal->j_committing_transaction) transaction = journal->j_committing_transaction; @@ -1680,7 +1681,7 @@ void __jbd2_journal_abort_hard(journal_t *journal) journal->j_flags |= JBD2_ABORT; transaction = journal->j_running_transaction; if (transaction) - __jbd2_log_start_commit(journal, transaction->t_tid); + __jbd2_log_start_commit(journal, transaction->t_tid, false); write_unlock(&journal->j_state_lock); } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 1d11910..2211dae 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -226,7 +226,7 @@ repeat: need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); if (need_to_start) - jbd2_log_start_commit(journal, tid); + jbd2_log_start_commit(journal, tid, false); schedule(); finish_wait(&journal->j_wait_transaction_locked, &wait); goto repeat; @@ -473,7 +473,7 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask) need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); if 
(need_to_start) - jbd2_log_start_commit(journal, tid); + jbd2_log_start_commit(journal, tid, false); lock_map_release(&handle->h_lockdep_map); handle->h_buffer_credits = nblocks; @@ -1368,8 +1368,6 @@ int jbd2_journal_stop(handle_t *handle) } } - if (handle->h_sync) - transaction->t_synchronous_commit = 1; current->journal_info = NULL; atomic_sub(handle->h_buffer_credits, &transaction->t_outstanding_credits); @@ -1390,15 +1388,16 @@ int jbd2_journal_stop(handle_t *handle) jbd_debug(2, "transaction too old, requesting commit for " "handle %p\n", handle); - /* This is non-blocking */ - jbd2_log_start_commit(journal, transaction->t_tid); - /* * Special case: JBD2_SYNC synchronous updates require us * to wait for the commit to complete. */ if (handle->h_sync && !(current->flags & PF_MEMALLOC)) wait_for_commit = 1; + + /* This is non-blocking */ + jbd2_log_start_commit(journal, transaction->t_tid, + wait_for_commit); } /* diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 1fbb0e2..d493f32 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1659,7 +1659,7 @@ static int ocfs2_try_to_free_truncate_log(struct ocfs2_super *osb, goto out; } - if (jbd2_journal_start_commit(osb->journal->j_journal, &target)) { + if (jbd2_journal_start_commit(osb->journal->j_journal, &target, true)) { jbd2_log_wait_commit(osb->journal->j_journal, target); ret = 1; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 38f986d..45d9f82 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -414,7 +414,7 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait) } if (jbd2_journal_start_commit(OCFS2_SB(sb)->journal->j_journal, - &target)) { + &target, wait)) { if (wait) jbd2_log_wait_commit(OCFS2_SB(sb)->journal->j_journal, target); diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 27e79c2..46aaf45 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -631,11 +631,6 @@ struct transaction_s */ atomic_t t_handle_count; - /* - * This transaction is being forced and 
some process is - * waiting for it to finish. - */ - unsigned int t_synchronous_commit:1; unsigned int t_flushed_data_blocks:1; /* @@ -900,6 +895,13 @@ struct journal_s tid_t j_commit_request; /* + * Sequence number of the most recent transaction someone is waiting + * for to commit. + * [j_state_lock] + */ + tid_t j_commit_waited; + + /* * Journal uuid: identifies the object (filesystem, LVM volume etc) * backed by this journal. This will eventually be replaced by an array * of uuids, allowing us to index multiple devices within a single @@ -1200,9 +1202,9 @@ extern void jbd2_journal_switch_revoke_table(journal_t *journal); */ int __jbd2_log_space_left(journal_t *); /* Called with journal locked */ -int jbd2_log_start_commit(journal_t *journal, tid_t tid); -int __jbd2_log_start_commit(journal_t *journal, tid_t tid); -int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); +int jbd2_log_start_commit(journal_t *journal, tid_t tid, bool will_wait); +int __jbd2_log_start_commit(journal_t *journal, tid_t tid, bool will_wait); +int jbd2_journal_start_commit(journal_t *journal, tid_t *tid, bool will_wait); int jbd2_journal_force_commit_nested(journal_t *journal); int jbd2_log_wait_commit(journal_t *journal, tid_t tid); int jbd2_log_do_checkpoint(journal_t *journal); -- 1.7.1