>From 3e4f42155513df7b6d36584e7b8383b86f425467 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 21 Feb 2011 17:25:37 +0100 Subject: [PATCH 2/2] jbd: Refine commit writeout logic Currently we write out all journal buffers in WRITE_SYNC mode. This improves performance for fsync heavy workloads but hinders performance when writes are mostly asynchronous. So add possibility for callers starting a transaction commit to specify whether they are going to wait for the commit and submit journal writes in WRITE_SYNC mode only in that case. Signed-off-by: Jan Kara --- fs/ext3/fsync.c | 2 +- fs/ext3/super.c | 2 +- fs/jbd/checkpoint.c | 2 +- fs/jbd/commit.c | 4 ++-- fs/jbd/journal.c | 19 ++++++++++--------- fs/jbd/transaction.c | 9 ++++----- include/linux/jbd.h | 21 ++++++++++++--------- 7 files changed, 31 insertions(+), 28 deletions(-) diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index 09b13bb..5396dd6 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -81,7 +81,7 @@ int ext3_sync_file(struct file *file, int datasync) if (test_opt(inode->i_sb, BARRIER) && !journal_trans_will_send_data_barrier(journal, commit_tid)) needs_barrier = 1; - log_start_commit(journal, commit_tid); + log_start_commit(journal, commit_tid, true); ret = log_wait_commit(journal, commit_tid); /* diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 85c8cc8..58a4424 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2497,7 +2497,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait) { tid_t target; - if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { + if (journal_start_commit(EXT3_SB(sb)->s_journal, &target, true)) { if (wait) log_wait_commit(EXT3_SB(sb)->s_journal, target); } diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index e4b87bc..26ca2c3 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -297,7 +297,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); - log_start_commit(journal, tid); + log_start_commit(journal, tid, true); log_wait_commit(journal, tid); ret = 1; } else if (!buffer_dirty(bh)) { diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 34a4861..ac188cb 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -294,7 +294,7 @@ void journal_commit_transaction(journal_t *journal) int first_tag = 0; int tag_flag; int i; - int write_op = WRITE_SYNC; + int write_op = WRITE; /* * First job: lock down the current transaction and wait for @@ -332,7 +332,7 @@ void journal_commit_transaction(journal_t *journal) * we unplug the device. We don't do explicit unplugging in here, * instead we rely on sync_buffer() doing the unplug for us. */ - if (commit_transaction->t_synchronous_commit) + if (tid_geq(journal->j_commit_waited, commit_transaction->t_tid)) write_op = WRITE_SYNC_PLUG; spin_lock(&commit_transaction->t_handle_lock); while (commit_transaction->t_updates) { diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index da1b5e4..5743b4c 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -434,7 +434,7 @@ int __log_space_left(journal_t *journal) /* * Called under j_state_lock. Returns true if a transaction commit was started. */ -int __log_start_commit(journal_t *journal, tid_t target) +int __log_start_commit(journal_t *journal, tid_t target, bool will_wait) { /* * Are we already doing a recent enough commit? @@ -444,7 +444,8 @@ int __log_start_commit(journal_t *journal, tid_t target) * We want a new commit: OK, mark the request and wakeup the * commit thread. We do _not_ do the commit ourselves. */ - + if (will_wait && !tid_geq(journal->j_commit_waited, target)) + journal->j_commit_waited = target; journal->j_commit_request = target; jbd_debug(1, "JBD: requesting commit %d/%d\n", journal->j_commit_request, @@ -455,12 +456,12 @@ int __log_start_commit(journal_t *journal, tid_t target) return 0; } -int log_start_commit(journal_t *journal, tid_t tid) +int log_start_commit(journal_t *journal, tid_t tid, bool will_wait) { int ret; spin_lock(&journal->j_state_lock); - ret = __log_start_commit(journal, tid); + ret = __log_start_commit(journal, tid, will_wait); spin_unlock(&journal->j_state_lock); return ret; } @@ -483,7 +484,7 @@ int journal_force_commit_nested(journal_t *journal) spin_lock(&journal->j_state_lock); if (journal->j_running_transaction && !current->journal_info) { transaction = journal->j_running_transaction; - __log_start_commit(journal, transaction->t_tid); + __log_start_commit(journal, transaction->t_tid, true); } else if (journal->j_committing_transaction) transaction = journal->j_committing_transaction; @@ -503,7 +504,7 @@ int journal_force_commit_nested(journal_t *journal) * if a transaction is going to be committed (or is currently already * committing), and fills its tid in at *ptid */ -int journal_start_commit(journal_t *journal, tid_t *ptid) +int journal_start_commit(journal_t *journal, tid_t *ptid, bool will_wait) { int ret = 0; @@ -511,7 +512,7 @@ int journal_start_commit(journal_t *journal, tid_t *ptid) if (journal->j_running_transaction) { tid_t tid = journal->j_running_transaction->t_tid; - __log_start_commit(journal, tid); + __log_start_commit(journal, tid, will_wait); /* There's a running transaction and we've just made sure * it's commit has been scheduled. */ if (ptid) @@ -1439,7 +1440,7 @@ int journal_flush(journal_t *journal) /* Force everything buffered to the log... */ if (journal->j_running_transaction) { transaction = journal->j_running_transaction; - __log_start_commit(journal, transaction->t_tid); + __log_start_commit(journal, transaction->t_tid, true); } else if (journal->j_committing_transaction) transaction = journal->j_committing_transaction; @@ -1573,7 +1574,7 @@ static void __journal_abort_hard(journal_t *journal) journal->j_flags |= JFS_ABORT; transaction = journal->j_running_transaction; if (transaction) - __log_start_commit(journal, transaction->t_tid); + __log_start_commit(journal, transaction->t_tid, false); spin_unlock(&journal->j_state_lock); } diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 5b2e4c3..a12c0a3 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -178,7 +178,7 @@ repeat_locked: spin_unlock(&transaction->t_handle_lock); prepare_to_wait(&journal->j_wait_transaction_locked, &wait, TASK_UNINTERRUPTIBLE); - __log_start_commit(journal, transaction->t_tid); + __log_start_commit(journal, transaction->t_tid, false); spin_unlock(&journal->j_state_lock); schedule(); finish_wait(&journal->j_wait_transaction_locked, &wait); @@ -411,7 +411,7 @@ int journal_restart(handle_t *handle, int nblocks) spin_unlock(&transaction->t_handle_lock); jbd_debug(2, "restarting handle %p\n", handle); - __log_start_commit(journal, transaction->t_tid); + __log_start_commit(journal, transaction->t_tid, false); spin_unlock(&journal->j_state_lock); lock_map_release(&handle->h_lockdep_map); @@ -1431,8 +1431,6 @@ int journal_stop(handle_t *handle) } } - if (handle->h_sync) - transaction->t_synchronous_commit = 1; current->journal_info = NULL; spin_lock(&journal->j_state_lock); spin_lock(&transaction->t_handle_lock); @@ -1463,7 +1461,8 @@ int journal_stop(handle_t *handle) jbd_debug(2, "transaction too old, requesting commit for " "handle %p\n", handle); /* This is non-blocking */ - __log_start_commit(journal, transaction->t_tid); + __log_start_commit(journal, transaction->t_tid, + handle->h_sync && !(current->flags & PF_MEMALLOC)); spin_unlock(&journal->j_state_lock); /* diff --git a/include/linux/jbd.h b/include/linux/jbd.h index e069650..c38f73e 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -541,12 +541,6 @@ struct transaction_s * How many handles used this transaction? [t_handle_lock] */ int t_handle_count; - - /* - * This transaction is being forced and some process is - * waiting for it to finish. - */ - unsigned int t_synchronous_commit:1; }; /** @@ -594,6 +588,8 @@ struct transaction_s * transaction * @j_commit_request: Sequence number of the most recent transaction wanting * commit + * @j_commit_waited: Sequence number of the most recent transaction someone + * is waiting for to commit. * @j_uuid: Uuid of client object. * @j_task: Pointer to the current commit thread for this journal * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a @@ -762,6 +758,13 @@ struct journal_s tid_t j_commit_request; /* + * Sequence number of the most recent transaction someone is waiting + * for to commit. + * [j_state_lock] + */ + tid_t j_commit_waited; + + /* * Journal uuid: identifies the object (filesystem, LVM volume etc) * backed by this journal. This will eventually be replaced by an array * of uuids, allowing us to index multiple devices within a single @@ -985,9 +988,9 @@ extern void journal_switch_revoke_table(journal_t *journal); */ int __log_space_left(journal_t *); /* Called with journal locked */ -int log_start_commit(journal_t *journal, tid_t tid); -int __log_start_commit(journal_t *journal, tid_t tid); -int journal_start_commit(journal_t *journal, tid_t *tid); +int log_start_commit(journal_t *journal, tid_t tid, bool will_wait); +int __log_start_commit(journal_t *journal, tid_t tid, bool will_wait); +int journal_start_commit(journal_t *journal, tid_t *tid, bool will_wait); int journal_force_commit_nested(journal_t *journal); int log_wait_commit(journal_t *journal, tid_t tid); int log_do_checkpoint(journal_t *journal); -- 1.7.1