[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Sun, 5 May 2013 17:39:39 +0800
From: Zheng Liu <gnehzuil.liu@...il.com>
To: Jan Kara <jack@...e.cz>
Cc: Ted Tso <tytso@....edu>, linux-ext4@...r.kernel.org
Subject: Re: [PATCH 12/29] jbd2: Transaction reservation support
On Mon, Apr 08, 2013 at 11:32:17PM +0200, Jan Kara wrote:
> In some cases we cannot start a transaction because of locking constraints and
> passing started transaction into those places is not handy either because we
> could block transaction commit for too long. Transaction reservation is
> designed to solve these issues. It reserves a handle with given number of
> credits in the journal and the handle can be later attached to the running
> transaction without blocking on commit or checkpointing. Reserved handles do
> not block transaction commit in any way, they only reduce maximum size of the
> running transaction (because we have to always be prepared to accomodate
> request for attaching reserved handle).
>
> Signed-off-by: Jan Kara <jack@...e.cz>
Some minor nits below. Otherwise the patch looks good to me.
Reviewed-by: Zheng Liu <wenqing.lz@...bao.com>
> ---
> fs/jbd2/commit.c | 6 +
> fs/jbd2/journal.c | 2 +
> fs/jbd2/transaction.c | 289 +++++++++++++++++++++++++++++++++++++------------
> include/linux/jbd2.h | 21 ++++-
> 4 files changed, 245 insertions(+), 73 deletions(-)
>
> diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
> index 4863f5b..59c572e 100644
> --- a/fs/jbd2/commit.c
> +++ b/fs/jbd2/commit.c
> @@ -522,6 +522,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
> */
> jbd2_journal_switch_revoke_table(journal);
>
> + /*
> + * Reserved credits cannot be claimed anymore, free them
> + */
> + atomic_sub(atomic_read(&journal->j_reserved_credits),
> + &commit_transaction->t_outstanding_credits);
> +
> trace_jbd2_commit_flushing(journal, commit_transaction);
> stats.run.rs_flushing = jiffies;
> stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked,
> diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> index 63e2929..04c52ac 100644
> --- a/fs/jbd2/journal.c
> +++ b/fs/jbd2/journal.c
> @@ -1001,6 +1001,7 @@ static journal_t * journal_init_common (void)
> init_waitqueue_head(&journal->j_wait_done_commit);
> init_waitqueue_head(&journal->j_wait_commit);
> init_waitqueue_head(&journal->j_wait_updates);
> + init_waitqueue_head(&journal->j_wait_reserved);
> mutex_init(&journal->j_barrier);
> mutex_init(&journal->j_checkpoint_mutex);
> spin_lock_init(&journal->j_revoke_lock);
> @@ -1010,6 +1011,7 @@ static journal_t * journal_init_common (void)
> journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
> journal->j_min_batch_time = 0;
> journal->j_max_batch_time = 15000; /* 15ms */
> + atomic_set(&journal->j_reserved_credits, 0);
>
> /* The journal is marked for error until we succeed with recovery! */
> journal->j_flags = JBD2_ABORT;
> diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
> index 9639e47..036c01c 100644
> --- a/fs/jbd2/transaction.c
> +++ b/fs/jbd2/transaction.c
> @@ -89,7 +89,8 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
> transaction->t_expires = jiffies + journal->j_commit_interval;
> spin_lock_init(&transaction->t_handle_lock);
> atomic_set(&transaction->t_updates, 0);
> - atomic_set(&transaction->t_outstanding_credits, 0);
> + atomic_set(&transaction->t_outstanding_credits,
> + atomic_read(&journal->j_reserved_credits));
> atomic_set(&transaction->t_handle_count, 0);
> INIT_LIST_HEAD(&transaction->t_inode_list);
> INIT_LIST_HEAD(&transaction->t_private_list);
> @@ -141,6 +142,91 @@ static inline void update_t_max_wait(transaction_t *transaction,
> }
>
> /*
> + * Wait until running transaction passes T_LOCKED state. Also starts the commit
> + * if needed. The function expects running transaction to exist and releases
> + * j_state_lock.
> + */
> +static void wait_transaction_locked(journal_t *journal)
> + __releases(journal->j_state_lock)
> +{
> + DEFINE_WAIT(wait);
> + int need_to_start;
> + tid_t tid = journal->j_running_transaction->t_tid;
> +
> + prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
> + TASK_UNINTERRUPTIBLE);
> + need_to_start = !tid_geq(journal->j_commit_request, tid);
> + read_unlock(&journal->j_state_lock);
> + if (need_to_start)
> + jbd2_log_start_commit(journal, tid);
> + schedule();
> + finish_wait(&journal->j_wait_transaction_locked, &wait);
> +}
> +
> +/*
> + * Wait until we can add credits for handle to the running transaction. Called
> + * with j_state_lock held for reading. Returns 0 if handle joined the running
> + * transaction. Returns 1 if we had to wait, j_state_lock is dropped, and
> + * caller must retry.
> + */
> +static int add_transaction_credits(journal_t *journal, handle_t *handle)
> +{
> + transaction_t *t = journal->j_running_transaction;
> + int nblocks = handle->h_buffer_credits;
> + int needed;
> +
> + /*
> + * If the current transaction is locked down for commit, wait
> + * for the lock to be released.
> + */
> + if (t->t_state == T_LOCKED) {
> + wait_transaction_locked(journal);
> + return 1;
> + }
> +
> + /*
> + * If there is not enough space left in the log to write all
> + * potential buffers requested by this operation, we need to
> + * stall pending a log checkpoint to free some more log space.
> + */
> + needed = atomic_add_return(nblocks, &t->t_outstanding_credits);
> + if (needed > journal->j_max_transaction_buffers) {
> + /*
> + * If the current transaction is already too large,
> + * then start to commit it: we can then go back and
> + * attach this handle to a new transaction.
> + */
> + jbd_debug(2, "Handle %p starting new commit...\n", handle);
> + atomic_sub(nblocks, &t->t_outstanding_credits);
> + wait_transaction_locked(journal);
> + return 1;
> + }
> +
> + /*
> + * The commit code assumes that it can get enough log space
> + * without forcing a checkpoint. This is *critical* for
> + * correctness: a checkpoint of a buffer which is also
> + * associated with a committing transaction creates a deadlock,
> + * so commit simply cannot force through checkpoints.
> + *
> + * We must therefore ensure the necessary space in the journal
> + * *before* starting to dirty potentially checkpointed buffers
> + * in the new transaction.
> + */
> + if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) {
> + jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
> + atomic_sub(nblocks, &t->t_outstanding_credits);
> + read_unlock(&journal->j_state_lock);
> + write_lock(&journal->j_state_lock);
> + if (jbd2_log_space_left(journal) < jbd2_space_needed(journal))
> + __jbd2_log_wait_for_space(journal);
> + write_unlock(&journal->j_state_lock);
> + return 1;
> + }
> + return 0;
> +}
> +
> +/*
> * start_this_handle: Given a handle, deal with any locking or stalling
> * needed to make sure that there is enough journal space for the handle
> * to begin. Attach the handle to a transaction and set up the
> @@ -151,12 +237,14 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
> gfp_t gfp_mask)
> {
> transaction_t *transaction, *new_transaction = NULL;
> - tid_t tid;
> - int needed, need_to_start;
> int nblocks = handle->h_buffer_credits;
> unsigned long ts = jiffies;
>
> - if (nblocks > journal->j_max_transaction_buffers) {
> + /*
> + * 1/2 of transaction can be reserved so we can practically handle
> + * only 1/2 of maximum transaction size per operation
> + */
Sorry, but I don't understand here why we only reserve 1/2 of maximum
transaction size.
> + if (nblocks > journal->j_max_transaction_buffers / 2) {
> printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n",
> current->comm, nblocks,
> journal->j_max_transaction_buffers);
> @@ -223,75 +311,18 @@ repeat:
>
> transaction = journal->j_running_transaction;
>
> - /*
> - * If the current transaction is locked down for commit, wait for the
> - * lock to be released.
> - */
> - if (transaction->t_state == T_LOCKED) {
> - DEFINE_WAIT(wait);
> -
> - prepare_to_wait(&journal->j_wait_transaction_locked,
> - &wait, TASK_UNINTERRUPTIBLE);
> - read_unlock(&journal->j_state_lock);
> - schedule();
> - finish_wait(&journal->j_wait_transaction_locked, &wait);
> - goto repeat;
> - }
> -
> - /*
> - * If there is not enough space left in the log to write all potential
> - * buffers requested by this operation, we need to stall pending a log
> - * checkpoint to free some more log space.
> - */
> - needed = atomic_add_return(nblocks,
> - &transaction->t_outstanding_credits);
> -
> - if (needed > journal->j_max_transaction_buffers) {
> + if (!handle->h_reserved) {
Maybe we need to add a comment here because we release j_state_lock in
add_transaction_credits.
Regards,
- Zheng
> + if (add_transaction_credits(journal, handle))
> + goto repeat;
> + } else {
> /*
> - * If the current transaction is already too large, then start
> - * to commit it: we can then go back and attach this handle to
> - * a new transaction.
> + * We have handle reserved so we are allowed to join T_LOCKED
> + * transaction and we don't have to check for transaction size
> + * and journal space.
> */
> - DEFINE_WAIT(wait);
> -
> - jbd_debug(2, "Handle %p starting new commit...\n", handle);
> - atomic_sub(nblocks, &transaction->t_outstanding_credits);
> - prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
> - TASK_UNINTERRUPTIBLE);
> - tid = transaction->t_tid;
> - need_to_start = !tid_geq(journal->j_commit_request, tid);
> - read_unlock(&journal->j_state_lock);
> - if (need_to_start)
> - jbd2_log_start_commit(journal, tid);
> - schedule();
> - finish_wait(&journal->j_wait_transaction_locked, &wait);
> - goto repeat;
> - }
> -
> - /*
> - * The commit code assumes that it can get enough log space
> - * without forcing a checkpoint. This is *critical* for
> - * correctness: a checkpoint of a buffer which is also
> - * associated with a committing transaction creates a deadlock,
> - * so commit simply cannot force through checkpoints.
> - *
> - * We must therefore ensure the necessary space in the journal
> - * *before* starting to dirty potentially checkpointed buffers
> - * in the new transaction.
> - *
> - * The worst part is, any transaction currently committing can
> - * reduce the free space arbitrarily. Be careful to account for
> - * those buffers when checkpointing.
> - */
> - if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) {
> - jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
> - atomic_sub(nblocks, &transaction->t_outstanding_credits);
> - read_unlock(&journal->j_state_lock);
> - write_lock(&journal->j_state_lock);
> - if (jbd2_log_space_left(journal) < jbd2_space_needed(journal))
> - __jbd2_log_wait_for_space(journal);
> - write_unlock(&journal->j_state_lock);
> - goto repeat;
> + atomic_sub(nblocks, &journal->j_reserved_credits);
> + wake_up(&journal->j_wait_reserved);
> + handle->h_reserved = 0;
> }
>
> /* OK, account for the buffers that this operation expects to
> @@ -390,6 +421,122 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
> }
> EXPORT_SYMBOL(jbd2_journal_start);
>
> +/**
> + * handle_t *jbd2_journal_reserve(journal_t *journal, int nblocks)
> + * @journal: journal to reserve transaction on.
> + * @nblocks: number of blocks we might modify
> + *
> + * This function reserves transaction with @nblocks blocks in @journal. The
> + * function waits for enough journal space to be available and possibly also
> + * for some reservations to be converted to real transactions if there are too
> + * many of them. Note that this means that calling this function while having
> + * another transaction started or reserved can cause deadlock. The returned
> + * handle cannot be used for anything until it is started using
> + * jbd2_journal_start_reserved().
> + */
> +handle_t *jbd2_journal_reserve(journal_t *journal, int nblocks,
> + unsigned int type, unsigned int line_no)
> +{
> + handle_t *handle;
> + unsigned long wanted;
> +
> + handle = new_handle(nblocks);
> + if (!handle)
> + return ERR_PTR(-ENOMEM);
> + handle->h_journal = journal;
> + handle->h_reserved = 1;
> + handle->h_type = type;
> + handle->h_line_no = line_no;
> +
> +repeat:
> + /*
> + * We need j_state_lock early to avoid transaction creation to race
> + * with us and using elevated j_reserved_credits.
> + */
> + read_lock(&journal->j_state_lock);
> + wanted = atomic_add_return(nblocks, &journal->j_reserved_credits);
> + /* We allow at most half of a transaction to be reserved */
> + if (wanted > journal->j_max_transaction_buffers / 2) {
> + atomic_sub(nblocks, &journal->j_reserved_credits);
> + read_unlock(&journal->j_state_lock);
> + wait_event(journal->j_wait_reserved,
> + atomic_read(&journal->j_reserved_credits) + nblocks
> + <= journal->j_max_transaction_buffers / 2);
> + goto repeat;
> + }
> + if (journal->j_running_transaction) {
> + transaction_t *t = journal->j_running_transaction;
> +
> + wanted = atomic_add_return(nblocks,
> + &t->t_outstanding_credits);
> + if (wanted > journal->j_max_transaction_buffers) {
> + atomic_sub(nblocks, &t->t_outstanding_credits);
> + atomic_sub(nblocks, &journal->j_reserved_credits);
> + wait_transaction_locked(journal);
> + goto repeat;
> + }
> + }
> + read_unlock(&journal->j_state_lock);
> +
> + return handle;
> +}
> +EXPORT_SYMBOL(jbd2_journal_reserve);
> +
> +void jbd2_journal_free_reserved(handle_t *handle)
> +{
> + journal_t *journal = handle->h_journal;
> +
> + atomic_sub(handle->h_buffer_credits, &journal->j_reserved_credits);
> + wake_up(&journal->j_wait_reserved);
> + jbd2_free_handle(handle);
> +}
> +EXPORT_SYMBOL(jbd2_journal_free_reserved);
> +
> +/**
> + * int jbd2_journal_start_reserved(handle_t *handle) - start reserved handle
> + * @handle: handle to start
> + *
> + * Start handle that has been previously reserved with jbd2_journal_reserve().
> + * This attaches @handle to the running transaction (or creates one if there's
> + * not transaction running). Unlike jbd2_journal_start() this function cannot
> + * block on journal commit, checkpointing, or similar stuff. It can block on
> + * memory allocation or frozen journal though.
> + *
> + * Return 0 on success, non-zero on error - handle is freed in that case.
> + */
> +int jbd2_journal_start_reserved(handle_t *handle)
> +{
> + journal_t *journal = handle->h_journal;
> + int ret = -EIO;
> +
> + if (WARN_ON(!handle->h_reserved)) {
> + /* Someone passed in normal handle? Just stop it. */
> + jbd2_journal_stop(handle);
> + return ret;
> + }
> + /*
> + * Usefulness of mixing of reserved and unreserved handles is
> + * questionable. So far nobody seems to need it so just error out.
> + */
> + if (WARN_ON(current->journal_info)) {
> + jbd2_journal_free_reserved(handle);
> + return ret;
> + }
> +
> + handle->h_journal = NULL;
> + current->journal_info = handle;
> + /*
> + * GFP_NOFS is here because callers are likely from writeback or
> + * similarly constrained call sites
> + */
> + ret = start_this_handle(journal, handle, GFP_NOFS);
> + if (ret < 0) {
> + current->journal_info = NULL;
> + jbd2_journal_free_reserved(handle);
> + }
> + return ret;
> +}
> +EXPORT_SYMBOL(jbd2_journal_start_reserved);
>
> /**
> * int jbd2_journal_extend() - extend buffer credits.
> diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
> index ad4b3bb..b3c1283 100644
> --- a/include/linux/jbd2.h
> +++ b/include/linux/jbd2.h
> @@ -410,8 +410,12 @@ struct jbd2_revoke_table_s;
>
> struct jbd2_journal_handle
> {
> - /* Which compound transaction is this update a part of? */
> - transaction_t *h_transaction;
> + union {
> + /* Which compound transaction is this update a part of? */
> + transaction_t *h_transaction;
> + /* Which journal handle belongs to - used iff h_reserved set */
> + journal_t *h_journal;
> + };
>
> /* Number of remaining buffers we are allowed to dirty: */
> int h_buffer_credits;
> @@ -426,6 +430,7 @@ struct jbd2_journal_handle
> /* Flags [no locking] */
> unsigned int h_sync: 1; /* sync-on-close */
> unsigned int h_jdata: 1; /* force data journaling */
> + unsigned int h_reserved: 1; /* handle with reserved credits */
> unsigned int h_aborted: 1; /* fatal error on handle */
> unsigned int h_type: 8; /* for handle statistics */
> unsigned int h_line_no: 16; /* for handle statistics */
> @@ -689,6 +694,7 @@ jbd2_time_diff(unsigned long start, unsigned long end)
> * @j_wait_done_commit: Wait queue for waiting for commit to complete
> * @j_wait_commit: Wait queue to trigger commit
> * @j_wait_updates: Wait queue to wait for updates to complete
> + * @j_wait_reserved: Wait queue to wait for reserved buffer credits to drop
> * @j_checkpoint_mutex: Mutex for locking against concurrent checkpoints
> * @j_head: Journal head - identifies the first unused block in the journal
> * @j_tail: Journal tail - identifies the oldest still-used block in the
> @@ -702,6 +708,7 @@ jbd2_time_diff(unsigned long start, unsigned long end)
> * journal
> * @j_fs_dev: Device which holds the client fs. For internal journal this will
> * be equal to j_dev
> + * @j_reserved_credits: Number of buffers reserved from the running transaction
> * @j_maxlen: Total maximum capacity of the journal region on disk.
> * @j_list_lock: Protects the buffer lists and internal buffer state.
> * @j_inode: Optional inode where we store the journal. If present, all journal
> @@ -800,6 +807,9 @@ struct journal_s
> /* Wait queue to wait for updates to complete */
> wait_queue_head_t j_wait_updates;
>
> + /* Wait queue to wait for reserved buffer credits to drop */
> + wait_queue_head_t j_wait_reserved;
> +
> /* Semaphore for locking against concurrent checkpoints */
> struct mutex j_checkpoint_mutex;
>
> @@ -854,6 +864,9 @@ struct journal_s
> /* Total maximum capacity of the journal region on disk. */
> unsigned int j_maxlen;
>
> + /* Number of buffers reserved from the running transaction */
> + atomic_t j_reserved_credits;
> +
> /*
> * Protects the buffer lists and internal buffer state.
> */
> @@ -1094,6 +1107,10 @@ extern handle_t *jbd2__journal_start(journal_t *, int nblocks, gfp_t gfp_mask,
> unsigned int type, unsigned int line_no);
> extern int jbd2_journal_restart(handle_t *, int nblocks);
> extern int jbd2__journal_restart(handle_t *, int nblocks, gfp_t gfp_mask);
> +extern handle_t *jbd2_journal_reserve(journal_t *, int nblocks,
> + unsigned int type, unsigned int line_no);
> +extern int jbd2_journal_start_reserved(handle_t *handle);
> +extern void jbd2_journal_free_reserved(handle_t *handle);
> extern int jbd2_journal_extend (handle_t *, int nblocks);
> extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *);
> extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *);
> --
> 1.7.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists