[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20130412080148.GA6799@gmail.com>
Date: Fri, 12 Apr 2013 16:01:49 +0800
From: Zheng Liu <gnehzuil.liu@...il.com>
To: Jan Kara <jack@...e.cz>
Cc: Ted Tso <tytso@....edu>, linux-ext4@...r.kernel.org
Subject: Re: [PATCH 05/29] jbd2: Don't create journal_head for temporary
journal buffers
On Mon, Apr 08, 2013 at 11:32:10PM +0200, Jan Kara wrote:
> When writing metadata to the journal, we create temporary buffer heads
> for that task. We also attach journal heads to these buffer heads but
> the only purpose of the journal heads is to keep buffers linked in
> transaction's BJ_IO list. We remove the need for journal heads by
> reusing buffer_head's b_assoc_buffers list for that purpose. Also since
> BJ_IO list is just a temporary list for transaction commit, we use a
> private list in jbd2_journal_commit_transaction() for that thus removing
> BJ_IO list from transaction completely.
>
> Signed-off-by: Jan Kara <jack@...e.cz>
Reviewed-by: Zheng Liu <wenqing.lz@...bao.com>
Regards,
- Zheng
> ---
> fs/jbd2/checkpoint.c | 1 -
> fs/jbd2/commit.c | 65 ++++++++++++++++--------------------------------
> fs/jbd2/journal.c | 36 ++++++++++----------------
> fs/jbd2/transaction.c | 14 +++-------
> include/linux/jbd2.h | 32 ++++++++++++------------
> 5 files changed, 56 insertions(+), 92 deletions(-)
>
> diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
> index c78841e..2735fef 100644
> --- a/fs/jbd2/checkpoint.c
> +++ b/fs/jbd2/checkpoint.c
> @@ -690,7 +690,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
> J_ASSERT(transaction->t_state == T_FINISHED);
> J_ASSERT(transaction->t_buffers == NULL);
> J_ASSERT(transaction->t_forget == NULL);
> - J_ASSERT(transaction->t_iobuf_list == NULL);
> J_ASSERT(transaction->t_shadow_list == NULL);
> J_ASSERT(transaction->t_log_list == NULL);
> J_ASSERT(transaction->t_checkpoint_list == NULL);
> diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
> index 750c701..c503df6 100644
> --- a/fs/jbd2/commit.c
> +++ b/fs/jbd2/commit.c
> @@ -368,7 +368,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
> {
> struct transaction_stats_s stats;
> transaction_t *commit_transaction;
> - struct journal_head *jh, *new_jh, *descriptor;
> + struct journal_head *jh, *descriptor;
> struct buffer_head **wbuf = journal->j_wbuf;
> int bufs;
> int flags;
> @@ -392,6 +392,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
> tid_t first_tid;
> int update_tail;
> int csum_size = 0;
> + LIST_HEAD(io_bufs);
>
> if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
> csum_size = sizeof(struct jbd2_journal_block_tail);
> @@ -658,29 +659,22 @@ void jbd2_journal_commit_transaction(journal_t *journal)
>
> /* Bump b_count to prevent truncate from stumbling over
> the shadowed buffer! @@@ This can go if we ever get
> - rid of the BJ_IO/BJ_Shadow pairing of buffers. */
> + rid of the shadow pairing of buffers. */
> atomic_inc(&jh2bh(jh)->b_count);
>
> - /* Make a temporary IO buffer with which to write it out
> - (this will requeue both the metadata buffer and the
> - temporary IO buffer). new_bh goes on BJ_IO*/
> -
> - set_bit(BH_JWrite, &jh2bh(jh)->b_state);
> /*
> - * akpm: jbd2_journal_write_metadata_buffer() sets
> - * new_bh->b_transaction to commit_transaction.
> - * We need to clean this up before we release new_bh
> - * (which is of type BJ_IO)
> + * Make a temporary IO buffer with which to write it out
> + * (this will requeue the metadata buffer to BJ_Shadow).
> */
> + set_bit(BH_JWrite, &jh2bh(jh)->b_state);
> JBUFFER_TRACE(jh, "ph3: write metadata");
> flags = jbd2_journal_write_metadata_buffer(commit_transaction,
> - jh, &new_jh, blocknr);
> + jh, &wbuf[bufs], blocknr);
> if (flags < 0) {
> jbd2_journal_abort(journal, flags);
> continue;
> }
> - set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
> - wbuf[bufs++] = jh2bh(new_jh);
> + jbd2_file_log_bh(&io_bufs, wbuf[bufs]);
>
> /* Record the new block's tag in the current descriptor
> buffer */
> @@ -694,10 +688,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)
> tag = (journal_block_tag_t *) tagp;
> write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
> tag->t_flags = cpu_to_be16(tag_flag);
> - jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh),
> + jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
> commit_transaction->t_tid);
> tagp += tag_bytes;
> space_left -= tag_bytes;
> + bufs++;
>
> if (first_tag) {
> memcpy (tagp, journal->j_uuid, 16);
> @@ -809,7 +804,7 @@ start_journal_io:
> the log. Before we can commit it, wait for the IO so far to
> complete. Control buffers being written are on the
> transaction's t_log_list queue, and metadata buffers are on
> - the t_iobuf_list queue.
> + the io_bufs list.
>
> Wait for the buffers in reverse order. That way we are
> less likely to be woken up until all IOs have completed, and
> @@ -818,46 +813,31 @@ start_journal_io:
>
> jbd_debug(3, "JBD2: commit phase 3\n");
>
> - /*
> - * akpm: these are BJ_IO, and j_list_lock is not needed.
> - * See __journal_try_to_free_buffer.
> - */
> -wait_for_iobuf:
> - while (commit_transaction->t_iobuf_list != NULL) {
> - struct buffer_head *bh;
> + while (!list_empty(&io_bufs)) {
> + struct buffer_head *bh = list_entry(io_bufs.prev,
> + struct buffer_head,
> + b_assoc_buffers);
>
> - jh = commit_transaction->t_iobuf_list->b_tprev;
> - bh = jh2bh(jh);
> - if (buffer_locked(bh)) {
> - wait_on_buffer(bh);
> - goto wait_for_iobuf;
> - }
> - if (cond_resched())
> - goto wait_for_iobuf;
> + wait_on_buffer(bh);
> + cond_resched();
>
> if (unlikely(!buffer_uptodate(bh)))
> err = -EIO;
> -
> - clear_buffer_jwrite(bh);
> -
> - JBUFFER_TRACE(jh, "ph4: unfile after journal write");
> - jbd2_journal_unfile_buffer(journal, jh);
> + jbd2_unfile_log_bh(bh);
>
> /*
> - * ->t_iobuf_list should contain only dummy buffer_heads
> - * which were created by jbd2_journal_write_metadata_buffer().
> + * The list contains temporary buffer heads created by
> + * jbd2_journal_write_metadata_buffer().
> */
> BUFFER_TRACE(bh, "dumping temporary bh");
> - jbd2_journal_put_journal_head(jh);
> __brelse(bh);
> J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0);
> free_buffer_head(bh);
>
> - /* We also have to unlock and free the corresponding
> - shadowed buffer */
> + /* We also have to refile the corresponding shadowed buffer */
> jh = commit_transaction->t_shadow_list->b_tprev;
> bh = jh2bh(jh);
> - clear_bit(BH_JWrite, &bh->b_state);
> + clear_buffer_jwrite(bh);
> J_ASSERT_BH(bh, buffer_jbddirty(bh));
>
> /* The metadata is now released for reuse, but we need
> @@ -952,7 +932,6 @@ wait_for_iobuf:
> J_ASSERT(list_empty(&commit_transaction->t_inode_list));
> J_ASSERT(commit_transaction->t_buffers == NULL);
> J_ASSERT(commit_transaction->t_checkpoint_list == NULL);
> - J_ASSERT(commit_transaction->t_iobuf_list == NULL);
> J_ASSERT(commit_transaction->t_shadow_list == NULL);
> J_ASSERT(commit_transaction->t_log_list == NULL);
>
> diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> index ed10991..eb6272b 100644
> --- a/fs/jbd2/journal.c
> +++ b/fs/jbd2/journal.c
> @@ -310,14 +310,12 @@ static void journal_kill_thread(journal_t *journal)
> *
> * If the source buffer has already been modified by a new transaction
> * since we took the last commit snapshot, we use the frozen copy of
> - * that data for IO. If we end up using the existing buffer_head's data
> - * for the write, then we *have* to lock the buffer to prevent anyone
> - * else from using and possibly modifying it while the IO is in
> - * progress.
> + * that data for IO. If we end up using the existing buffer_head's data
> + * for the write, then we have to make sure nobody modifies it while the
> + * IO is in progress. do_get_write_access() handles this.
> *
> - * The function returns a pointer to the buffer_heads to be used for IO.
> - *
> - * We assume that the journal has already been locked in this function.
> + * The function returns a pointer to the buffer_head to be used for IO.
> + *
> *
> * Return value:
> * <0: Error
> @@ -330,15 +328,14 @@ static void journal_kill_thread(journal_t *journal)
>
> int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
> struct journal_head *jh_in,
> - struct journal_head **jh_out,
> - unsigned long long blocknr)
> + struct buffer_head **bh_out,
> + sector_t blocknr)
> {
> int need_copy_out = 0;
> int done_copy_out = 0;
> int do_escape = 0;
> char *mapped_data;
> struct buffer_head *new_bh;
> - struct journal_head *new_jh;
> struct page *new_page;
> unsigned int new_offset;
> struct buffer_head *bh_in = jh2bh(jh_in);
> @@ -370,14 +367,13 @@ retry_alloc:
> new_bh->b_state = 0;
> init_buffer(new_bh, NULL, NULL);
> atomic_set(&new_bh->b_count, 1);
> - new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */
>
> + jbd_lock_bh_state(bh_in);
> +repeat:
> /*
> * If a new transaction has already done a buffer copy-out, then
> * we use that version of the data for the commit.
> */
> - jbd_lock_bh_state(bh_in);
> -repeat:
> if (jh_in->b_frozen_data) {
> done_copy_out = 1;
> new_page = virt_to_page(jh_in->b_frozen_data);
> @@ -417,7 +413,7 @@ repeat:
> jbd_unlock_bh_state(bh_in);
> tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
> if (!tmp) {
> - jbd2_journal_put_journal_head(new_jh);
> + brelse(new_bh);
> return -ENOMEM;
> }
> jbd_lock_bh_state(bh_in);
> @@ -428,7 +424,7 @@ repeat:
>
> jh_in->b_frozen_data = tmp;
> mapped_data = kmap_atomic(new_page);
> - memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
> + memcpy(tmp, mapped_data + new_offset, bh_in->b_size);
> kunmap_atomic(mapped_data);
>
> new_page = virt_to_page(tmp);
> @@ -454,14 +450,13 @@ repeat:
> }
>
> set_bh_page(new_bh, new_page, new_offset);
> - new_jh->b_transaction = NULL;
> - new_bh->b_size = jh2bh(jh_in)->b_size;
> - new_bh->b_bdev = transaction->t_journal->j_dev;
> + new_bh->b_size = bh_in->b_size;
> + new_bh->b_bdev = journal->j_dev;
> new_bh->b_blocknr = blocknr;
> set_buffer_mapped(new_bh);
> set_buffer_dirty(new_bh);
>
> - *jh_out = new_jh;
> + *bh_out = new_bh;
>
> /*
> * The to-be-written buffer needs to get moved to the io queue,
> @@ -474,9 +469,6 @@ repeat:
> spin_unlock(&journal->j_list_lock);
> jbd_unlock_bh_state(bh_in);
>
> - JBUFFER_TRACE(new_jh, "file as BJ_IO");
> - jbd2_journal_file_buffer(new_jh, transaction, BJ_IO);
> -
> return do_escape | (done_copy_out << 1);
> }
>
> diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
> index d6ee5ae..3be34c7 100644
> --- a/fs/jbd2/transaction.c
> +++ b/fs/jbd2/transaction.c
> @@ -1589,10 +1589,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh)
> * Remove a buffer from the appropriate transaction list.
> *
> * Note that this function can *change* the value of
> - * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list,
> - * t_log_list or t_reserved_list. If the caller is holding onto a copy of one
> - * of these pointers, it could go bad. Generally the caller needs to re-read
> - * the pointer from the transaction_t.
> + * bh->b_transaction->t_buffers, t_forget, t_shadow_list, t_log_list or
> + * t_reserved_list. If the caller is holding onto a copy of one of these
> + * pointers, it could go bad. Generally the caller needs to re-read the
> + * pointer from the transaction_t.
> *
> * Called under j_list_lock.
> */
> @@ -1622,9 +1622,6 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
> case BJ_Forget:
> list = &transaction->t_forget;
> break;
> - case BJ_IO:
> - list = &transaction->t_iobuf_list;
> - break;
> case BJ_Shadow:
> list = &transaction->t_shadow_list;
> break;
> @@ -2126,9 +2123,6 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
> case BJ_Forget:
> list = &transaction->t_forget;
> break;
> - case BJ_IO:
> - list = &transaction->t_iobuf_list;
> - break;
> case BJ_Shadow:
> list = &transaction->t_shadow_list;
> break;
> diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
> index 50e5a5e..a670595 100644
> --- a/include/linux/jbd2.h
> +++ b/include/linux/jbd2.h
> @@ -523,12 +523,6 @@ struct transaction_s
> struct journal_head *t_checkpoint_io_list;
>
> /*
> - * Doubly-linked circular list of temporary buffers currently undergoing
> - * IO in the log [j_list_lock]
> - */
> - struct journal_head *t_iobuf_list;
> -
> - /*
> * Doubly-linked circular list of metadata buffers being shadowed by log
> * IO. The IO buffers on the iobuf list and the shadow buffers on this
> * list match each other one for one at all times. [j_list_lock]
> @@ -990,6 +984,14 @@ extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, i
> extern void __journal_free_buffer(struct journal_head *bh);
> extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
> extern void __journal_clean_data_list(transaction_t *transaction);
> +static inline void jbd2_file_log_bh(struct list_head *head, struct buffer_head *bh)
> +{
> + list_add_tail(&bh->b_assoc_buffers, head);
> +}
> +static inline void jbd2_unfile_log_bh(struct buffer_head *bh)
> +{
> + list_del_init(&bh->b_assoc_buffers);
> +}
>
> /* Log buffer allocation */
> extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *);
> @@ -1038,11 +1040,10 @@ extern void jbd2_buffer_abort_trigger(struct journal_head *jh,
> struct jbd2_buffer_trigger_type *triggers);
>
> /* Buffer IO */
> -extern int
> -jbd2_journal_write_metadata_buffer(transaction_t *transaction,
> - struct journal_head *jh_in,
> - struct journal_head **jh_out,
> - unsigned long long blocknr);
> +extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
> + struct journal_head *jh_in,
> + struct buffer_head **bh_out,
> + sector_t blocknr);
>
> /* Transaction locking */
> extern void __wait_on_journal (journal_t *);
> @@ -1284,11 +1285,10 @@ static inline int jbd_space_needed(journal_t *journal)
> #define BJ_None 0 /* Not journaled */
> #define BJ_Metadata 1 /* Normal journaled metadata */
> #define BJ_Forget 2 /* Buffer superseded by this transaction */
> -#define BJ_IO 3 /* Buffer is for temporary IO use */
> -#define BJ_Shadow 4 /* Buffer contents being shadowed to the log */
> -#define BJ_LogCtl 5 /* Buffer contains log descriptors */
> -#define BJ_Reserved 6 /* Buffer is reserved for access by journal */
> -#define BJ_Types 7
> +#define BJ_Shadow 3 /* Buffer contents being shadowed to the log */
> +#define BJ_LogCtl 4 /* Buffer contains log descriptors */
> +#define BJ_Reserved 5 /* Buffer is reserved for access by journal */
> +#define BJ_Types 6
>
> extern int jbd_blocks_per_page(struct inode *inode);
>
> --
> 1.7.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists