[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20150615130844.GF4368@quack.suse.cz>
Date: Mon, 15 Jun 2015 15:08:44 +0200
From: Jan Kara <jack@...e.cz>
To: Joseph Qi <joseph.qi@...wei.com>
Cc: linux-ext4@...r.kernel.org,
"ocfs2-devel@....oracle.com" <ocfs2-devel@....oracle.com>,
Theodore Ts'o <tytso@....edu>, Jan Kara <jack@...e.cz>,
Andrew Morton <akpm@...ux-foundation.org>,
Mark Fasheh <mfasheh@...e.com>,
Joel Becker <jlbec@...lplan.org>,
Junxiao Bi <junxiao.bi@...cle.com>,
jiangyiwen <jiangyiwen@...wei.com>, linux-fsdevel@...r.kernel.org
Subject: Re: [PATCH RESEND] jbd2: fix ocfs2 corrupt when updating journal
superblock fails
On Mon 15-06-15 14:27:09, Joseph Qi wrote:
> If updating journal superblock fails after journal data has been flushed,
> the error is omitted and this will mislead the caller as a normal case.
> In ocfs2, the checkpoint will be treated successfully and the other node
> can get the lock to update. Since the sb_start is still pointing to the
> old log block, it will rewrite the journal data during journal recovery
> by the other node. Thus the new updates will be overwritten and ocfs2
> corrupts.
> So in above case we have to return the error, and ocfs2_commit_cache will
> take care of the error and prevent the other node to do update first.
> And only after recovering journal it can do the new updates.
>
> The issue discussion mail can be found at:
> https://oss.oracle.com/pipermail/ocfs2-devel/2015-June/010856.html
> http://comments.gmane.org/gmane.comp.file-systems.ext4/48841
>
> Reported-by: Yiwen Jiang <jiangyiwen@...wei.com>
> Signed-off-by: Joseph Qi <joseph.qi@...wei.com>
> Tested-by: Yiwen Jiang <jiangyiwen@...wei.com>
> Cc: Junxiao Bi <junxiao.bi@...cle.com>
> Cc: <stable@...r.kernel.org>
The patch looks good but it seems to be against relatively old kernel
version. Can you please rebase your patch against current kernel? Thanks!
Honza
> ---
> fs/jbd2/checkpoint.c | 5 ++---
> fs/jbd2/journal.c | 37 ++++++++++++++++++++++++++++++-------
> include/linux/jbd2.h | 4 ++--
> 3 files changed, 34 insertions(+), 12 deletions(-)
>
> diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
> index 988b32e..82e5b7d 100644
> --- a/fs/jbd2/checkpoint.c
> +++ b/fs/jbd2/checkpoint.c
> @@ -390,7 +390,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
> unsigned long blocknr;
>
> if (is_journal_aborted(journal))
> - return 1;
> + return -EIO;
>
> if (!jbd2_journal_get_log_tail(journal, &first_tid, &blocknr))
> return 1;
> @@ -407,8 +407,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
> if (journal->j_flags & JBD2_BARRIER)
> blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
>
> - __jbd2_update_log_tail(journal, first_tid, blocknr);
> - return 0;
> + return __jbd2_update_log_tail(journal, first_tid, blocknr);
> }
>
>
> diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
> index b96bd80..6b33a42 100644
> --- a/fs/jbd2/journal.c
> +++ b/fs/jbd2/journal.c
> @@ -885,9 +885,10 @@ int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
> *
> * Requires j_checkpoint_mutex
> */
> -void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
> +int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
> {
> unsigned long freed;
> + int ret;
>
> BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
>
> @@ -897,7 +898,10 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
> * space and if we lose sb update during power failure we'd replay
> * old transaction with possibly newly overwritten data.
> */
> - jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
> + ret = jbd2_journal_update_sb_log_tail(journal, tid, block, WRITE_FUA);
> + if (ret)
> + goto out;
> +
> write_lock(&journal->j_state_lock);
> freed = block - journal->j_tail;
> if (block < journal->j_tail)
> @@ -913,6 +917,9 @@ void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
> journal->j_tail_sequence = tid;
> journal->j_tail = block;
> write_unlock(&journal->j_state_lock);
> +
> +out:
> + return ret;
> }
>
> /*
> @@ -1331,7 +1338,7 @@ static int journal_reset(journal_t *journal)
> return jbd2_journal_start_thread(journal);
> }
>
> -static void jbd2_write_superblock(journal_t *journal, int write_op)
> +static int jbd2_write_superblock(journal_t *journal, int write_op)
> {
> struct buffer_head *bh = journal->j_sb_buffer;
> journal_superblock_t *sb = journal->j_superblock;
> @@ -1370,7 +1377,10 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
> printk(KERN_ERR "JBD2: Error %d detected when updating "
> "journal superblock for %s.\n", ret,
> journal->j_devname);
> + jbd2_journal_abort(journal, ret);
> }
> +
> + return ret;
> }
>
> /**
> @@ -1383,10 +1393,11 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
> * Update a journal's superblock information about log tail and write it to
> * disk, waiting for the IO to complete.
> */
> -void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
> +int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
> unsigned long tail_block, int write_op)
> {
> journal_superblock_t *sb = journal->j_superblock;
> + int ret;
>
> BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
> jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
> @@ -1395,13 +1406,18 @@ void jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
> sb->s_sequence = cpu_to_be32(tail_tid);
> sb->s_start = cpu_to_be32(tail_block);
>
> - jbd2_write_superblock(journal, write_op);
> + ret = jbd2_write_superblock(journal, write_op);
> + if (ret)
> + goto out;
>
> /* Log is no longer empty */
> write_lock(&journal->j_state_lock);
> WARN_ON(!sb->s_sequence);
> journal->j_flags &= ~JBD2_FLUSHED;
> write_unlock(&journal->j_state_lock);
> +
> +out:
> + return ret;
> }
>
> /**
> @@ -1950,7 +1966,13 @@ int jbd2_journal_flush(journal_t *journal)
> return -EIO;
>
> mutex_lock(&journal->j_checkpoint_mutex);
> - jbd2_cleanup_journal_tail(journal);
> + if (!err) {
> + err = jbd2_cleanup_journal_tail(journal);
> + if (err < 0) {
> + mutex_unlock(&journal->j_checkpoint_mutex);
> + goto out;
> + }
> + }
>
> /* Finally, mark the journal as really needing no recovery.
> * This sets s_start==0 in the underlying superblock, which is
> @@ -1966,7 +1988,8 @@ int jbd2_journal_flush(journal_t *journal)
> J_ASSERT(journal->j_head == journal->j_tail);
> J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
> write_unlock(&journal->j_state_lock);
> - return 0;
> +out:
> + return err;
> }
>
> /**
> diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
> index 20e7f78..edb640a 100644
> --- a/include/linux/jbd2.h
> +++ b/include/linux/jbd2.h
> @@ -1035,7 +1035,7 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal);
> int jbd2_journal_next_log_block(journal_t *, unsigned long long *);
> int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
> unsigned long *block);
> -void __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
> +int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
> void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
>
> /* Commit management */
> @@ -1157,7 +1157,7 @@ extern int jbd2_journal_recover (journal_t *journal);
> extern int jbd2_journal_wipe (journal_t *, int);
> extern int jbd2_journal_skip_recovery (journal_t *);
> extern void jbd2_journal_update_sb_errno(journal_t *);
> -extern void jbd2_journal_update_sb_log_tail (journal_t *, tid_t,
> +extern int jbd2_journal_update_sb_log_tail (journal_t *, tid_t,
> unsigned long, int);
> extern void __jbd2_journal_abort_hard (journal_t *);
> extern void jbd2_journal_abort (journal_t *, int);
> --
> 1.8.4.3
>
>
--
Jan Kara <jack@...e.cz>
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists