[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200805141623.GC16475@quack2.suse.cz>
Date: Wed, 5 Aug 2020 16:16:23 +0200
From: Jan Kara <jack@...e.cz>
To: Ted Tso <tytso@....edu>
Cc: linux-ext4@...r.kernel.org, Lukas Czerner <lczerner@...hat.com>,
Jan Kara <jack@...e.cz>
Subject: Re: [PATCH v3] ext4: don't BUG on inconsistent journal feature
On Fri 10-07-20 16:07:59, Jan Kara wrote:
> A customer has reported a BUG_ON in ext4_clear_journal_err() hitting
> during an LTP testing. Either this has been caused by a test setup
> issue where the filesystem was being overwritten while LTP was mounting
> it or the journal replay has overwritten the superblock with invalid
> data. In either case it is preferable we don't take the machine down
> with a BUG_ON. So handle the situation of unexpectedly missing
> has_journal feature more gracefully. We issue warning and fail the mount
> in the cases where the race window is narrow and the failed check is
> most likely a programming error. In cases where fs corruption is more
> likely, we do full ext4_error() handling before failing mount / remount.
>
> Reviewed-by: Lukas Czerner <lczerner@...hat.com>
> Signed-off-by: Jan Kara <jack@...e.cz>
Ted, can you please pick up this patch? Thanks!
Honza
> ---
> fs/ext4/super.c | 68 +++++++++++++++++++++++++++++++++++++++------------------
> 1 file changed, 47 insertions(+), 21 deletions(-)
>
> Changes since v2:
> - added Reviewed-by tag
> - fixup compilation failure without CONFIG_QUOTA
>
> Changes since v1:
> - handle failures more likely coming from fs corruption as with ext4_error
>
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 330957ed1f05..9fdad843b30e 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -66,10 +66,10 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
> unsigned long journal_devnum);
> static int ext4_show_options(struct seq_file *seq, struct dentry *root);
> static int ext4_commit_super(struct super_block *sb, int sync);
> -static void ext4_mark_recovery_complete(struct super_block *sb,
> +static int ext4_mark_recovery_complete(struct super_block *sb,
> struct ext4_super_block *es);
> -static void ext4_clear_journal_err(struct super_block *sb,
> - struct ext4_super_block *es);
> +static int ext4_clear_journal_err(struct super_block *sb,
> + struct ext4_super_block *es);
> static int ext4_sync_fs(struct super_block *sb, int wait);
> static int ext4_remount(struct super_block *sb, int *flags, char *data);
> static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
> @@ -4770,7 +4770,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
> EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
> if (needs_recovery) {
> ext4_msg(sb, KERN_INFO, "recovery complete");
> - ext4_mark_recovery_complete(sb, es);
> + err = ext4_mark_recovery_complete(sb, es);
> + if (err)
> + goto failed_mount8;
> }
> if (EXT4_SB(sb)->s_journal) {
> if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
> @@ -4813,10 +4815,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
> ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
> goto failed_mount;
>
> -#ifdef CONFIG_QUOTA
> failed_mount8:
> ext4_unregister_sysfs(sb);
> -#endif
> failed_mount7:
> ext4_unregister_li_request(sb);
> failed_mount6:
> @@ -4956,7 +4956,8 @@ static journal_t *ext4_get_journal(struct super_block *sb,
> struct inode *journal_inode;
> journal_t *journal;
>
> - BUG_ON(!ext4_has_feature_journal(sb));
> + if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
> + return NULL;
>
> journal_inode = ext4_get_journal_inode(sb, journal_inum);
> if (!journal_inode)
> @@ -4986,7 +4987,8 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
> struct ext4_super_block *es;
> struct block_device *bdev;
>
> - BUG_ON(!ext4_has_feature_journal(sb));
> + if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
> + return NULL;
>
> bdev = ext4_blkdev_get(j_dev, sb);
> if (bdev == NULL)
> @@ -5078,7 +5080,8 @@ static int ext4_load_journal(struct super_block *sb,
> int err = 0;
> int really_read_only;
>
> - BUG_ON(!ext4_has_feature_journal(sb));
> + if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
> + return -EFSCORRUPTED;
>
> if (journal_devnum &&
> journal_devnum != le32_to_cpu(es->s_journal_dev)) {
> @@ -5148,7 +5151,12 @@ static int ext4_load_journal(struct super_block *sb,
> }
>
> EXT4_SB(sb)->s_journal = journal;
> - ext4_clear_journal_err(sb, es);
> + err = ext4_clear_journal_err(sb, es);
> + if (err) {
> + EXT4_SB(sb)->s_journal = NULL;
> + jbd2_journal_destroy(journal);
> + return err;
> + }
>
> if (!really_read_only && journal_devnum &&
> journal_devnum != le32_to_cpu(es->s_journal_dev)) {
> @@ -5244,26 +5252,32 @@ static int ext4_commit_super(struct super_block *sb, int sync)
> * remounting) the filesystem readonly, then we will end up with a
> * consistent fs on disk. Record that fact.
> */
> -static void ext4_mark_recovery_complete(struct super_block *sb,
> - struct ext4_super_block *es)
> +static int ext4_mark_recovery_complete(struct super_block *sb,
> + struct ext4_super_block *es)
> {
> + int err;
> journal_t *journal = EXT4_SB(sb)->s_journal;
>
> if (!ext4_has_feature_journal(sb)) {
> - BUG_ON(journal != NULL);
> - return;
> + if (journal != NULL) {
> + ext4_error(sb, "Journal got removed while the fs was "
> + "mounted!");
> + return -EFSCORRUPTED;
> + }
> + return 0;
> }
> jbd2_journal_lock_updates(journal);
> - if (jbd2_journal_flush(journal) < 0)
> + err = jbd2_journal_flush(journal);
> + if (err < 0)
> goto out;
>
> if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
> ext4_clear_feature_journal_needs_recovery(sb);
> ext4_commit_super(sb, 1);
> }
> -
> out:
> jbd2_journal_unlock_updates(journal);
> + return err;
> }
>
> /*
> @@ -5271,14 +5285,17 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
> * has recorded an error from a previous lifetime, move that error to the
> * main filesystem now.
> */
> -static void ext4_clear_journal_err(struct super_block *sb,
> +static int ext4_clear_journal_err(struct super_block *sb,
> struct ext4_super_block *es)
> {
> journal_t *journal;
> int j_errno;
> const char *errstr;
>
> - BUG_ON(!ext4_has_feature_journal(sb));
> + if (!ext4_has_feature_journal(sb)) {
> + ext4_error(sb, "Journal got removed while the fs was mounted!");
> + return -EFSCORRUPTED;
> + }
>
> journal = EXT4_SB(sb)->s_journal;
>
> @@ -5303,6 +5320,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
> jbd2_journal_clear_err(journal);
> jbd2_journal_update_sb_errno(journal);
> }
> + return 0;
> }
>
> /*
> @@ -5573,8 +5591,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
> (sbi->s_mount_state & EXT4_VALID_FS))
> es->s_state = cpu_to_le16(sbi->s_mount_state);
>
> - if (sbi->s_journal)
> + if (sbi->s_journal) {
> + /*
> + * We let remount-ro finish even if marking fs
> + * as clean failed...
> + */
> ext4_mark_recovery_complete(sb, es);
> + }
> if (sbi->s_mmp_tsk)
> kthread_stop(sbi->s_mmp_tsk);
> } else {
> @@ -5622,8 +5645,11 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
> * been changed by e2fsck since we originally mounted
> * the partition.)
> */
> - if (sbi->s_journal)
> - ext4_clear_journal_err(sb, es);
> + if (sbi->s_journal) {
> + err = ext4_clear_journal_err(sb, es);
> + if (err)
> + goto restore_opts;
> + }
> sbi->s_mount_state = le16_to_cpu(es->s_state);
>
> err = ext4_setup_super(sb, es, 0);
> --
> 2.16.4
>
--
Jan Kara <jack@...e.com>
SUSE Labs, CR
Powered by blists - more mailing lists