[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <7698B921-85A0-4AD5-BBF8-28550E87C093@dilger.ca>
Date: Mon, 4 Jun 2018 12:19:54 -0600
From: Andreas Dilger <adilger@...ger.ca>
To: Wang Shilong <wangshilong1991@...il.com>
Cc: Ext4 Developers List <linux-ext4@...r.kernel.org>,
Shuichi Ihara <sihara@....com>, Wang Shilong <wshilong@....com>
Subject: Re: [PATCH v2 5/5] ext4: add suppport to make bitmaps corruptions
nonfatal
On May 29, 2018, at 5:45 AM, Wang Shilong <wangshilong1991@...il.com> wrote:
>
> From: Wang Shilong <wshilong@....com>
>
> Originally, we saw bitmaps corruptions on Lustre server,
> which could be ext4 bugs or disk corruptions, but currently
> ext4 use ext4_error() for this which make filesystem RO
> in default.
>
> Ext4 have handled bitmaps corruptions very well, use corrupted
> bit to isolate erros and prevent us any further allocations
> to this block group, add an mount option 'bitmaps=warn' for
> this which will record error state to superblock, and next
> offline e2fsck will fix inconsitency problems, this gurantee
> our Clusters usable with minimal risk.
>
> Suggested-by: Andreas Dilger <adilger.kernel@...ger.ca>
> Signed-off-by: Wang Shilong <wshilong@....com>
Reviewed-by: Andreas Dilger <adilger@...ger.ca>
> ---
> v1->v2:
> add mount option to control,
> don't leak fatal error that is not related to bitmap corruptions
> ---
> fs/ext4/balloc.c | 6 ++++--
> fs/ext4/ext4.h | 27 +++++++++++++++++++++------
> fs/ext4/mballoc.c | 45 +++++++++++++++++++++++----------------------
> fs/ext4/super.c | 20 ++++++++++++++------
> 4 files changed, 62 insertions(+), 36 deletions(-)
>
> diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
> index 5fe63ff..bef6ae8 100644
> --- a/fs/ext4/balloc.c
> +++ b/fs/ext4/balloc.c
> @@ -451,8 +451,10 @@ struct buffer_head *
> ext4_unlock_group(sb, block_group);
> unlock_buffer(bh);
> if (err) {
> - ext4_error(sb, "Failed to init block bitmap for group "
> - "%u: %d", block_group, err);
> + if (!EXT4_MB_GRP_BBITMAP_CORRUPT(
> + ext4_get_group_info(sb, block_group)))
> + ext4_error(sb, "Failed to init block bitmap for group "
> + "%u: %d", block_group, err);
> goto out;
> }
> goto verify;
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 26f8c12..96c5c1c 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -1113,6 +1113,7 @@ struct ext4_inode_info {
> #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
> #define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
> #define EXT4_MOUNT_INIT_INODE_TABLE 0x80000000 /* Initialize uninitialized itables */
> +#define EXT4_MOUNT_BITMAPS_WARN 0x100000000 /* warn on bitmaps corruptions */
>
> /*
> * Mount flags set either automatically (could not be set by mount option)
> @@ -1338,7 +1339,7 @@ struct ext4_sb_info {
> struct buffer_head * s_sbh; /* Buffer containing the super block */
> struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */
> struct buffer_head **s_group_desc;
> - unsigned int s_mount_opt;
> + unsigned long long s_mount_opt;
> unsigned int s_mount_opt2;
> unsigned int s_mount_flags;
> unsigned int s_def_mount_opt;
> @@ -2563,6 +2564,8 @@ void __ext4_grp_locked_error(const char *, unsigned int,
> struct super_block *, ext4_group_t,
> unsigned long, ext4_fsblk_t, unsigned int,
> const char *, ...);
> +void save_error_info(struct super_block *sb, const char *func,
> + unsigned int line);
>
> #define EXT4_ERROR_INODE(inode, fmt, a...) \
> ext4_error_inode((inode), __func__, __LINE__, 0, (fmt), ## a)
> @@ -2598,10 +2601,17 @@ void __ext4_grp_locked_error(const char *, unsigned int,
> do { \
> int ret; \
> ret = __ext4_mark_group_bitmap_corrupted(sb, group, \
> - flags); \
> - if (ret) \
> - __ext4_error(sb, __func__, __LINE__, \
> - fmt, ##__VA_ARGS__); \
> + flags); \
> + if (ret) { \
> + if (test_opt(sb, BITMAPS_WARN)) { \
> + __ext4_warning(sb, __func__, __LINE__, \
> + fmt, ##__VA_ARGS__); \
> + save_error_info(sb, __func__, __LINE__);\
> + } else { \
> + __ext4_error(sb, __func__, __LINE__, \
> + fmt, ##__VA_ARGS__); \
> + } \
> + } \
> } while (0)
>
>
> @@ -2656,7 +2666,12 @@ void __ext4_grp_locked_error(const char *, unsigned int,
> flags); \
> if (ret) { \
> no_printk(fmt, ##__VA_ARGS__); \
> - __ext4_error(sb, "", 0, " "); \
> + if (test_opt(sb, BITMAPS_WARN) { \
> + __ext4_warning(sb, "", 0, " "); \
> + save_error_info(sb, __func__, __LINE__);\
> + } else { \
> + __ext4_error(sb, "", 0, " "); \
> + } \
> } \
> } while (0)
>
> diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
> index 3680623..1ee3138 100644
> --- a/fs/ext4/mballoc.c
> +++ b/fs/ext4/mballoc.c
> @@ -3907,15 +3907,16 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
> bitmap_bh = ext4_read_block_bitmap(sb, group);
> if (IS_ERR(bitmap_bh)) {
> err = PTR_ERR(bitmap_bh);
> - ext4_error(sb, "Error %d reading block bitmap for %u",
> - err, group);
> return 0;
> }
>
> err = ext4_mb_load_buddy(sb, group, &e4b);
> if (err) {
> - ext4_warning(sb, "Error %d loading buddy information for %u",
> - err, group);
> + if (!EXT4_MB_GRP_BBITMAP_CORRUPT(
> + ext4_get_group_info(sb, group)))
> + ext4_warning(sb,
> + "Error %d loading buddy information for %u",
> + err, group);
> put_bh(bitmap_bh);
> return 0;
> }
> @@ -4075,16 +4076,17 @@ void ext4_discard_preallocations(struct inode *inode)
> err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
> GFP_NOFS|__GFP_NOFAIL);
> if (err) {
> - ext4_error(sb, "Error %d loading buddy information for %u",
> - err, group);
> + if (!EXT4_MB_GRP_BBITMAP_CORRUPT(
> + ext4_get_group_info(sb, group)))
> + ext4_error(sb,
> + "Error %d loading buddy information for %u",
> + err, group);
> continue;
> }
>
> bitmap_bh = ext4_read_block_bitmap(sb, group);
> if (IS_ERR(bitmap_bh)) {
> err = PTR_ERR(bitmap_bh);
> - ext4_error(sb, "Error %d reading block bitmap for %u",
> - err, group);
> ext4_mb_unload_buddy(&e4b);
> continue;
> }
> @@ -4338,8 +4340,10 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
> err = ext4_mb_load_buddy_gfp(sb, group, &e4b,
> GFP_NOFS|__GFP_NOFAIL);
> if (err) {
> - ext4_error(sb, "Error %d loading buddy information for %u",
> - err, group);
> + if (!EXT4_MB_GRP_BBITMAP_CORRUPT(
> + ext4_get_group_info(sb, group)))
> + ext4_error(sb, "Error %d loading buddy information for %u",
> + err, group);
> continue;
> }
> ext4_lock_group(sb, group);
> @@ -4819,11 +4823,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
> }
> count_clusters = EXT4_NUM_B2C(sbi, count);
> bitmap_bh = ext4_read_block_bitmap(sb, block_group);
> - if (IS_ERR(bitmap_bh)) {
> - err = PTR_ERR(bitmap_bh);
> - bitmap_bh = NULL;
> - goto error_return;
> - }
> + if (IS_ERR(bitmap_bh))
> + return;
> gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
> if (!gdp) {
> err = -EIO;
> @@ -5001,11 +5002,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
> }
>
> bitmap_bh = ext4_read_block_bitmap(sb, block_group);
> - if (IS_ERR(bitmap_bh)) {
> - err = PTR_ERR(bitmap_bh);
> - bitmap_bh = NULL;
> - goto error_return;
> - }
> + if (IS_ERR(bitmap_bh))
> + return PTR_ERR(bitmap_bh);
>
> desc = ext4_get_group_desc(sb, block_group, &gd_bh);
> if (!desc) {
> @@ -5168,8 +5166,11 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count,
>
> ret = ext4_mb_load_buddy(sb, group, &e4b);
> if (ret) {
> - ext4_warning(sb, "Error %d loading buddy information for %u",
> - ret, group);
> + if (!EXT4_MB_GRP_BBITMAP_CORRUPT(
> + ext4_get_group_info(sb, group)))
> + ext4_warning(sb,
> + "Error %d loading buddy information for %u",
> + ret, group);
> return ret;
> }
> bitmap = e4b.bd_bitmap;
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 9f88991..63f40ae 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -342,8 +342,8 @@ static void __save_error_info(struct super_block *sb, const char *func,
> le32_add_cpu(&es->s_error_count, 1);
> }
>
> -static void save_error_info(struct super_block *sb, const char *func,
> - unsigned int line)
> +void save_error_info(struct super_block *sb, const char *func,
> + unsigned int line)
> {
> __save_error_info(sb, func, line);
> ext4_commit_super(sb, 1);
> @@ -743,7 +743,8 @@ void __ext4_grp_locked_error(const char *function, unsigned int line,
> if (flags)
> __ext4_mark_group_bitmap_corrupted(sb, grp, flags);
>
> - if (test_opt(sb, ERRORS_CONT)) {
> + if (test_opt(sb, ERRORS_CONT) ||
> + (flags && test_opt(sb, BITMAPS_WARN))) {
> ext4_commit_super(sb, 0);
> return;
> }
> @@ -1373,7 +1374,8 @@ static struct dquot **ext4_get_dquots(struct inode *inode)
> };
>
> enum {
> - Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
> + Opt_bsd_df, Opt_minix_df, Opt_bitmaps_warn,
> + Opt_bitmaps_err, Opt_grpid, Opt_nogrpid,
> Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
> Opt_nouid32, Opt_debug, Opt_removed,
> Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
> @@ -1398,6 +1400,8 @@ enum {
> static const match_table_t tokens = {
> {Opt_bsd_df, "bsddf"},
> {Opt_minix_df, "minixdf"},
> + {Opt_bitmaps_warn, "bitmaps=warn"},
> + {Opt_bitmaps_err, "bitmaps=error"},
> {Opt_grpid, "grpid"},
> {Opt_grpid, "bsdgroups"},
> {Opt_nogrpid, "nogrpid"},
> @@ -1598,11 +1602,13 @@ static int clear_qf_name(struct super_block *sb, int qtype)
>
> static const struct mount_opts {
> int token;
> - int mount_opt;
> + unsigned long long mount_opt;
> int flags;
> } ext4_mount_opts[] = {
> {Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
> {Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
> + {Opt_bitmaps_warn, EXT4_MOUNT_BITMAPS_WARN, MOPT_SET},
> + {Opt_bitmaps_err, EXT4_MOUNT_BITMAPS_WARN, MOPT_CLEAR},
> {Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
> {Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
> {Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
> @@ -2099,6 +2105,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
> SEQ_OPTS_PUTS("errors=continue");
> if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
> SEQ_OPTS_PUTS("errors=panic");
> + if (test_opt(sb, BITMAPS_WARN))
> + SEQ_OPTS_PUTS("bitmaps=warn");
> if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
> SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
> if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
> @@ -2197,7 +2205,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
> done:
> if (test_opt(sb, DEBUG))
> printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
> - "bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
> + "bpg=%lu, ipg=%lu, mo=%04llx, mo2=%04x]\n",
> sb->s_blocksize,
> sbi->s_groups_count,
> EXT4_BLOCKS_PER_GROUP(sb),
> --
> 1.8.3.1
>
Cheers, Andreas
Download attachment "signature.asc" of type "application/pgp-signature" (874 bytes)
Powered by blists - more mailing lists