lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <A963F3A9-2106-4919-AE7C-8700F60A17EF@dilger.ca>
Date:   Tue, 1 Jun 2021 17:27:47 -0600
From:   Andreas Dilger <adilger@...ger.ca>
To:     Artem Blagodarenko <artem.blagodarenko@...il.com>
Cc:     Ext4 Developers List <linux-ext4@...r.kernel.org>
Subject: Re: [PATCH] e2fsck: replay all commits except broken ones

On May 27, 2021, at 2:57 AM, Artem Blagodarenko <artem.blagodarenko@...il.com> wrote:
> 
> E2fsck interrupts journal replay if a broken transaction is found.
> Journal is replayed partly. Some useful transactions are not replayed.
> 
> Let's change this behavior and process all transactions except broken ones
> if "-E repair_journal" option is set.

In the past we discussed using the per-block checksums in the journal
(which I think are now widely available, maybe only with metacsum?)
to detect which blocks in the journal are corrupted, and only skip
replay of those blocks.  If the same block is overwritten in a later
journal replay phase (e.g. common case for bitmaps, group descriptors,
busy inodes, etc), then no extra action is needed by e2fsck (in theory,
though we may still want to run an e2fsck to find *other* corruption).

The kernel could probably do the same, to avoid aborting a mount due to
small journal corruption.  If the corrupted blocks are bitmaps, then the
filesystem could maybe still continue to mount, with those groups marked
EXT4_GROUP_INFO_BBITMAP_CORRUPT or EXT4_GROUP_INFO_IBITMAP_CORRUPT so
they are skipped during allocation, and only mark the filesystem in error.
If there are other blocks corrupted, the mount should probably still fail.

Cheers, Andreas

> 
> HPE-bug-id: LUS-9452
> ---
> e2fsck/e2fsck.h         |  1 +
> e2fsck/journal.c        |  3 +++
> e2fsck/recovery.c       | 41 +++++++++++++++++++++++++++++++----------
> e2fsck/unix.c           |  3 +++
> lib/e2p/feature.c       |  2 ++
> lib/ext2fs/kernel-jbd.h |  5 ++++-
> misc/ext4.5.in          |  4 ++++
> 7 files changed, 48 insertions(+), 11 deletions(-)
> 
> diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
> index 15d043ee..7dccc8e4 100644
> --- a/e2fsck/e2fsck.h
> +++ b/e2fsck/e2fsck.h
> @@ -179,6 +179,7 @@ struct resource_track {
> #define E2F_OPT_UNSHARE_BLOCKS  0x40000
> #define E2F_OPT_CLEAR_UNINIT	0x80000 /* Hack to clear the uninit bit */
> #define E2F_OPT_CHECK_ENCODING  0x100000 /* Force verification of encoded filenames */
> +#define E2F_OPT_REPAIR_JOURNAL	0x200000 /* Apply all possible from journal */
> 
> /*
>  * E2fsck flags
> diff --git a/e2fsck/journal.c b/e2fsck/journal.c
> index a425bbd1..9b84c477 100644
> --- a/e2fsck/journal.c
> +++ b/e2fsck/journal.c
> @@ -1620,6 +1620,9 @@ static errcode_t recover_ext3_journal(e2fsck_t ctx)
> 	if (retval)
> 		return retval;
> 
> +	if (ctx->options & E2F_OPT_REPAIR_JOURNAL)
> +		jbd2_set_feature_repair(journal);
> +
> 	retval = e2fsck_journal_load(journal);
> 	if (retval)
> 		goto errout;
> diff --git a/e2fsck/recovery.c b/e2fsck/recovery.c
> index dc0694fc..5aa3e529 100644
> --- a/e2fsck/recovery.c
> +++ b/e2fsck/recovery.c
> @@ -33,6 +33,7 @@ struct recovery_info
> 	int		nr_replays;
> 	int		nr_revokes;
> 	int		nr_revoke_hits;
> +	int             nr_skipped;
> };
> 
> static int do_one_pass(journal_t *journal,
> @@ -313,8 +314,9 @@ int jbd2_journal_recover(journal_t *journal)
> 	jbd_debug(1, "JBD2: recovery, exit status %d, "
> 		  "recovered transactions %u to %u\n",
> 		  err, info.start_transaction, info.end_transaction);
> -	jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
> -		  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
> +	jbd_debug(1, "JBD2: Replayed %d, skipped %d, and revoked %d/%d blocks\n",
> +		  info.nr_replays, info.nr_skipped, info.nr_revoke_hits,
> +		  info.nr_revokes);
> 
> 	/* Restart the log at the next transaction ID, thus invalidating
> 	 * any existing commit records in the log. */
> @@ -787,16 +789,35 @@ static int do_one_pass(journal_t *journal,
> 				}
> 
> 				/* Neither checksum match nor unused? */
> -				if (!((crc32_sum == found_chksum &&
> -				       cbh->h_chksum_type ==
> -						JBD2_CRC32_CHKSUM &&
> -				       cbh->h_chksum_size ==
> -						JBD2_CRC32_CHKSUM_SIZE) ||
> -				      (cbh->h_chksum_type == 0 &&
> -				       cbh->h_chksum_size == 0 &&
> -				       found_chksum == 0)))
> +				if (cbh->h_chksum_type == 0 &&
> +				    cbh->h_chksum_size == 0 &&
> +				    found_chksum == 0)
> 					goto chksum_error;
> 
> +				if (!(crc32_sum = found_chksum &&
> +				    cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
> +				    cbh->h_chksum_size ==
> +						JBD2_CRC32_CHKSUM_SIZE)) {
> +					if (jbd2_has_feature_repair(journal)) {
> +						/*
> +						 * Commit with wrong checksum.
> +						 * Let's skip it. There are
> +						 * some corect commits after.
> +						*/
> +						++info->nr_skipped;
> +						jbd_debug(1, "JBD2: "
> +							  "crc32_sum(0x%x)i !="
> +							  " found_chksum(0x%x)"
> +							  ". Skipped.\n",
> +							  crc32_sum,
> +							  found_chksum);
> +						brelse(bh);
> +						next_commit_ID++;
> +					} else {
> +						goto chksum_error;
> +					}
> +				}
> +
> 				crc32_sum = ~0;
> 			}
> 			if (pass == PASS_SCAN &&
> diff --git a/e2fsck/unix.c b/e2fsck/unix.c
> index c5f9e441..fc7649fc 100644
> --- a/e2fsck/unix.c
> +++ b/e2fsck/unix.c
> @@ -763,6 +763,9 @@ static void parse_extended_opts(e2fsck_t ctx, const char *opts)
> 			ctx->options |= E2F_OPT_CLEAR_UNINIT;
> 			continue;
> #endif
> +		} else if (strcmp(token, "repair_journal") == 0) {
> +			ctx->options |= E2F_OPT_REPAIR_JOURNAL;
> +			continue;
> 		} else {
> 			fprintf(stderr, _("Unknown extended option: %s\n"),
> 				token);
> diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c
> index 22910602..6cd11384 100644
> --- a/lib/e2p/feature.c
> +++ b/lib/e2p/feature.c
> @@ -134,6 +134,8 @@ static struct feature jrnl_feature_list[] = {
>                        "journal_checksum_v2" },
>        {       E2P_FEATURE_INCOMPAT, JBD2_FEATURE_INCOMPAT_CSUM_V3,
>                        "journal_checksum_v3" },
> +       {       E2P_FEATURE_INCOMPAT, JBD2_FEATURE_INCOMPAT_REPAIR,
> +			"journal_repair" },
>        {       0, 0, 0 },
> };
> 
> diff --git a/lib/ext2fs/kernel-jbd.h b/lib/ext2fs/kernel-jbd.h
> index 2978ccb6..4a2cef20 100644
> --- a/lib/ext2fs/kernel-jbd.h
> +++ b/lib/ext2fs/kernel-jbd.h
> @@ -265,6 +265,7 @@ typedef struct journal_superblock_s
> #define JBD2_FEATURE_INCOMPAT_CSUM_V2		0x00000008
> #define JBD2_FEATURE_INCOMPAT_CSUM_V3		0x00000010
> #define JBD2_FEATURE_INCOMPAT_FAST_COMMIT	0x00000020
> +#define JBD2_FEATURE_INCOMPAT_REPAIR		0x00000040
> 
> /* Features known to this kernel version: */
> #define JBD2_KNOWN_COMPAT_FEATURES	0
> @@ -274,7 +275,8 @@ typedef struct journal_superblock_s
> 					 JBD2_FEATURE_INCOMPAT_64BIT|\
> 					 JBD2_FEATURE_INCOMPAT_CSUM_V2|	\
> 					 JBD2_FEATURE_INCOMPAT_CSUM_V3 | \
> -					 JBD2_FEATURE_INCOMPAT_FAST_COMMIT)
> +					 JBD2_FEATURE_INCOMPAT_FAST_COMMIT | \
> +					 JBD2_FEATURE_INCOMPAT_REPAIR)
> 
> #ifdef NO_INLINE_FUNCS
> extern size_t journal_tag_bytes(journal_t *journal);
> @@ -392,6 +394,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(async_commit,	ASYNC_COMMIT)
> JBD2_FEATURE_INCOMPAT_FUNCS(csum2,		CSUM_V2)
> JBD2_FEATURE_INCOMPAT_FUNCS(csum3,		CSUM_V3)
> JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit,	FAST_COMMIT)
> +JBD2_FEATURE_INCOMPAT_FUNCS(repair,		REPAIR)
> 
> #if (defined(E2FSCK_INCLUDE_INLINE_FUNCS) || !defined(NO_INLINE_FUNCS))
> /*
> diff --git a/misc/ext4.5.in b/misc/ext4.5.in
> index 90bc4f88..95266864 100644
> --- a/misc/ext4.5.in
> +++ b/misc/ext4.5.in
> @@ -574,6 +574,10 @@ Commit block can be written to disk without waiting for descriptor blocks. If
> enabled older kernels cannot mount the device.
> This will enable 'journal_checksum' internally.
> .TP
> +.B journal_repair
> +If journal is broken, apply all valid transactions and pass
> +all broken ones(with invalid crc).
> +.TP
> .BR barrier=0 " / " barrier=1 " / " barrier " / " nobarrier
> These mount options have the same effect as in ext3.  The mount options
> "barrier" and "nobarrier" are added for consistency with other ext4 mount
> --
> 2.18.4
> 


Cheers, Andreas






Download attachment "signature.asc" of type "application/pgp-signature" (874 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ