[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <A38E4C3B-F2B9-4EA0-A053-6CDA4F2D5913@dilger.ca>
Date: Mon, 18 Sep 2023 17:01:56 -0600
From: Andreas Dilger <adilger@...ger.ca>
To: Harshad Shirwadkar <harshadshirwadkar@...il.com>
Cc: Ext4 Developers List <linux-ext4@...r.kernel.org>,
Theodore Ts'o <tytso@....edu>,
Alexey Lyashkov <alexey.lyashkov@...il.com>
Subject: Re: [PATCH v4 4/8] e2fsck: add fast commit scan pass
https://patchwork.ozlabs.org/project/linux-ext4/list/?series=312552On Sep 18, 2023, at 2:39 PM, Andreas Dilger <adilger@...ger.ca> wrote:
>
> On Jan 21, 2021, at 10:45 PM, Harshad Shirwadkar <harshadshirwadkar@...il.com> wrote:
>>
>> From: Harshad Shirwadkar <harshadshirwadkar@...il.com>
>>
>> Add fast commit scan pass. Scan pass is responsible for following
>> things:
>>
>> * Count total number of fast commit tags that need to be replayed
>> during the replay phase.
>>
>> * Validate whether the fast commit area is valid for a given
>> transaction ID.
>>
>> * Verify the CRC of fast commit area.
>>
>> Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@...il.com>
>
> I was making a fix to debugfs/journal.c today to improve performance
> of the revoke hashtable, since it was performing very badly with a
> large journal and lots of revokes (separate patch to be submitted).
>
> I noticed that debugfs/journal.c is totally missing any of the fast
> commit handling that was added to e2fsck/journal.c in this patch.
>
> This seems dangerous since there are some cases where debugfs and tune2fs
> will do journal recovery in userspace, but it appears possible that they
> would totally miss any fast commit transaction handling.
>
> It isn't great that we have two nearly identical copies of the same code
> in e2fsprogs, but looks is difficult to make them totally identical.
> We could potentially play some tricks here (e.g. use a common variable
> name for both "ctx" and "fs" in the code, unify copies of e2fsck_journal_*
> and ext2fs_journal_* into a single file, and potentially abstract out
> some of the differences (mainly from e2fsck/journal.c fixing errors
> during journal recovery) into helper functions that are no-ops on the
> debugfs/journal.c side.
>
> There would still be two different *builds* of the code, with a lot of
> macro expansions to hide the differences, but I think it looks possible
> to at least bring these two copies more into sync. I have some cleanups,
> but I don't know much about fast commits and what should be done there.
I was poking in ext4 Patchwork for an unrelated reason and found the
following patch series from Alexey Lyashkov that is already cleaning
up a bunch of this code duplication by moving it into lib/support:
https://patchwork.ozlabs.org/project/linux-ext4/list/?series=312552
This is cleaning up a bunch of the common code between e2fsck and debugfs,
but doesn't get far enough to add in the missing fast commit feature,
AFAICS.
Cheers, Andreas
>> ---
>> e2fsck/journal.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++
>> 1 file changed, 109 insertions(+)
>>
>> diff --git a/e2fsck/journal.c b/e2fsck/journal.c
>> index 2c8e3441..f1aa0fd6 100644
>> --- a/e2fsck/journal.c
>> +++ b/e2fsck/journal.c
>> @@ -278,6 +278,108 @@ static int process_journal_block(ext2_filsys fs,
>> return 0;
>> }
>>
>> +static int ext4_fc_replay_scan(journal_t *j, struct buffer_head *bh,
>> + int off, tid_t expected_tid)
>> +{
>> + e2fsck_t ctx = j->j_fs_dev->k_ctx;
>> + struct e2fsck_fc_replay_state *state;
>> + int ret = JBD2_FC_REPLAY_CONTINUE;
>> + struct ext4_fc_add_range *ext;
>> + struct ext4_fc_tl *tl;
>> + struct ext4_fc_tail *tail;
>> + __u8 *start, *end;
>> + struct ext4_fc_head *head;
>> + struct ext2fs_extent ext2fs_ex;
>> +
>> + state = &ctx->fc_replay_state;
>> +
>> + start = (__u8 *)bh->b_data;
>> + end = (__u8 *)bh->b_data + j->j_blocksize - 1;
>> +
>> + jbd_debug(1, "Scan phase starting, expected %d", expected_tid);
>> + if (state->fc_replay_expected_off == 0) {
>> + memset(state, 0, sizeof(*state));
>> + /* Check if we can stop early */
>> + if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
>> + != EXT4_FC_TAG_HEAD) {
>> + jbd_debug(1, "Ending early!, not a head tag");
>> + return 0;
>> + }
>> + }
>> +
>> + if (off != state->fc_replay_expected_off) {
>> + ret = -EFSCORRUPTED;
>> + goto out_err;
>> + }
>> +
>> + state->fc_replay_expected_off++;
>> + fc_for_each_tl(start, end, tl) {
>> + jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
>> + tag2str(le16_to_cpu(tl->fc_tag)), bh->b_blocknr);
>> + switch (le16_to_cpu(tl->fc_tag)) {
>> + case EXT4_FC_TAG_ADD_RANGE:
>> + ext = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
>> + ret = ext2fs_decode_extent(&ext2fs_ex, (void *)&ext->fc_ex,
>> + sizeof(ext->fc_ex));
>> + if (ret)
>> + ret = JBD2_FC_REPLAY_STOP;
>> + else
>> + ret = JBD2_FC_REPLAY_CONTINUE;
>> + case EXT4_FC_TAG_DEL_RANGE:
>> + case EXT4_FC_TAG_LINK:
>> + case EXT4_FC_TAG_UNLINK:
>> + case EXT4_FC_TAG_CREAT:
>> + case EXT4_FC_TAG_INODE:
>> + case EXT4_FC_TAG_PAD:
>> + state->fc_cur_tag++;
>> + state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
>> + sizeof(*tl) + ext4_fc_tag_len(tl));
>> + break;
>> + case EXT4_FC_TAG_TAIL:
>> + state->fc_cur_tag++;
>> + tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
>> + state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
>> + sizeof(*tl) +
>> + offsetof(struct ext4_fc_tail,
>> + fc_crc));
>> + jbd_debug(1, "tail tid %d, expected %d\n",
>> + le32_to_cpu(tail->fc_tid),
>> + expected_tid);
>> + if (le32_to_cpu(tail->fc_tid) == expected_tid &&
>> + le32_to_cpu(tail->fc_crc) == state->fc_crc) {
>> + state->fc_replay_num_tags = state->fc_cur_tag;
>> + } else {
>> + ret = state->fc_replay_num_tags ?
>> + JBD2_FC_REPLAY_STOP : -EFSBADCRC;
>> + }
>> + state->fc_crc = 0;
>> + break;
>> + case EXT4_FC_TAG_HEAD:
>> + head = (struct ext4_fc_head *)ext4_fc_tag_val(tl);
>> + if (le32_to_cpu(head->fc_features) &
>> + ~EXT4_FC_SUPPORTED_FEATURES) {
>> + ret = -EOPNOTSUPP;
>> + break;
>> + }
>> + if (le32_to_cpu(head->fc_tid) != expected_tid) {
>> + ret = -EINVAL;
>> + break;
>> + }
>> + state->fc_cur_tag++;
>> + state->fc_crc = jbd2_chksum(j, state->fc_crc, tl,
>> + sizeof(*tl) + ext4_fc_tag_len(tl));
>> + break;
>> + default:
>> + ret = state->fc_replay_num_tags ?
>> + JBD2_FC_REPLAY_STOP : -ECANCELED;
>> + }
>> + if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
>> + break;
>> + }
>> +
>> +out_err:
>> + return ret;
>> +}
>> /*
>> * Main recovery path entry point. This function returns JBD2_FC_REPLAY_CONTINUE
>> * to indicate that it is expecting more fast commit blocks. It returns
>> @@ -286,6 +388,13 @@ static int process_journal_block(ext2_filsys fs,
>> static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
>> enum passtype pass, int off, tid_t expected_tid)
>> {
>> + e2fsck_t ctx = journal->j_fs_dev->k_ctx;
>> + struct e2fsck_fc_replay_state *state = &ctx->fc_replay_state;
>> +
>> + if (pass == PASS_SCAN) {
>> + state->fc_current_pass = PASS_SCAN;
>> + return ext4_fc_replay_scan(journal, bh, off, expected_tid);
>> + }
>> return JBD2_FC_REPLAY_STOP;
>> }
>>
>> --
>> 2.30.0.280.ga3ce27912f-goog
>>
>
>
> Cheers, Andreas
>
>
>
>
>
Cheers, Andreas
Download attachment "signature.asc" of type "application/pgp-signature" (874 bytes)
Powered by blists - more mailing lists