[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20200319233433.117144-7-harshadshirwadkar@gmail.com>
Date: Thu, 19 Mar 2020 16:34:32 -0700
From: Harshad Shirwadkar <harshadshirwadkar@...il.com>
To: linux-ext4@...r.kernel.org
Cc: Harshad Shirwadkar <harshadshirwadkar@...il.com>
Subject: [PATCH 6/7] e2fsck: main fast commit replay handler
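Add the main ext4 fast commit replay handler. e2fsck's journal code calls
it for every fast commit block, first in the scan pass to validate the
block and then in the replay pass to apply it to the file system.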
Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@...il.com>
---
e2fsck/e2fsck.h | 9 +
e2fsck/journal.c | 491 ++++++++++++++++++++++++++++++++++++++++++-
lib/ext2fs/ext2_fs.h | 46 ++++
3 files changed, 545 insertions(+), 1 deletion(-)
diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h
index 68f7a249..8ea87ac5 100644
--- a/e2fsck/e2fsck.h
+++ b/e2fsck/e2fsck.h
@@ -226,6 +226,12 @@ typedef struct e2fsck_struct *e2fsck_t;
#define MAX_EXTENT_DEPTH_COUNT 5
+struct e2fsck_fc_replay_state {	/* fast commit scan/replay bookkeeping */
+ int fc_replay_error;	/* error recorded by the scan pass, 0 if none */
+ int fc_replay_expected_off;	/* offset the next scanned block must carry */
+ int fc_num_blks;	/* blocks accepted by scan; decremented per replayed block */
+};
+
struct e2fsck_struct {
ext2_filsys fs;
const char *program_name;
@@ -418,6 +424,9 @@ struct e2fsck_struct {
/* Undo file */
char *undo_file;
+
+ /* Fast commit replay stuff */
+ struct e2fsck_fc_replay_state fc_replay_state;
};
/* Data structures to evaluate whether an extent tree needs rebuilding. */
diff --git a/e2fsck/journal.c b/e2fsck/journal.c
index 7d9f1b40..97fb3c24 100644
--- a/e2fsck/journal.c
+++ b/e2fsck/journal.c
@@ -278,6 +278,485 @@ static int process_journal_block(ext2_filsys fs,
return 0;
}
+/*
+ * Scan-pass validation of one fast commit block.
+ *
+ * The fast commit header is expected right after the journal header in
+ * bh->b_data.  The block is accepted only if its magic matches, its offset
+ * is the next expected one, it advertises no feature bits, and its checksum
+ * (computed from the header to the end of the block with fc_csum zeroed)
+ * matches.  The first failure is latched in the replay state so that later
+ * blocks, and the replay pass, see it too.
+ *
+ * Returns 0 if the block was accepted, otherwise the latched error.
+ */
+static int ext4_journal_fc_replay_scan(journal_t *j, struct buffer_head *bh,
+				       int off)
+{
+	e2fsck_t ctx = j->j_fs_dev->k_ctx;
+	struct e2fsck_fc_replay_state *state = &ctx->fc_replay_state;
+	struct ext4_fc_commit_hdr *fc_hdr;
+	__u32 csum, old_csum;
+	__u8 *start, *end;
+
+	fc_hdr = (struct ext4_fc_commit_hdr *)
+		((__u8 *)bh->b_data + sizeof(journal_header_t));
+
+	start = (__u8 *)fc_hdr;
+	end = (__u8 *)bh->b_data + j->j_blocksize;
+
+	/* Check if we already concluded that this fast commit is not useful */
+	if (state->fc_replay_expected_off && state->fc_replay_error)
+		goto out_err;
+
+	if (le32_to_cpu(fc_hdr->fc_magic) != EXT4_FC_MAGIC) {
+		state->fc_replay_error = -EXT2_ET_BAD_MAGIC;
+		goto out_err;
+	}
+
+	if (off != state->fc_replay_expected_off) {
+		state->fc_replay_error = -EXT2_ET_CORRUPT_JOURNAL_SB;
+		goto out_err;
+	}
+
+	state->fc_replay_expected_off++;
+
+	/* fc_features is a single byte, so no byte swapping applies to it. */
+	if (fc_hdr->fc_features) {
+		state->fc_replay_error = -EXT2_ET_OP_NOT_SUPPORTED;
+		goto out_err;
+	}
+
+	/* The checksum covers the block with the csum field itself zeroed. */
+	old_csum = fc_hdr->fc_csum;
+	fc_hdr->fc_csum = 0;
+	csum = jbd2_chksum(j, 0, start, end - start);
+	fc_hdr->fc_csum = old_csum;
+
+	if (csum != le32_to_cpu(fc_hdr->fc_csum)) {
+		state->fc_replay_error = -EXT2_ET_BAD_CRC;
+		goto out_err;
+	}
+	state->fc_num_blks++;
+	return 0;
+
+out_err:
+	return state->fc_replay_error;
+}
+
+/* Length, in CPU byte order, of the value carried by a tlv */
+static int fc_tag_len(struct ext4_fc_tl *tl)
+{
+	int value_len = le16_to_cpu(tl->fc_len);
+
+	return value_len;
+}
+
+/* The "value" of a tlv starts right behind its tag/length header */
+static __u8 *fc_tag_val(struct ext4_fc_tl *tl)
+{
+	struct ext4_fc_tl *past_hdr = tl + 1;
+
+	return (__u8 *)past_hdr;
+}
+
+/*
+ * Replay the removal of directory entry @dname (pointing at @ino) from
+ * directory @parent_ino.
+ *
+ * After the entry is unlinked, the target inode either loses one link or,
+ * if this was its last link, is zeroed on disk and released in the inode
+ * bitmap.  The bitmap is only touched once the zeroed inode has been
+ * written successfully, so a failed write is reported instead of leaving a
+ * freed-but-populated inode behind.
+ *
+ * NOTE(review): the blocks owned by a fully released inode are not freed
+ * here - confirm whether another part of the replay takes care of that.
+ *
+ * Returns 0 on success or the error from the failing libext2fs call.
+ */
+static int ext4_fc_handle_unlink(ext2_filsys fs, int parent_ino,
+				 const char *dname, int ino)
+{
+	struct ext2_inode inode;
+	int ret;
+
+	ret = ext2fs_unlink(fs, parent_ino, dname, ino, 0);
+	if (ret)
+		return ret;
+
+	ret = ext2fs_read_inode(fs, ino, &inode);
+	if (ret)
+		return ret;
+
+	if (inode.i_links_count > 1) {
+		inode.i_links_count--;
+		return ext2fs_write_inode(fs, ino, &inode);
+	}
+
+	/* Last link is gone: wipe the inode, then give it back. */
+	memset(&inode, 0, sizeof(inode));
+	ret = ext2fs_write_inode(fs, ino, &inode);
+	if (ret)
+		return ret;
+	ext2fs_unmark_inode_bitmap2(fs->inode_map, ino);
+	ext2fs_mark_ib_dirty(fs);
+
+	return 0;
+}
+
+/*
+ * Size of the raw inode image stored right after the fast commit header:
+ * the classic inode size, plus i_extra_isize taken from that image when
+ * the file system uses inodes larger than the classic size.
+ */
+static inline int get_fc_hdr_inode_len(ext2_filsys fs,
+					struct ext4_fc_commit_hdr *fc_hdr)
+{
+	struct ext2_inode_large *raw;
+
+	if (EXT2_INODE_SIZE(fs->super) <= EXT2_GOOD_OLD_INODE_SIZE)
+		return EXT2_GOOD_OLD_INODE_SIZE;
+
+	raw = (struct ext2_inode_large *)(fc_hdr + 1);
+	return EXT2_GOOD_OLD_INODE_SIZE +
+		ext2fs_le16_to_cpu(raw->i_extra_isize);
+}
+
+/* First tlv of a fast commit block: it follows the header and inode image */
+static inline struct ext4_fc_tl *get_first_tl(ext2_filsys fs,
+					struct ext4_fc_commit_hdr *fc_hdr)
+{
+	__u8 *inode_image = (__u8 *)(fc_hdr + 1);
+
+	return (struct ext4_fc_tl *)(inode_image +
+				     get_fc_hdr_inode_len(fs, fc_hdr));
+}
+
+/* Step over a tlv's header and value to reach the tlv that follows it */
+static inline struct ext4_fc_tl *get_next_tl(struct ext4_fc_tl *tl)
+{
+	__u8 *value = (__u8 *)(tl + 1);
+
+	return (struct ext4_fc_tl *)(value + le16_to_cpu(tl->fc_len));
+}
+
+/* Number of tlvs the fast commit header says the block contains */
+static inline int num_tls(struct ext4_fc_commit_hdr *fc_hdr)
+{
+	int count = le16_to_cpu(fc_hdr->fc_num_tlvs);
+
+	return count;
+}
+
+/*
+ * Replay every directory entry tlv (ADD/DEL/CREAT_DENTRY) of a fast commit
+ * block.
+ *
+ * The tlv list also carries non-dentry tags; those are skipped here without
+ * being interpreted as dentry records.  For CREAT_DENTRY the raw inode image
+ * that follows the fast commit header is additionally written to the new
+ * inode.
+ *
+ * A dentry tlv whose value is too short to hold a name is reported as
+ * journal corruption, using the same negated error code convention as the
+ * scan pass.
+ *
+ * NOTE(review): entries are always linked as EXT2_FT_REG_FILE - confirm
+ * whether the real file type should be derived from the inode.
+ *
+ * Returns 0 on success, otherwise the first error encountered.
+ */
+static int fc_replay_dentries(journal_t *j,
+			      struct ext4_fc_commit_hdr *fc_hdr)
+{
+	ext2_filsys fs = j->j_fs_dev->k_ctx->fs;
+	int inode_len = get_fc_hdr_inode_len(fs, fc_hdr);
+	struct ext4_fc_tl *tl = get_first_tl(fs, fc_hdr);
+	int ntlvs = le16_to_cpu(fc_hdr->fc_num_tlvs);
+	int i, ret = 0;
+
+	for (i = 0; i < ntlvs; i++, tl = get_next_tl(tl)) {
+		struct ext4_fc_dentry_info *fcd;
+		struct ext2_inode *inode = NULL;
+		int tag = le16_to_cpu(tl->fc_tag);
+		int parent_ino, ino, name_len;
+		char *dname;
+
+		if (tag != EXT4_FC_TAG_ADD_DENTRY &&
+		    tag != EXT4_FC_TAG_DEL_DENTRY &&
+		    tag != EXT4_FC_TAG_CREAT_DENTRY)
+			continue;
+
+		/* Signed math: a short tlv must not wrap to a huge length. */
+		name_len = fc_tag_len(tl) - (int)sizeof(*fcd);
+		if (name_len <= 0)
+			return -EXT2_ET_CORRUPT_JOURNAL_SB;
+
+		fcd = (struct ext4_fc_dentry_info *)fc_tag_val(tl);
+		parent_ino = le32_to_cpu(fcd->fc_parent_ino);
+		ino = le32_to_cpu(fcd->fc_ino);
+		dname = strndup((const char *)fcd->fc_dname, name_len);
+		if (!dname)
+			return EXT2_ET_NO_MEMORY;
+
+		switch (tag) {
+		case EXT4_FC_TAG_ADD_DENTRY:
+			ret = ext2fs_link(fs, parent_ino, dname, ino,
+					  EXT2_FT_REG_FILE);
+			if (ret)
+				break;
+			ext2fs_mark_inode_bitmap2(fs->inode_map, ino);
+			ext2fs_mark_ib_dirty(fs);
+			break;
+		case EXT4_FC_TAG_DEL_DENTRY:
+			ret = ext4_fc_handle_unlink(fs, parent_ino, dname, ino);
+			break;
+		case EXT4_FC_TAG_CREAT_DENTRY:
+			ext2fs_mark_inode_bitmap2(fs->inode_map, ino);
+			ret = ext2fs_link(fs, parent_ino, dname, ino,
+					  EXT2_FT_REG_FILE);
+			if (ret)
+				break;
+			ret = ext2fs_get_mem(inode_len, &inode);
+			if (ret)
+				break;
+			ret = ext2fs_read_inode_full(fs, ino, inode, inode_len);
+			if (!ret) {
+				memcpy(inode, fc_hdr + 1, inode_len);
+				ret = ext2fs_write_inode_full(fs, ino, inode,
+							      inode_len);
+			}
+			ext2fs_free_mem(&inode);
+			if (!ret)
+				ext2fs_mark_ib_dirty(fs);
+			break;
+		default:
+			break;
+		}
+
+		/* Single release point for the strndup()ed name. */
+		free(dname);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Append a copy of @ex to @list.  When the array is full it is first grown
+ * by NUM_EXTENTS entries.  Returns 0 or the error from the resize.
+ */
+static int ext2fs_add_extent_to_list(struct extent_list *list,
+				     struct ext2fs_extent *ex)
+{
+	int err;
+
+	if (list->count == list->size) {
+		unsigned int bytes = (list->size + NUM_EXTENTS) *
+					sizeof(struct ext2fs_extent);
+
+		err = ext2fs_resize_mem(0, bytes, &list->extents);
+		if (err)
+			return err;
+		list->size += NUM_EXTENTS;
+	}
+
+	list->extents[list->count] = *ex;
+	list->count++;
+	return 0;
+}
+
+/*
+ * Remove the logical block range described by @del (e_lblk, e_len) from
+ * every extent in @list.
+ *
+ * For each overlapping extent, whatever lies before the deleted range stays
+ * in place and whatever lies after it is kept with its logical AND physical
+ * start advanced by the same amount.  If both parts survive, the trailing
+ * part is appended to the list as a new extent.  Extents that are covered
+ * completely are dropped.  The list is not kept sorted.
+ *
+ * Ranges are handled as half-open [start, stop) intervals in blk64_t so
+ * that large logical block numbers and zero-length extents cannot wrap.
+ *
+ * Returns 0 on success or the error from growing the list.
+ */
+static int ext2fs_del_extent_from_list(struct extent_list *list,
+				       struct ext2fs_extent *del)
+{
+	blk64_t del_start = del->e_lblk;
+	blk64_t del_stop = del->e_lblk + del->e_len;
+	int ret, i, j;
+
+	if (del->e_len == 0)
+		return 0;
+
+	i = 0;
+	while (i < list->count) {
+		blk64_t cur_start = list->extents[i].e_lblk;
+		blk64_t cur_stop = cur_start + list->extents[i].e_len;
+		int keep_head, keep_tail;
+
+		if (del_stop <= cur_start || del_start >= cur_stop) {
+			i++;
+			continue;
+		}
+
+		keep_head = del_start > cur_start;
+		keep_tail = del_stop < cur_stop;
+
+		if (keep_head && keep_tail) {
+			/* Hole in the middle: split off the trailing part. */
+			struct ext2fs_extent tail = list->extents[i];
+
+			tail.e_lblk = del_stop;
+			tail.e_len = cur_stop - del_stop;
+			tail.e_pblk += del_stop - cur_start;
+			/* May reallocate list->extents; re-index afterwards. */
+			ret = ext2fs_add_extent_to_list(list, &tail);
+			if (ret)
+				return ret;
+			list->extents[i].e_len = del_start - cur_start;
+			i++;
+		} else if (keep_head) {
+			list->extents[i].e_len = del_start - cur_start;
+			i++;
+		} else if (keep_tail) {
+			/* Front trimmed: the physical start moves as well. */
+			list->extents[i].e_pblk += del_stop - cur_start;
+			list->extents[i].e_lblk = del_stop;
+			list->extents[i].e_len = cur_stop - del_stop;
+			i++;
+		} else {
+			/*
+			 * Fully covered: remove it right away and look at
+			 * the entry that slides into this index next.
+			 */
+			for (j = i; j < list->count - 1; j++)
+				list->extents[j] = list->extents[j + 1];
+			list->count--;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Convert an on-disk (little-endian) leaf extent into libext2fs form.
+ *
+ * Every field of @to is assigned, including e_flags, so the result does not
+ * depend on what the destination held before.  Callers pass an
+ * uninitialised or reused struct, and OR-ing into it would carry over
+ * garbage or a stale UNINIT flag.  Lengths above EXT_INIT_MAX_LEN denote an
+ * uninitialized extent whose real length is the excess over that value.
+ */
+static void ext3_to_ext2fs_extent(struct ext2fs_extent *to,
+				  struct ext3_extent *from)
+{
+	to->e_pblk = ext2fs_le32_to_cpu(from->ee_start) +
+		((__u64) ext2fs_le16_to_cpu(from->ee_start_hi)
+			<< 32);
+	to->e_lblk = ext2fs_le32_to_cpu(from->ee_block);
+	to->e_len = ext2fs_le16_to_cpu(from->ee_len);
+	to->e_flags = EXT2_EXTENT_FLAGS_LEAF;
+	if (to->e_len > EXT_INIT_MAX_LEN) {
+		to->e_len -= EXT_INIT_MAX_LEN;
+		to->e_flags |= EXT2_EXTENT_FLAGS_UNINIT;
+	}
+}
+
+/*
+ * qsort() comparator for struct ext2fs_extent: orders by logical start
+ * block, then by length.  Uses explicit comparisons rather than a
+ * subtraction so the result is correct for any pair of unsigned lengths.
+ */
+static int ex_compar(const void *arg1, const void *arg2)
+{
+	const struct ext2fs_extent *ex1 = arg1;
+	const struct ext2fs_extent *ex2 = arg2;
+
+	if (ex1->e_lblk < ex2->e_lblk)
+		return -1;
+	if (ex1->e_lblk > ex2->e_lblk)
+		return 1;
+	if (ex1->e_len < ex2->e_len)
+		return -1;
+	return ex1->e_len > ex2->e_len;
+}
+
+/*
+ * Sort @list by logical block and fold each extent that overlaps its
+ * predecessor into that predecessor, which is stretched to cover the union
+ * of both logical ranges.  Extents that merely touch are left separate.
+ *
+ * NOTE(review): only logical ranges are compared; the merged extent keeps
+ * the predecessor's e_pblk and e_flags - confirm that overlapping entries
+ * are always physically consistent at this point.
+ */
+static void sort_and_merge_extents(struct extent_list *list)
+{
+	blk64_t last_blk;
+	int cur, k;
+
+	if (list->count < 2)
+		return;
+
+	qsort(list->extents, list->count, sizeof(list->extents[0]),
+	      ex_compar);
+
+	for (cur = 0; cur < list->count - 1; ) {
+		struct ext2fs_extent *a = &list->extents[cur];
+		struct ext2fs_extent *b = &list->extents[cur + 1];
+
+		if (a->e_lblk + a->e_len - 1 < b->e_lblk) {
+			/* No overlap with the successor: move on. */
+			cur++;
+			continue;
+		}
+
+		last_blk = MAX(a->e_lblk + a->e_len,
+			       b->e_lblk + b->e_len) - 1;
+		a->e_len = last_blk - a->e_lblk + 1;
+
+		/* Close the gap left by the absorbed successor. */
+		for (k = cur + 1; k < list->count - 1; k++)
+			list->extents[k] = list->extents[k + 1];
+		list->count--;
+	}
+}
+
+/* Set the block bitmap bit of each of @count blocks from @pblk, if clear */
+static void mark_blocks_used(ext2_filsys fs, blk64_t pblk, int count)
+{
+	int done;
+
+	for (done = 0; done < count; done++) {
+		blk64_t blk = pblk + done;
+
+		if (!ext2fs_test_block_bitmap2(fs->block_map, blk))
+			ext2fs_mark_block_bitmap2(fs->block_map, blk);
+	}
+}
+
+/* Clear the block bitmap bit of each of @count blocks from @pblk, if set */
+static void mark_blocks_free(ext2_filsys fs, blk64_t pblk, int count)
+{
+	int done;
+
+	for (done = 0; done < count; done++) {
+		blk64_t blk = pblk + done;
+
+		if (ext2fs_test_block_bitmap2(fs->block_map, blk))
+			ext2fs_unmark_block_bitmap2(fs->block_map, blk);
+	}
+}
+
+/*
+ * Release, in the block bitmap, the physical blocks that back the logical
+ * range @del in @list.  Each extent contributes only the part of it that
+ * actually overlaps the range, so a range spanning several extents (or
+ * reaching past the end of one) never frees blocks it does not map.
+ * Must run before the range is removed from the list.
+ */
+static void fc_free_range_blocks(ext2_filsys fs, struct extent_list *list,
+				 struct ext2fs_extent *del)
+{
+	blk64_t del_start = del->e_lblk;
+	blk64_t del_stop = del->e_lblk + del->e_len;
+	int k;
+
+	for (k = 0; k < list->count; k++) {
+		struct ext2fs_extent *ex = &list->extents[k];
+		blk64_t ex_stop = ex->e_lblk + ex->e_len;
+		blk64_t start = del_start > ex->e_lblk ? del_start : ex->e_lblk;
+		blk64_t stop = del_stop < ex_stop ? del_stop : ex_stop;
+
+		if (start >= stop)
+			continue;
+		mark_blocks_free(fs, ex->e_pblk + (start - ex->e_lblk),
+				 (int)(stop - start));
+	}
+}
+
+/*
+ * Fast commit callback invoked by the journal code for each fast commit
+ * block.
+ *
+ * PASS_SCAN only validates the block (see ext4_journal_fc_replay_scan).
+ * PASS_REPLAY applies it: directory entry tlvs are replayed, the ADD_RANGE
+ * and DEL_RANGE tlvs are applied to the extent list of the inode named in
+ * the header, the extent tree is rewritten, and the inode image stored
+ * after the header is copied over the on-disk inode.  For inodes without
+ * inline data i_block is left alone, since it holds the extent tree that
+ * was just rewritten.  Once the last scanned block has been replayed, the
+ * bitmaps and superblock are written out.  Other passes are ignored.
+ *
+ * Returns 0 on success, the error latched by the scan pass, or the first
+ * error returned by a libext2fs/e2fsck call.
+ */
+static int ext4_journal_fc_replay_cb(journal_t *journal, struct buffer_head *bh,
+				     enum passtype pass, int off)
+{
+	e2fsck_t ctx = journal->j_fs_dev->k_ctx;
+	struct ext4_fc_commit_hdr *fc_hdr;
+	struct ext4_fc_lrange *lrange;
+	struct ext4_fc_tl *tl;
+	struct ext2_inode *inode = NULL;
+	struct ext2fs_extent extent;
+	struct extent_list extent_list = {0};
+	int i, ret, ino, inode_len;
+
+	if (pass == PASS_SCAN)
+		return ext4_journal_fc_replay_scan(journal, bh, off);
+	if (pass != PASS_REPLAY)
+		return 0;
+	ctx->fc_replay_state.fc_num_blks--;
+
+	if (ctx->fc_replay_state.fc_replay_error) {
+		jfs_debug("Scan phase detected error. Aborting replay..\n");
+		return ctx->fc_replay_state.fc_replay_error;
+	}
+
+	ret = ext2fs_read_bitmaps(ctx->fs);
+	if (ret)
+		return ret;
+
+	fc_hdr = (struct ext4_fc_commit_hdr *)
+		((__u8 *)bh->b_data + sizeof(journal_header_t));
+	inode_len = get_fc_hdr_inode_len(ctx->fs, fc_hdr);
+	ret = fc_replay_dentries(journal, fc_hdr);
+	if (ret)
+		return ret;
+
+	ino = le32_to_cpu(fc_hdr->fc_ino);
+	extent_list.ino = ino;
+	ret = e2fsck_read_extents(ctx, &extent_list);
+	if (ret)
+		return ret;
+
+	tl = get_first_tl(ctx->fs, fc_hdr);
+	for (i = 0; i < num_tls(fc_hdr); i++, tl = get_next_tl(tl)) {
+		/* Start every tlv from a clean extent; no stale fields. */
+		memset(&extent, 0, sizeof(extent));
+
+		switch (le16_to_cpu(tl->fc_tag)) {
+		case EXT4_FC_TAG_ADD_RANGE:
+			ext3_to_ext2fs_extent(&extent,
+					(struct ext3_extent *)(tl + 1));
+			ret = ext2fs_add_extent_to_list(&extent_list, &extent);
+			if (ret)
+				goto out;
+			mark_blocks_used(ctx->fs, extent.e_pblk, extent.e_len);
+			break;
+		case EXT4_FC_TAG_DEL_RANGE:
+			lrange = (struct ext4_fc_lrange *)(tl + 1);
+			extent.e_lblk = ext2fs_le32_to_cpu(lrange->fc_lblk);
+			/* fc_len is a 32-bit little-endian field. */
+			extent.e_len = ext2fs_le32_to_cpu(lrange->fc_len);
+
+			/* Free the blocks while their mapping still exists. */
+			fc_free_range_blocks(ctx->fs, &extent_list, &extent);
+			ret = ext2fs_del_extent_from_list(&extent_list,
+							  &extent);
+			if (ret)
+				goto out;
+			break;
+		default:
+			break;
+		}
+	}
+	ext2fs_mark_bb_dirty(ctx->fs);
+	sort_and_merge_extents(&extent_list);
+
+	ret = e2fsck_rewrite_extent_tree(ctx, &extent_list);
+	if (ret)
+		goto out;
+
+	ret = ext2fs_get_mem(inode_len, &inode);
+	if (ret)
+		goto out;
+	ret = ext2fs_read_inode_full(ctx->fs, ino, inode, inode_len);
+	if (ret)
+		goto out;
+
+	if (inode->i_flags & EXT4_INLINE_DATA_FL) {
+		memcpy(inode, fc_hdr + 1, inode_len);
+	} else {
+		/* Copy everything except i_block (the rewritten extents). */
+		memcpy(inode, fc_hdr + 1,
+		       offsetof(struct ext2_inode_large, i_block));
+		memcpy(&inode->i_generation,
+		       &((struct ext2_inode_large *)(fc_hdr + 1))->i_generation,
+		       inode_len -
+		       offsetof(struct ext2_inode_large, i_generation));
+	}
+
+	ret = ext2fs_write_inode_full(ctx->fs, ino, inode, inode_len);
+	if (ret)
+		goto out;
+
+	if (ctx->fc_replay_state.fc_num_blks == 0) {
+		/* Last fast commit block: persist the accumulated state. */
+		ext2fs_mark_super_dirty(ctx->fs);
+		ret = ext2fs_write_block_bitmap(ctx->fs);
+		if (ret)
+			goto out;
+		ret = ext2fs_write_inode_bitmap(ctx->fs);
+		if (ret)
+			goto out;
+		ext2fs_calculate_summary_stats(ctx->fs);
+		ext2fs_set_gdt_csum(ctx->fs);
+		ret = ext2fs_flush(ctx->fs);
+	}
+out:
+	ext2fs_free_mem(&inode);
+	ext2fs_free_mem(&extent_list.extents);
+	return ret;
+}
+
static errcode_t e2fsck_get_journal(e2fsck_t ctx, journal_t **ret_journal)
{
struct process_block_struct pb;
@@ -514,6 +993,10 @@ static errcode_t e2fsck_get_journal(e2fsck_t ctx, journal_t **ret_journal)
journal->j_sb_buffer = bh;
journal->j_superblock = (journal_superblock_t *)bh->b_data;
+ if (ext2fs_has_feature_fast_commit(ctx->fs->super))
+ journal->j_fc_replay_callback = ext4_journal_fc_replay_cb;
+ else
+ journal->j_fc_replay_callback = NULL;
#ifdef USE_INODE_IO
if (j_inode)
@@ -688,7 +1171,13 @@ static errcode_t e2fsck_journal_load(journal_t *journal)
journal->j_transaction_sequence = journal->j_tail_sequence;
journal->j_tail = ntohl(jsb->s_start);
journal->j_first = ntohl(jsb->s_first);
- journal->j_last = ntohl(jsb->s_maxlen);
+ if (jbd2_has_feature_fast_commit(journal)) {
+ journal->j_last_fc = ntohl(jsb->s_maxlen);
+ journal->j_last = journal->j_last_fc - JBD2_FAST_COMMIT_BLOCKS;
+ journal->j_first_fc = journal->j_last + 1;
+ } else {
+ journal->j_last = ntohl(jsb->s_maxlen);
+ }
return 0;
}
diff --git a/lib/ext2fs/ext2_fs.h b/lib/ext2fs/ext2_fs.h
index 6c20ea77..410db16a 100644
--- a/lib/ext2fs/ext2_fs.h
+++ b/lib/ext2fs/ext2_fs.h
@@ -490,6 +490,52 @@ struct ext2_inode_large {
/*9c*/ __u32 i_projid; /* Project ID */
};
+/* Fast commit stuff */
+/* Ext4 fast commit related info */
+
+/* Magic of fast commit header */
+#define EXT4_FC_MAGIC 0xE2540090
+
+struct ext4_fc_commit_hdr {
+ /* Fast commit magic, should be EXT4_FC_MAGIC (little-endian on disk) */
+ __u32 fc_magic;
+ /* Features used by this fast commit block; replay rejects any non-zero value */
+ __u8 fc_features;
+ /* Number of TLVs in this fast commit block */
+ __u16 fc_num_tlvs;
+ /* Number of the inode whose image follows this header */
+ __u32 fc_ino;
+ /* Csum(hdr+contents), computed with this field set to zero */
+ __u32 fc_csum;
+};
+
+struct ext4_fc_lrange {	/* value of an EXT4_FC_TAG_DEL_RANGE tlv */
+ __le32 fc_lblk;	/* first logical block of the range */
+ __le32 fc_len;	/* number of blocks in the range */
+};
+
+#define EXT4_FC_TAG_ADD_RANGE 0x1
+#define EXT4_FC_TAG_DEL_RANGE 0x2
+#define EXT4_FC_TAG_CREAT_DENTRY 0x3
+#define EXT4_FC_TAG_ADD_DENTRY 0x4
+#define EXT4_FC_TAG_DEL_DENTRY 0x5
+
+struct ext4_fc_tl {	/* tag/length header placed in front of every tlv value */
+ __le16 fc_tag;	/* one of the EXT4_FC_TAG_* values */
+ __le16 fc_len;	/* length in bytes of the value following this header */
+};
+
+/* On disk fast commit tlv value structure for dirent tags:
+ * - EXT4_FC_TAG_CREAT_DENTRY
+ * - EXT4_FC_TAG_ADD_DENTRY
+ * - EXT4_FC_TAG_DEL_DENTRY
+ */
+struct ext4_fc_dentry_info {
+ __le32 fc_parent_ino;	/* inode number of the directory holding the entry */
+ __le32 fc_ino;	/* inode number the entry points at */
+ __u8 fc_dname[0];	/* entry name; its length is the tlv length minus this struct */
+};
+
#define EXT4_INODE_CSUM_HI_EXTRA_END \
(offsetof(struct ext2_inode_large, i_checksum_hi) + sizeof(__u16) - \
EXT2_GOOD_OLD_INODE_SIZE)
--
2.25.1.696.g5e7596f4ac-goog
Powered by blists - more mailing lists