[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20190722040011.18892-11-harshadshirwadkar@gmail.com>
Date: Sun, 21 Jul 2019 21:00:11 -0700
From: Harshad Shirwadkar <harshadshirwadkar@...il.com>
To: linux-ext4@...r.kernel.org
Cc: Harshad Shirwadkar <harshadshirwadkar@...il.com>
Subject: [PATCH 11/11] ext4: fast-commit recovery path changes
This patch adds core fast-commit recovery path changes. Each fast
commit block stores modified extents for a particular file. Replay
code maps blocks in each such extent to the actual file one-by-one. We
also update corresponding file system metadata to account for newly
mapped blocks. To achieve this, ext4_inode_csum_set() and
ext4_inode_blocks(), which were previously static, are now made
visible outside inode.c.
I updated e2fsprogs to set fast commit feature flag and to ignore fast
commit blocks during e2fsck. After applying all the patches in this
series, following runs of xfstests were performed:
- kvm-xfstests.sh -g log -c 4k
- kvm-xfstests.sh smoke
All the log tests were successful and smoke tests didn't introduce any
additional failures.
Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@...il.com>
---
fs/ext4/balloc.c | 7 ++-
fs/ext4/ext4.h | 4 ++
fs/ext4/extents.c | 19 +++++---
fs/ext4/inode.c | 8 ++--
fs/ext4/mballoc.c | 83 ++++++++++++++++++++++++++++++++
fs/ext4/mballoc.h | 2 +
fs/ext4/super.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 230 insertions(+), 12 deletions(-)
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 0b202e00d93f..75c3025c7089 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -360,7 +360,12 @@ static int ext4_validate_block_bitmap(struct super_block *sb,
struct buffer_head *bh)
{
ext4_fsblk_t blk;
- struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
+ struct ext4_group_info *grp;
+
+ if (EXT4_SB(sb)->fc_replay)
+ return 0;
+
+ grp = ext4_get_group_info(sb, block_group);
if (buffer_verified(bh))
return 0;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5d92a2e4f0af..44a4d16c241c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2577,6 +2577,10 @@ extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
extern void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid);
/* inode.c */
+void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
+ struct ext4_inode_info *ei);
+blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
+ struct ext4_inode_info *ei);
int ext4_inode_is_fast_symlink(struct inode *inode);
struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 66f7f4fb1612..59fe596ce97d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2894,7 +2894,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
int depth = ext_depth(inode);
struct ext4_ext_path *path = NULL;
struct partial_cluster partial;
- handle_t *handle;
+ handle_t *handle = NULL;
int i = 0, err = 0;
partial.pclu = 0;
@@ -2904,9 +2904,11 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
ext_debug("truncate since %u to %u\n", start, end);
/* probably first extent we're gonna free will be last in block */
- handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, depth + 1);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
+ if (!sbi->fc_replay) {
+ handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, depth + 1);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ }
again:
trace_ext4_ext_remove_space(inode, start, end, depth);
@@ -2926,7 +2928,8 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
/* find extent for or closest extent to this block */
path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
if (IS_ERR(path)) {
- ext4_journal_stop(handle);
+ if (!sbi->fc_replay)
+ ext4_journal_stop(handle);
return PTR_ERR(path);
}
depth = ext_depth(inode);
@@ -3012,7 +3015,8 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
path = kcalloc(depth + 1, sizeof(struct ext4_ext_path),
GFP_NOFS);
if (path == NULL) {
- ext4_journal_stop(handle);
+ if (!sbi->fc_replay)
+ ext4_journal_stop(handle);
return -ENOMEM;
}
path[0].p_maxdepth = path[0].p_depth = depth;
@@ -3142,7 +3146,8 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
path = NULL;
if (err == -EAGAIN)
goto again;
- ext4_journal_stop(handle);
+ if (!sbi->fc_replay)
+ ext4_journal_stop(handle);
return err;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index dd5d39a48363..21c9b5197c72 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -103,8 +103,8 @@ static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,
return provided == calculated;
}
-static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
- struct ext4_inode_info *ei)
+void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
+ struct ext4_inode_info *ei)
{
__u32 csum;
@@ -4801,8 +4801,8 @@ void ext4_set_inode_flags(struct inode *inode)
S_ENCRYPTED|S_CASEFOLD);
}
-static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
- struct ext4_inode_info *ei)
+blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode,
+ struct ext4_inode_info *ei)
{
blkcnt_t i_blocks ;
struct inode *inode = &(ei->vfs_inode);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a3e2767bdf2f..70551fa91237 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2915,6 +2915,89 @@ void ext4_exit_mballoc(void)
}
+/*
+ * ext4_mb_mark_used() - mark a physical block range as allocated on disk.
+ * @sb:    super block of the file system
+ * @block: first physical block of the range
+ * @len:   length of the range in blocks
+ *
+ * Sets the bits covering the range in the group's block bitmap, lowers the
+ * free cluster count in the group descriptor (and the flex group counter, if
+ * flex groups are in use) by the number of clusters that were not already
+ * set, refreshes the bitmap and descriptor checksums and dirties both
+ * buffers without a journal handle.
+ *
+ * The function returns void, so failures (bitmap read error, missing group
+ * descriptor, range overlapping metadata) only cause an early exit.
+ *
+ * NOTE(review): the range is assumed to lie within a single block group;
+ * nothing here splits a range that crosses a group boundary.
+ */
+void ext4_mb_mark_used(struct super_block *sb, ext4_fsblk_t block,
+		       int len)
+{
+	struct buffer_head *bitmap_bh = NULL;
+	struct ext4_group_desc *gdp;
+	struct buffer_head *gdp_bh;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	ext4_group_t group;
+	ext4_fsblk_t cluster;
+	ext4_grpblk_t blkoff;
+	int i, clen, newly_used, err;
+	int already_allocated_count;
+
+	cluster = EXT4_B2C(sbi, block);
+	/*
+	 * Round up: with bigalloc a range shorter than one cluster must still
+	 * mark the cluster it lives in. A plain shift would yield zero.
+	 */
+	clen = EXT4_NUM_B2C(sbi, len);
+
+	ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
+	bitmap_bh = ext4_read_block_bitmap(sb, group);
+	if (IS_ERR(bitmap_bh)) {
+		err = PTR_ERR(bitmap_bh);
+		bitmap_bh = NULL;
+		goto out_err;
+	}
+
+	err = -EIO;
+	gdp = ext4_get_group_desc(sb, group, &gdp_bh);
+	if (!gdp)
+		goto out_err;
+
+	if (!ext4_data_block_valid(sbi, block, len)) {
+		ext4_error(sb, "Allocating blks %llu-%llu which overlap mdata",
+			   cluster, cluster+clen);
+		/* File system mounted not to panic on error
+		 * Fix the bitmap and return EFSCORRUPTED
+		 * We leak some of the blocks here.
+		 */
+		ext4_lock_group(sb, group);
+		ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
+		ext4_unlock_group(sb, group);
+		err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
+		if (!err)
+			err = -EFSCORRUPTED;
+		goto out_err;
+	}
+
+	ext4_lock_group(sb, group);
+
+	/*
+	 * Replay may revisit blocks that are already marked; count them so
+	 * the free-cluster accounting is only charged for new allocations.
+	 */
+	already_allocated_count = 0;
+	for (i = 0; i < clen; i++)
+		if (mb_test_bit(blkoff + i, bitmap_bh->b_data))
+			already_allocated_count++;
+	newly_used = clen - already_allocated_count;
+
+	ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
+	if (ext4_has_group_desc_csum(sb) &&
+	    (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
+		/* First allocation in this group: initialise its free count. */
+		gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
+		ext4_free_group_clusters_set(sb, gdp,
+					     ext4_free_clusters_after_init(sb,
+						group, gdp));
+	}
+	ext4_free_group_clusters_set(sb, gdp,
+			ext4_free_group_clusters(sb, gdp) - newly_used);
+	ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
+	ext4_group_desc_csum_set(sb, group, gdp);
+
+	ext4_unlock_group(sb, group);
+
+	if (sbi->s_log_groups_per_flex) {
+		ext4_group_t flex_group = ext4_flex_group(sbi, group);
+
+		/*
+		 * Keep the flex group counter in step with the group
+		 * descriptor: same unit (clusters) and same amount.
+		 */
+		atomic64_sub(newly_used,
+			     &sbi->s_flex_groups[flex_group].free_clusters);
+	}
+
+	err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
+	if (err)
+		goto out_err;
+	err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
+
+out_err:
+	brelse(bitmap_bh);
+}
+
/*
* Check quota and mark chosen space (ac->ac_b_ex) non-free in bitmaps
* Returns 0 if success or error code
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 88c98f17e3d9..1881710041b6 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -215,4 +215,6 @@ ext4_mballoc_query_range(
ext4_mballoc_query_range_fn formatter,
void *priv);
+void ext4_mb_mark_used(struct super_block *sb, ext4_fsblk_t block,
+ int len);
#endif
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a291d41b91de..f38ff2089389 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -408,6 +408,118 @@ static int block_device_ejected(struct super_block *sb)
return bdi->dev == NULL;
}
+/*
+ * ext4_fc_add_block() - map one logical block of @inode during fc replay.
+ * @inode:     inode being replayed
+ * @lblk:      logical block to map
+ * @pblk:      physical block recorded in the fast-commit block
+ * @unwritten: non-zero if the extent should be marked unwritten
+ *
+ * If @lblk is already mapped nothing is changed (a mismatch with @pblk is
+ * only logged). Otherwise a one-block extent is inserted into the inode's
+ * extent tree and, if the insert succeeded, the physical block is marked
+ * used in the block bitmap. Errors are not propagated to the caller.
+ */
+static void ext4_fc_add_block(struct inode *inode, ext4_lblk_t lblk,
+			      ext4_fsblk_t pblk, int unwritten)
+{
+	struct ext4_extent ex;
+	struct ext4_ext_path *path = NULL;
+	struct ext4_map_blocks map;
+	int ret;
+
+	map.m_lblk = lblk;
+	map.m_len = 0x1;
+	ret = ext4_map_blocks(NULL, inode, &map, 0);
+	if (ret > 0) {
+		/* Already mapped, e.g. by an earlier fast commit replay. */
+		if (pblk != map.m_pblk)
+			jbd_debug(1, "Bad mapping found while replaying fc\n");
+		return;
+	}
+
+	ex.ee_block = cpu_to_le32(lblk);
+	ext4_ext_store_pblock(&ex, pblk);
+	/* ee_len is a 16-bit little-endian field. */
+	ex.ee_len = cpu_to_le16(0x1);
+	if (unwritten)
+		ext4_ext_mark_unwritten(&ex);
+
+	/* ext4_find_extent() reports failure as an error pointer, not NULL. */
+	path = ext4_find_extent(inode, lblk, NULL, 0);
+	if (IS_ERR(path)) {
+		jbd_debug(1, "Failed to find extent while replaying fc\n");
+		return;
+	}
+
+	down_write(&EXT4_I(inode)->i_data_sem);
+	ret = ext4_ext_insert_extent(NULL, inode, &path, &ex, 0);
+	/* Only account the block as used once the inode really refers to it. */
+	if (!ret)
+		ext4_mb_mark_used(inode->i_sb, ext4_ext_pblock(&ex), 0x1);
+	else
+		jbd_debug(1, "Failed to insert extent while replaying fc\n");
+	up_write(&EXT4_I(inode)->i_data_sem);
+
+	/* Drop the buffer references held by the path before freeing it. */
+	ext4_ext_drop_refs(path);
+	kfree(path);
+}
+
+/*
+ * ext4_journal_fc_replay_cb() - replay one fast-commit block.
+ * @journal: journal being recovered; j_private holds the super block
+ * @bh:      buffer holding the fast-commit block: a journal header, then a
+ *           struct ext4_fc_commit_hdr, then a run of EXT4_FC_TAG_EXT tags,
+ *           each followed by one struct ext4_extent
+ *
+ * Every extent in the block is mapped into the target inode one block at a
+ * time, then the raw on-disk inode is overwritten with the copy stored in
+ * the header, checksummed, dirtied and flushed to the device.
+ *
+ * sbi->fc_replay is set while the extents are replayed and is cleared again
+ * on every exit path.
+ *
+ * Returns 0 on success or when the inode cannot be loaded (the block is then
+ * skipped), otherwise the error from locating or writing the inode.
+ */
+static int ext4_journal_fc_replay_cb(journal_t *journal, struct buffer_head *bh)
+{
+	struct super_block *sb = journal->j_private;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_fc_commit_hdr *fc_hdr;
+	struct ext4_fc_tl *tl;
+	struct ext4_iloc iloc;
+	struct ext4_extent *ex;
+	struct inode *inode;
+	__u8 *end = (__u8 *)bh->b_data + bh->b_size;
+	int ret;
+
+	sbi->fc_replay = true;
+	fc_hdr = (struct ext4_fc_commit_hdr *)
+		  ((__u8 *)bh->b_data + sizeof(journal_header_t));
+
+	jbd_debug(3, "%s: Got FC block for inode %u at [%u,%u]\n", __func__,
+		  le32_to_cpu(fc_hdr->fc_ino), le32_to_cpu(fc_hdr->fc_tid),
+		  le32_to_cpu(fc_hdr->fc_subtid));
+
+	inode = ext4_iget(sb, le32_to_cpu(fc_hdr->fc_ino), EXT4_IGET_NORMAL);
+	if (IS_ERR(inode)) {
+		/* Inode cannot be loaded: skip this block, but leave replay mode. */
+		sbi->fc_replay = false;
+		return 0;
+	}
+
+	ret = ext4_get_inode_loc(inode, &iloc);
+	if (ret) {
+		iput(inode);
+		sbi->fc_replay = false;
+		return ret;
+	}
+
+	inode_lock(inode);
+	tl = (struct ext4_fc_tl *)(fc_hdr + 1);
+	/*
+	 * The block comes from disk, so stop as soon as a tag plus its extent
+	 * would no longer fit inside the buffer.
+	 */
+	while ((__u8 *)(tl + 1) + sizeof(*ex) <= end &&
+	       le16_to_cpu(tl->fc_tag) == EXT4_FC_TAG_EXT) {
+		int i;
+
+		ex = (struct ext4_extent *)(tl + 1);
+		tl = (struct ext4_fc_tl *)((__u8 *)tl +
+					   le16_to_cpu(tl->fc_len) +
+					   sizeof(*tl));
+		/*
+		 * We add block by block because part of extent may already have
+		 * been added by a previous fast commit replay.
+		 */
+		for (i = 0; i < ext4_ext_get_actual_len(ex); i++)
+			ext4_fc_add_block(inode, le32_to_cpu(ex->ee_block) + i,
+					  ext4_ext_pblock(ex) + i,
+					  ext4_ext_is_unwritten(ex));
+	}
+
+	/*
+	 * If the inode has inline data, copy the whole raw inode. Otherwise
+	 * copy everything except i_block, whose extent tree was just updated
+	 * by the ext4_fc_add_block() calls above.
+	 *
+	 * NOTE(review): sizeof(struct ext4_inode) is copied regardless of the
+	 * file system's on-disk inode size - confirm this cannot overrun the
+	 * inode slot on small-inode file systems.
+	 */
+	if (ext4_has_inline_data(inode)) {
+		memcpy(ext4_raw_inode(&iloc), &fc_hdr->inode,
+		       sizeof(struct ext4_inode));
+	} else {
+		memcpy(ext4_raw_inode(&iloc), &fc_hdr->inode,
+		       offsetof(struct ext4_inode, i_block));
+		memcpy(&ext4_raw_inode(&iloc)->i_generation,
+		       &fc_hdr->inode.i_generation,
+		       sizeof(struct ext4_inode) -
+		       offsetof(struct ext4_inode, i_generation));
+	}
+
+	/*
+	 * iloc already holds the inode buffer from ext4_get_inode_loc() and
+	 * there is no journal handle here, so ext4_reserve_inode_write() is
+	 * not called.
+	 * NOTE(review): it would look iloc up again and take a second
+	 * reference on iloc.bh that nothing releases - confirm against
+	 * inode.c.
+	 */
+	inode_unlock(inode);
+	sbi->fc_replay = false;
+
+	ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
+	ret = ext4_handle_dirty_metadata(NULL, inode, iloc.bh);
+	iput(inode);
+	if (!ret)
+		ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
+
+	brelse(iloc.bh);
+
+	return ret;
+}
+
+
static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
struct super_block *sb = journal->j_private;
@@ -4935,6 +5047,13 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
journal->j_fc_commit_callback = ext4_journal_fc_commit_cb;
journal->j_fc_cleanup_callback = ext4_journal_fc_cleanup_cb;
}
+
+ /*
+ * We set the replay callback even if fast commit is disabled because
+ * there could still be fast commit blocks that need to be replayed even
+ * if fast commit has now been turned off.
+ */
+ journal->j_fc_replay_callback = ext4_journal_fc_replay_cb;
write_lock(&journal->j_state_lock);
if (test_opt(sb, BARRIER))
journal->j_flags |= JBD2_BARRIER;
--
2.22.0.657.g960e92d24f-goog
Powered by blists - more mailing lists