lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1526389583-32239-1-git-send-email-wshilong1991@gmail.com>
Date:   Tue, 15 May 2018 22:06:23 +0900
From:   Wang Shilong <wangshilong1991@...il.com>
To:     linux-ext4@...r.kernel.org
Cc:     andreas.dilger@...el.com, Wang Shilong <wshilong@....com>
Subject: [PATCH] ext4: add an interface to load block bitmaps

From: Wang Shilong <wshilong@....com>

During our benchmarking, we found that write performance is
sometimes not stable enough: small reads issued during writes
can drop throughput by ~30%.

  It turned out that loading block bitmaps adds latency
here. Also, for a heavily fragmented filesystem, we might
need to load many bitmaps to find some free blocks.

  To improve this situation, this patch adds an interface to
load block bitmaps into memory and keep them pinned until
umount or until they are released on purpose. This stabilizes
write performance and improves the performance of a heavily
fragmented filesystem.

Tested-by: Shuichi Ihara <sihara@....com>
Signed-off-by: Wang Shilong <wshilong@....com>
---
 fs/ext4/balloc.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/ext4.h   |  12 +++++++
 fs/ext4/super.c  |   3 ++
 fs/ext4/sysfs.c  |  26 ++++++++++++++
 4 files changed, 146 insertions(+)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index b00481c..ceb63e8 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -505,6 +505,8 @@ int ext4_wait_block_bitmap(struct super_block *sb, ext4_group_t block_group,
 					EXT4_GROUP_INFO_BBITMAP_CORRUPT);
 		return -EIO;
 	}
+	/* race is fine */
+	EXT4_SB(sb)->bbitmaps_read_cnt++;
 	clear_buffer_new(bh);
 	/* Panic or remount fs read-only if block bitmap is invalid */
 	return ext4_validate_block_bitmap(sb, desc, block_group, bh);
@@ -660,6 +662,109 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 	return ret;
 }
 
+/*
+ * Read every block bitmap into memory; for EXT4_PIN_BBITMAPS also keep one
+ * buffer reference per bitmap so it stays cached until
+ * ext4_unpin_block_bitmaps_bh().  Load tolerates read errors; pin fails on
+ * the first one and drops the references already taken, so unpin never has
+ * to remember which bitmaps were pinned.  Returns 0 or a negative errno.
+ */
+int ext4_load_block_bitmaps_bh(struct super_block *sb, unsigned int op)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct buffer_head *bitmap_bh;
+	ext4_group_t i, j;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
+	ext4_group_t cnt = 0;
+	int err = 0;
+
+	if (op < EXT4_LOAD_BBITMAPS || op > EXT4_PIN_BBITMAPS)
+		return -EINVAL;
+
+	mutex_lock(&sbi->s_load_bbitmaps_lock);
+	/* don't pin bitmaps several times */
+	if (sbi->s_load_bbitmaps == EXT4_PIN_BBITMAPS)
+		goto out;
+
+	for (i = 0; i < ngroups; i++) {
+		if (!ext4_get_group_desc(sb, i, NULL))
+			continue;
+		bitmap_bh = ext4_read_block_bitmap(sb, i);
+		if (IS_ERR(bitmap_bh)) {
+			if (op == EXT4_LOAD_BBITMAPS)
+				continue;
+			err = PTR_ERR(bitmap_bh);
+			goto failed;
+		}
+		/* pin keeps the reference taken by the read */
+		if (op == EXT4_LOAD_BBITMAPS)
+			brelse(bitmap_bh);
+		cnt++;
+	}
+	/* start counting bitmap reads from zero again */
+	sbi->bbitmaps_read_cnt = 0;
+	ext4_msg(sb, KERN_INFO, "%s %u block bitmaps finished",
+		 op == EXT4_PIN_BBITMAPS ? "pin" : "load", cnt);
+	/* record what was done so that unpin only releases real pins */
+	sbi->s_load_bbitmaps = op;
+	goto out;
+failed:
+	/* undo the pins taken on groups [0, i) */
+	for (j = 0; j < i; j++) {
+		if (!ext4_get_group_desc(sb, j, NULL))
+			continue;
+		bitmap_bh = ext4_read_block_bitmap(sb, j);
+		if (!IS_ERR(bitmap_bh)) {
+			brelse(bitmap_bh);	/* reference from this lookup */
+			brelse(bitmap_bh);	/* reference held by the pin */
+		}
+	}
+out:
+	mutex_unlock(&sbi->s_load_bbitmaps_lock);
+	return err;
+}
+
+/*
+ * Release the buffer references held by a previous EXT4_PIN_BBITMAPS and
+ * log the bitmap read counter.  No-op when nothing was loaded or pinned.
+ */
+void ext4_unpin_block_bitmaps_bh(struct super_block *sb)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct buffer_head *bitmap_bh;
+	ext4_group_t i;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
+	ext4_group_t cnt = 0;
+
+	mutex_lock(&sbi->s_load_bbitmaps_lock);
+	if (sbi->s_load_bbitmaps == EXT4_UNPIN_BBITMAPS)
+		goto out;
+
+	ext4_msg(sb, KERN_INFO,
+		 "Read block bitmaps: %lu after %s",
+		 sbi->bbitmaps_read_cnt,
+		 sbi->s_load_bbitmaps == EXT4_PIN_BBITMAPS ? "pin" : "load");
+
+	/* a plain load holds no references, so there is nothing to drop */
+	if (sbi->s_load_bbitmaps != EXT4_PIN_BBITMAPS)
+		goto out;
+
+	for (i = 0; i < ngroups; i++) {
+		if (!ext4_get_group_desc(sb, i, NULL))
+			continue;
+		bitmap_bh = ext4_read_block_bitmap(sb, i);
+		if (IS_ERR(bitmap_bh))
+			continue;
+		brelse(bitmap_bh);	/* reference from this lookup */
+		brelse(bitmap_bh);	/* reference held by the pin */
+		cnt++;
+	}
+	ext4_msg(sb, KERN_INFO, "Unpin %u block bitmaps finished", cnt);
+	sbi->s_load_bbitmaps = EXT4_UNPIN_BBITMAPS;
+out:
+	mutex_unlock(&sbi->s_load_bbitmaps_lock);
+}
+
 /**
  * ext4_count_free_clusters() -- count filesystem free clusters
  * @sb:		superblock
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index fa52b7d..4f9ee73 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1317,6 +1317,12 @@ struct ext4_super_block {
 /* Number of quota types we support */
 #define EXT4_MAXQUOTAS 3
 
+enum {
+	EXT4_UNPIN_BBITMAPS = 0,	/* release pinned block bitmaps */
+	EXT4_LOAD_BBITMAPS,		/* read bitmaps, keep no reference */
+	EXT4_PIN_BBITMAPS,		/* read bitmaps and hold a reference */
+};
+
 /*
  * fourth extended-fs super-block data in memory
  */
@@ -1487,6 +1493,10 @@ struct ext4_sb_info {
 	/* Barrier between changing inodes' journal flags and writepages ops. */
 	struct percpu_rw_semaphore s_journal_flag_rwsem;
 	struct dax_device *s_daxdev;
+
+	struct mutex s_load_bbitmaps_lock;
+	unsigned long bbitmaps_read_cnt;
+	unsigned int s_load_bbitmaps;
 };
 
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -2224,6 +2234,8 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
 				  struct buffer_head *bh);
 
 /* balloc.c */
+int ext4_load_block_bitmaps_bh(struct super_block *sb, unsigned int op);
+void ext4_unpin_block_bitmaps_bh(struct super_block *sb);
 extern void ext4_get_group_no_and_offset(struct super_block *sb,
 					 ext4_fsblk_t blocknr,
 					 ext4_group_t *blockgrpp,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1388e56..b3e896f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -902,6 +902,7 @@ static void ext4_put_super(struct super_block *sb)
 	int aborted = 0;
 	int i, err;
 
+	ext4_unpin_block_bitmaps_bh(sb);
 	ext4_unregister_li_request(sb);
 	ext4_quota_off_umount(sb);
 
@@ -4393,6 +4394,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
 	ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
 
+	mutex_init(&EXT4_SB(sb)->s_load_bbitmaps_lock);
+
 	kfree(orig_data);
 	return 0;
 
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 9ebd26c..89396b3f 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -23,6 +23,7 @@
 	attr_session_write_kbytes,
 	attr_lifetime_write_kbytes,
 	attr_reserved_clusters,
+	attr_load_bbitmaps,
 	attr_inode_readahead,
 	attr_trigger_test_error,
 	attr_feature,
@@ -105,6 +106,24 @@ static ssize_t reserved_clusters_store(struct ext4_sb_info *sbi,
 	return count;
 }
 
+/* sysfs write handler for load_bbitmaps: 0 unpins, 1 loads, 2 pins. */
+static ssize_t load_bbitmaps_store(struct ext4_sb_info *sbi,
+				   const char *buf, size_t count)
+{
+	unsigned long long op;
+	int err;
+
+	if (kstrtoull(skip_spaces(buf), 0, &op) || op > EXT4_PIN_BBITMAPS)
+		return -EINVAL;
+
+	if (op == EXT4_UNPIN_BBITMAPS) {
+		ext4_unpin_block_bitmaps_bh(sbi->s_sb);
+		return count;
+	}
+	err = ext4_load_block_bitmaps_bh(sbi->s_sb, op);
+	return err ? err : count;
+}
+
 static ssize_t trigger_test_error(struct ext4_sb_info *sbi,
 				  const char *buf, size_t count)
 {
@@ -163,6 +182,7 @@ static ssize_t trigger_test_error(struct ext4_sb_info *sbi,
 EXT4_ATTR_FUNC(session_write_kbytes, 0444);
 EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444);
 EXT4_ATTR_FUNC(reserved_clusters, 0644);
+EXT4_ATTR_FUNC(load_bbitmaps, 0644);
 
 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead,
 		 ext4_sb_info, s_inode_readahead_blks);
@@ -193,6 +213,7 @@ static ssize_t trigger_test_error(struct ext4_sb_info *sbi,
 	ATTR_LIST(session_write_kbytes),
 	ATTR_LIST(lifetime_write_kbytes),
 	ATTR_LIST(reserved_clusters),
+	ATTR_LIST(load_bbitmaps),
 	ATTR_LIST(inode_readahead_blks),
 	ATTR_LIST(inode_goal),
 	ATTR_LIST(mb_stats),
@@ -270,6 +291,9 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
 		return snprintf(buf, PAGE_SIZE, "%llu\n",
 				(unsigned long long)
 				atomic64_read(&sbi->s_resv_clusters));
+	case attr_load_bbitmaps:
+		return snprintf(buf, PAGE_SIZE, "%u\n",
+				sbi->s_load_bbitmaps);
 	case attr_inode_readahead:
 	case attr_pointer_ui:
 		if (!ptr)
@@ -302,6 +326,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj,
 	switch (a->attr_id) {
 	case attr_reserved_clusters:
 		return reserved_clusters_store(sbi, buf, len);
+	case attr_load_bbitmaps:
+		return load_bbitmaps_store(sbi, buf, len);
 	case attr_pointer_ui:
 		if (!ptr)
 			return 0;
-- 
1.8.3.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ