lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <4df93910-d942-2410-f7cd-cc28f7a00a86@gmail.com>
Date:   Wed, 26 May 2021 16:43:51 +0800
From:   Wang Jianchao <jianchao.wan9@...il.com>
To:     Theodore Ts'o <tytso@....edu>,
        Andreas Dilger <adilger.kernel@...ger.ca>
Cc:     linux-ext4@...r.kernel.org, linux-kernel@...r.kernel.org,
        lishujin@...ishou.com
Subject: [PATCH V2 6/7] ext4: use bb_free_root to get the free data entry

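This also prepares for the upcoming async background discard.
In this patch, s_freed_data_list is removed; instead, we iterate
over every group's bb_free_root rb tree to find the entries that
belong to the committed transaction. After this, we no longer need
to maintain the list when inserting and merging free data entries.
Since s_md_lock no longer protects s_mb_free_pending, the counter
is converted to an atomic_t.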

Signed-off-by: Wang Jianchao <wangjianchao@...ishou.com>
---
 fs/ext4/balloc.c  |   2 +-
 fs/ext4/ext4.h    |   4 +--
 fs/ext4/mballoc.c | 104 +++++++++++++++++++++++++-----------------------------
 3 files changed, 50 insertions(+), 60 deletions(-)

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 74a5172..8053a5f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -652,7 +652,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 	 * possible we just missed a transaction commit that did so
 	 */
 	smp_mb();
-	if (sbi->s_mb_free_pending == 0)
+	if (!atomic_read(&sbi->s_mb_free_pending))
 		return ext4_has_free_clusters(sbi, 1, 0);
 
 	/*
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 826a56e3..5c5c8e4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1525,9 +1525,7 @@ struct ext4_sb_info {
 	unsigned short *s_mb_offsets;
 	unsigned int *s_mb_maxs;
 	unsigned int s_group_info_size;
-	unsigned int s_mb_free_pending;
-	struct list_head s_freed_data_list;	/* List of blocks to be freed
-						   after commit completed */
+	atomic_t s_mb_free_pending;
 
 	/* tunables */
 	unsigned long s_stripe;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c2bf40a..15715e7 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -354,8 +354,7 @@ static inline struct ext4_free_data *efd_entry(struct rb_node *n)
 {
 	return rb_entry_safe(n, struct ext4_free_data, efd_node);
 }
-static int ext4_insert_free_data(struct ext4_sb_info *sbi,
-		struct rb_root *root, struct ext4_free_data *nfd);
+static int ext4_insert_free_data(struct rb_root *root, struct ext4_free_data *nfd);
 
 /*
  * The algorithm using this percpu seq counter goes below:
@@ -2857,8 +2856,7 @@ int ext4_mb_init(struct super_block *sb)
 
 	spin_lock_init(&sbi->s_md_lock);
 	spin_lock_init(&sbi->s_bal_lock);
-	sbi->s_mb_free_pending = 0;
-	INIT_LIST_HEAD(&sbi->s_freed_data_list);
+	atomic_set(&sbi->s_mb_free_pending, 0);
 
 	sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
 	sbi->s_mb_min_to_scan = MB_DEFAULT_MIN_TO_SCAN;
@@ -3040,9 +3038,7 @@ static void ext4_free_data_in_buddy(struct super_block *sb,
 	/* we expect to find existing buddy because it's pinned */
 	BUG_ON(err != 0);
 
-	spin_lock(&EXT4_SB(sb)->s_md_lock);
-	EXT4_SB(sb)->s_mb_free_pending -= entry->efd_count;
-	spin_unlock(&EXT4_SB(sb)->s_md_lock);
+	atomic_sub(entry->efd_count, &EXT4_SB(sb)->s_mb_free_pending);
 
 	db = e4b.bd_info;
 	/* there are blocks to put in buddy to make them really free */
@@ -3084,37 +3080,41 @@ static void ext4_free_data_in_buddy(struct super_block *sb,
 void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct ext4_free_data *entry, *tmp;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
+	struct ext4_free_data *fd, *nfd;
+	struct ext4_group_info *grp;
 	struct bio *discard_bio = NULL;
 	struct list_head freed_data_list;
-	struct list_head *cut_pos = NULL;
-	int err;
+	int err, i;
 
-	INIT_LIST_HEAD(&freed_data_list);
+	if (!atomic_read(&sbi->s_mb_free_pending))
+		return;
 
-	spin_lock(&sbi->s_md_lock);
-	list_for_each_entry(entry, &sbi->s_freed_data_list, efd_list) {
-		if (entry->efd_tid != commit_tid)
-			break;
-		cut_pos = &entry->efd_list;
+	INIT_LIST_HEAD(&freed_data_list);
+	for (i = 0; i < ngroups; i++) {
+		grp = ext4_get_group_info(sb, i);
+		ext4_lock_group(sb, i);
+		rbtree_postorder_for_each_entry_safe(fd, nfd,
+				&grp->bb_free_root, efd_node) {
+			if (fd->efd_tid != commit_tid)
+				continue;
+			INIT_LIST_HEAD(&fd->efd_list);
+			list_add_tail(&fd->efd_list, &freed_data_list);
+		}
+		ext4_unlock_group(sb, i);
 	}
-	if (cut_pos)
-		list_cut_position(&freed_data_list, &sbi->s_freed_data_list,
-				  cut_pos);
-	spin_unlock(&sbi->s_md_lock);
 
 	if (test_opt(sb, DISCARD)) {
-		list_for_each_entry(entry, &freed_data_list, efd_list) {
-			err = ext4_issue_discard(sb, entry->efd_group,
-						 entry->efd_start_cluster,
-						 entry->efd_count,
+		list_for_each_entry(fd, &freed_data_list, efd_list) {
+			err = ext4_issue_discard(sb, fd->efd_group,
+						 fd->efd_start_cluster,
+						 fd->efd_count,
 						 &discard_bio);
 			if (err && err != -EOPNOTSUPP) {
 				ext4_msg(sb, KERN_WARNING, "discard request in"
 					 " group:%d block:%d count:%d failed"
-					 " with %d", entry->efd_group,
-					 entry->efd_start_cluster,
-					 entry->efd_count, err);
+					 " with %d", fd->efd_group,
+					 fd->efd_start_cluster, fd->efd_count, err);
 			} else if (err == -EOPNOTSUPP)
 				break;
 		}
@@ -3125,8 +3125,8 @@ void ext4_process_freed_data(struct super_block *sb, tid_t commit_tid)
 		}
 	}
 
-	list_for_each_entry_safe(entry, tmp, &freed_data_list, efd_list)
-		ext4_free_data_in_buddy(sb, entry);
+	list_for_each_entry_safe(fd, nfd, &freed_data_list, efd_list)
+		ext4_free_data_in_buddy(sb, fd);
 }
 
 int __init ext4_init_mballoc(void)
@@ -5051,32 +5051,27 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
  * are contiguous, AND the extents were freed by the same transaction,
  * AND the blocks are associated with the same group.
  */
-static void ext4_try_merge_freed_extent(struct ext4_sb_info *sbi,
-					struct ext4_free_data *entry,
-					struct ext4_free_data *new_entry,
-					struct rb_root *entry_rb_root)
+static void ext4_try_merge_freed_extent(struct rb_root *root,
+	struct ext4_free_data *fd, struct ext4_free_data *nfd)
 {
-	if ((entry->efd_tid != new_entry->efd_tid) ||
-	    (entry->efd_group != new_entry->efd_group))
+	if ((fd->efd_tid != nfd->efd_tid) ||
+	    (fd->efd_group != nfd->efd_group))
 		return;
-	if (entry->efd_start_cluster + entry->efd_count ==
-	    new_entry->efd_start_cluster) {
-		new_entry->efd_start_cluster = entry->efd_start_cluster;
-		new_entry->efd_count += entry->efd_count;
-	} else if (new_entry->efd_start_cluster + new_entry->efd_count ==
-		   entry->efd_start_cluster) {
-		new_entry->efd_count += entry->efd_count;
+	if (fd->efd_start_cluster + fd->efd_count ==
+	    nfd->efd_start_cluster) {
+		nfd->efd_start_cluster = fd->efd_start_cluster;
+		nfd->efd_count += fd->efd_count;
+	} else if (nfd->efd_start_cluster + nfd->efd_count ==
+		   fd->efd_start_cluster) {
+		nfd->efd_count += fd->efd_count;
 	} else
 		return;
-	spin_lock(&sbi->s_md_lock);
-	list_del(&entry->efd_list);
-	spin_unlock(&sbi->s_md_lock);
-	rb_erase(&entry->efd_node, entry_rb_root);
-	kmem_cache_free(ext4_free_data_cachep, entry);
+	rb_erase(&fd->efd_node, root);
+	kmem_cache_free(ext4_free_data_cachep, fd);
 }
 
-static int ext4_insert_free_data(struct ext4_sb_info *sbi,
-		struct rb_root *root, struct ext4_free_data *nfd)
+static int ext4_insert_free_data(struct rb_root *root,
+		struct ext4_free_data *nfd)
 {
 	struct rb_node **n = &root->rb_node;
 	struct rb_node *p = NULL;
@@ -5100,11 +5095,11 @@ static int ext4_insert_free_data(struct ext4_sb_info *sbi,
 	/* Now try to see the extent can be merged to left and right */
 	fd = efd_entry(rb_prev(&nfd->efd_node));
 	if (fd)
-		ext4_try_merge_freed_extent(sbi, fd, nfd, root);
+		ext4_try_merge_freed_extent(root, fd, nfd);
 
 	fd = efd_entry(rb_next(&nfd->efd_node));
 	if (fd)
-		ext4_try_merge_freed_extent(sbi, fd, nfd, root);
+		ext4_try_merge_freed_extent(root, fd, nfd);
 
 	return 0;
 }
@@ -5122,7 +5117,7 @@ static int ext4_insert_free_data(struct ext4_sb_info *sbi,
 	BUG_ON(e4b->bd_bitmap_page == NULL);
 	BUG_ON(e4b->bd_buddy_page == NULL);
 
-	if (ext4_insert_free_data(sbi, &db->bb_free_root, nfd)) {
+	if (ext4_insert_free_data(&db->bb_free_root, nfd)) {
 		ext4_grp_locked_error(sb, e4b->bd_group, 0,
 				ext4_group_first_block_no(sb, e4b->bd_group) +
 				EXT4_C2B(sbi, nfd->efd_start_cluster),
@@ -5140,10 +5135,7 @@ static int ext4_insert_free_data(struct ext4_sb_info *sbi,
 		get_page(e4b->bd_bitmap_page);
 	}
 
-	spin_lock(&sbi->s_md_lock);
-	list_add_tail(&nfd->efd_list, &sbi->s_freed_data_list);
-	sbi->s_mb_free_pending += nfd->efd_count;
-	spin_unlock(&sbi->s_md_lock);
+	atomic_add(nfd->efd_count, &sbi->s_mb_free_pending);
 	return 0;
 }
 
-- 
1.8.3.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ