lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250324054851.2509325-1-gautham.ananthakrishna@oracle.com>
Date: Mon, 24 Mar 2025 05:48:51 +0000
From: Gautham Ananthakrishna <gautham.ananthakrishna@...cle.com>
To: akpm@...ux-foundation.org, joseph.qi@...ux.alibaba.com
Cc: gautham.ananthakrishna@...cle.com, linux-kernel@...r.kernel.org,
        ocfs2-devel@...ts.linux.dev, rajesh.sivaramasubramaniom@...cle.com,
        junxiao.bi@...cle.com
Subject: [PATCH RFC 1/1] ocfs2: fix write IO performance improvement for high fragmentation

The commit 4eb7b93e03101fd3f35e69affe566e4b1e3e3dca caused a regression
in our test suite in discontig extent tests. Upon troubleshooting I found
The following issues.

1. The function ocfs2_cluster_group_search() was called for discontig allocations
as well. But it checks only the contiguous bits 'bg_contig_free_bits'.
It hit the ENOSPC in the following case in one of the tests.

ocfs2_mkdir()
 ocfs2_reserve_new_inode()
  ocfs2_reserve_suballoc_bits()
   ocfs2_block_group_alloc()
    ocfs2_block_group_alloc_discontig()
     __ocfs2_claim_clusters()
      ocfs2_claim_suballoc_bits()
       ocfs2_search_chain()
        ocfs2_cluster_group_search()

Looked like the commit did not consider discontig searches. To fix this,
I have split ocfs2_cluster_group_search() into *_common(), *_contig() and
*_discontig()

2. That commit enforced ocfs2_cluster_group_search() to search only the
'bits_wanted' number of bits whereas ocfs2_block_group_find_clear_bits()
fills the best available size and the function ocfs2_cluster_group_search()
itself is supposed to search 'min_bits' at the minimum and need not be
'bits_wanted' always.

Fixed the above issues in this patch.
This patch fixes 4eb7b93e03101fd3f35e69affe566e4b1e3e3dca

Signed-off-by: Gautham Ananthakrishna <gautham.ananthakrishna@...cle.com>
---
 fs/ocfs2/suballoc.c | 146 ++++++++++++++++++++++++++++----------------
 1 file changed, 95 insertions(+), 51 deletions(-)

diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 9bffa218bc084..be386e1d8e34d 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -82,7 +82,17 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
 				   u64 *last_alloc_group,
 				   int flags);
 
-static int ocfs2_cluster_group_search(struct inode *inode,
+static int ocfs2_cluster_group_search_contig(struct inode *inode,
+				      struct buffer_head *group_bh,
+				      u32 bits_wanted, u32 min_bits,
+				      u64 max_block,
+				      struct ocfs2_suballoc_result *res);
+static int ocfs2_cluster_group_search_common(struct inode *inode,
+				      struct buffer_head *group_bh,
+				      u32 bits_wanted, u32 min_bits,
+				      u64 max_block,
+				      struct ocfs2_suballoc_result *res);
+static int ocfs2_cluster_group_search_discontig(struct inode *inode,
 				      struct buffer_head *group_bh,
 				      u32 bits_wanted, u32 min_bits,
 				      u64 max_block,
@@ -593,6 +603,7 @@ ocfs2_block_group_alloc_discontig(handle_t *handle,
 		goto bail;
 	}
 
+	ac->ac_group_search = ocfs2_cluster_group_search_discontig;
 	status = ocfs2_extend_trans(handle,
 				    ocfs2_calc_bg_discontig_credits(osb->sb));
 	if (status) {
@@ -1127,7 +1138,9 @@ int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
 	int status;
 
 	ac->ac_which = OCFS2_AC_USE_MAIN;
-	ac->ac_group_search = ocfs2_cluster_group_search;
+
+	/* Make contig search as default */
+	ac->ac_group_search = ocfs2_cluster_group_search_contig;
 
 	status = ocfs2_reserve_suballoc_bits(osb, ac,
 					     GLOBAL_BITMAP_SYSTEM_INODE,
@@ -1525,9 +1538,7 @@ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg
 	return le16_to_cpu(bg->bg_free_bits_count) > wanted;
 }
 
-/* return 0 on success, -ENOSPC to keep searching and any other < 0
- * value on error. */
-static int ocfs2_cluster_group_search(struct inode *inode,
+static int ocfs2_cluster_group_search_common(struct inode *inode,
 				      struct buffer_head *group_bh,
 				      u32 bits_wanted, u32 min_bits,
 				      u64 max_block,
@@ -1542,57 +1553,90 @@ static int ocfs2_cluster_group_search(struct inode *inode,
 
 	BUG_ON(!ocfs2_is_cluster_bitmap(inode));
 
+	max_bits = le16_to_cpu(gd->bg_bits);
+
+	/* Tail groups in cluster bitmaps which aren't cpg
+	 * aligned are prone to partial extension by a failed
+	 * fs resize. If the file system resize never got to
+	 * update the dinode cluster count, then we don't want
+	 * to trust any clusters past it, regardless of what
+	 * the group descriptor says. */
+	gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
+						  le64_to_cpu(gd->bg_blkno));
+	if ((gd_cluster_off + max_bits) >
+	    OCFS2_I(inode)->ip_clusters) {
+		max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
+		trace_ocfs2_cluster_group_search_wrong_max_bits(
+			(unsigned long long)le64_to_cpu(gd->bg_blkno),
+			le16_to_cpu(gd->bg_bits),
+			OCFS2_I(inode)->ip_clusters, max_bits);
+	}
+
+	ret = ocfs2_block_group_find_clear_bits(osb,
+						group_bh, bits_wanted,
+						max_bits, res);
+	if (ret)
+		return ret;
+
+	if (max_block) {
+		blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
+						  gd_cluster_off +
+						  res->sr_bit_offset +
+						  res->sr_bits);
+		trace_ocfs2_cluster_group_search_max_block(
+			(unsigned long long)blkoff,
+			(unsigned long long)max_block);
+		if (blkoff > max_block)
+			return -ENOSPC;
+	}
+
+	/* ocfs2_block_group_find_clear_bits() might
+	 * return success, but we still want to return
+	 * -ENOSPC unless it found the minimum number
+	 * of bits. */
+	if (min_bits <= res->sr_bits)
+		search = 0; /* success */
+
+	return search;
+}
+
+static int ocfs2_cluster_group_search_discontig(struct inode *inode,
+				      struct buffer_head *group_bh,
+				      u32 bits_wanted, u32 min_bits,
+				      u64 max_block,
+				      struct ocfs2_suballoc_result *res)
+{
+	int search = -ENOSPC;
+	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
+
+	if (le16_to_cpu(gd->bg_free_bits_count) >= min_bits)
+		search = ocfs2_cluster_group_search_common(inode, group_bh,
+							   bits_wanted, min_bits,
+							   max_block, res);
+	return search;
+}
+
+/* return 0 on success, -ENOSPC to keep searching and any other < 0
+ * value on error. */
+static int ocfs2_cluster_group_search_contig(struct inode *inode,
+				      struct buffer_head *group_bh,
+				      u32 bits_wanted, u32 min_bits,
+				      u64 max_block,
+				      struct ocfs2_suballoc_result *res)
+{
+	int search = -ENOSPC;
+	struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
+
+
 	if (le16_to_cpu(gd->bg_contig_free_bits) &&
 	    le16_to_cpu(gd->bg_contig_free_bits) < bits_wanted)
 		return -ENOSPC;
 
 	/* ->bg_contig_free_bits may un-initialized, so compare again */
-	if (le16_to_cpu(gd->bg_free_bits_count) >= bits_wanted) {
-		max_bits = le16_to_cpu(gd->bg_bits);
-
-		/* Tail groups in cluster bitmaps which aren't cpg
-		 * aligned are prone to partial extension by a failed
-		 * fs resize. If the file system resize never got to
-		 * update the dinode cluster count, then we don't want
-		 * to trust any clusters past it, regardless of what
-		 * the group descriptor says. */
-		gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
-							  le64_to_cpu(gd->bg_blkno));
-		if ((gd_cluster_off + max_bits) >
-		    OCFS2_I(inode)->ip_clusters) {
-			max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
-			trace_ocfs2_cluster_group_search_wrong_max_bits(
-				(unsigned long long)le64_to_cpu(gd->bg_blkno),
-				le16_to_cpu(gd->bg_bits),
-				OCFS2_I(inode)->ip_clusters, max_bits);
-		}
-
-		ret = ocfs2_block_group_find_clear_bits(osb,
-							group_bh, bits_wanted,
-							max_bits, res);
-		if (ret)
-			return ret;
-
-		if (max_block) {
-			blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
-							  gd_cluster_off +
-							  res->sr_bit_offset +
-							  res->sr_bits);
-			trace_ocfs2_cluster_group_search_max_block(
-				(unsigned long long)blkoff,
-				(unsigned long long)max_block);
-			if (blkoff > max_block)
-				return -ENOSPC;
-		}
-
-		/* ocfs2_block_group_find_clear_bits() might
-		 * return success, but we still want to return
-		 * -ENOSPC unless it found the minimum number
-		 * of bits. */
-		if (min_bits <= res->sr_bits)
-			search = 0; /* success */
-	}
-
+	if (le16_to_cpu(gd->bg_free_bits_count) >= bits_wanted)
+		search = ocfs2_cluster_group_search_common(inode, group_bh,
+							   bits_wanted, min_bits,
+							   max_block, res);
 	return search;
 }
 
-- 
2.43.5


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ