[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <d332ed2f90a0c8533139741d25e7fc1935773e14.1757058211.git.ojaswin@linux.ibm.com>
Date: Fri, 5 Sep 2025 13:44:47 +0530
From: Ojaswin Mujoo <ojaswin@...ux.ibm.com>
To: linux-ext4@...r.kernel.org, "Theodore Ts'o" <tytso@....edu>
Cc: Ritesh Harjani <ritesh.list@...il.com>, Zhang Yi <yi.zhang@...wei.com>,
linux-kernel@...r.kernel.org, "Darrick J . Wong" <djwong@...nel.org>
Subject: [PATCH 2/2] fsmap: use blocksize units instead of cluster units
Currently, ext4's implementation of fsmap converts the ranges to cluster
units if bigalloc is enabled and then converts to block units whenever
needed. However, this back and forth conversion has known to cause
several edge case issues since even with bigalloc the metadata is still
in block size unit.
Hence, convert the code to use block size instead of cluster size.
Cluster size is only used when dealing with mballoc bitmap. This takes
care of the existing issues with the code, example, for a mapping as
follows:
xfs_io -c fsmap -d (bs=4096, clustersize=65536)
0: 254:16 [0..7]: static fs metadata 8
1: 254:16 [8..15]: special 102:1 8
2: 254:16 [16..327]: special 102:2 312
3: 254:16 [328..351]: special 102:3 24
4: 254:16 [352..375]: special 102:4 24
...
xfs_io -c fsmap -d 6 16 (before this patch):
0: 254:16 [0..7]: static fs metadata 8
1: 254:16 [8..23]: unknown 16 <--- incorrect/ambiguous entry
xfs_io -c fsmap -d 6 16 (after this patch):
0: 254:16 [0..7]: static fs metadata 8
1: 254:16 [8..15]: special 102:1 8
2: 254:16 [16..327]: special 102:2 312
Signed-off-by: Ojaswin Mujoo <ojaswin@...ux.ibm.com>
---
fs/ext4/fsmap.c | 42 +++++++++++++++++++++++-------------------
fs/ext4/mballoc.c | 25 +++++++++++++++++--------
2 files changed, 40 insertions(+), 27 deletions(-)
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index 22fc333244ef..02e5d501dda8 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -193,13 +193,11 @@ static int ext4_getfsmap_meta_helper(struct super_block *sb,
struct ext4_getfsmap_info *info = priv;
struct ext4_fsmap *p;
struct ext4_fsmap *tmp;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t fsb, fs_start, fs_end;
int error;
- fs_start = fsb = (EXT4_C2B(sbi, start) +
- ext4_group_first_block_no(sb, agno));
- fs_end = fs_start + EXT4_C2B(sbi, len);
+ fs_start = fsb = start + ext4_group_first_block_no(sb, agno);
+ fs_end = fs_start + len;
/*
* Return relevant extents from the meta_list. We emit all extents that
@@ -248,13 +246,12 @@ static int ext4_getfsmap_datadev_helper(struct super_block *sb,
struct ext4_getfsmap_info *info = priv;
struct ext4_fsmap *p;
struct ext4_fsmap *tmp;
- struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t fsb;
ext4_fsblk_t fslen;
int error;
- fsb = (EXT4_C2B(sbi, start) + ext4_group_first_block_no(sb, agno));
- fslen = EXT4_C2B(sbi, len);
+ fsb = start + ext4_group_first_block_no(sb, agno);
+ fslen = len;
/* If the retained free extent record is set... */
if (info->gfi_lastfree.fmr_owner) {
@@ -531,13 +528,13 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t start_fsb;
- ext4_fsblk_t end_fsb;
+ ext4_fsblk_t end_fsb, orig_end_fsb;
ext4_fsblk_t bofs;
ext4_fsblk_t eofs;
ext4_group_t start_ag;
ext4_group_t end_ag;
- ext4_grpblk_t first_cluster;
- ext4_grpblk_t last_cluster;
+ ext4_grpblk_t first_grpblk;
+ ext4_grpblk_t last_grpblk;
struct ext4_fsmap irec;
int error = 0;
@@ -553,10 +550,18 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
return 0;
start_fsb = keys[0].fmr_physical;
end_fsb = keys[1].fmr_physical;
+ orig_end_fsb = end_fsb;
- /* Determine first and last group to examine based on start and end */
- ext4_get_group_no_and_offset(sb, start_fsb, &start_ag, &first_cluster);
- ext4_get_group_no_and_offset(sb, end_fsb, &end_ag, &last_cluster);
+ /*
+ * Determine first and last group to examine based on start and end.
+ * NOTE: do_div() should take ext4_fsblk_t instead of ext4_group_t as
+ * first argument else it will fail on 32bit archs.
+ */
+ first_grpblk = do_div(start_fsb, EXT4_BLOCKS_PER_GROUP(sb));
+ last_grpblk = do_div(end_fsb, EXT4_BLOCKS_PER_GROUP(sb));
+
+ start_ag = start_fsb;
+ end_ag = end_fsb;
/*
* Convert the fsmap low/high keys to bg based keys. Initialize
@@ -564,7 +569,7 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
* of the bg.
*/
info->gfi_low = keys[0];
- info->gfi_low.fmr_physical = EXT4_C2B(sbi, first_cluster);
+ info->gfi_low.fmr_physical = first_grpblk;
info->gfi_low.fmr_length = 0;
memset(&info->gfi_high, 0xFF, sizeof(info->gfi_high));
@@ -584,8 +589,7 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
*/
if (info->gfi_agno == end_ag) {
info->gfi_high = keys[1];
- info->gfi_high.fmr_physical = EXT4_C2B(sbi,
- last_cluster);
+ info->gfi_high.fmr_physical = last_grpblk;
info->gfi_high.fmr_length = 0;
}
@@ -600,8 +604,8 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
info->gfi_high.fmr_owner);
error = ext4_mballoc_query_range(sb, info->gfi_agno,
- EXT4_B2C(sbi, info->gfi_low.fmr_physical),
- EXT4_B2C(sbi, info->gfi_high.fmr_physical),
+ info->gfi_low.fmr_physical,
+ info->gfi_high.fmr_physical,
ext4_getfsmap_meta_helper,
ext4_getfsmap_datadev_helper, info);
if (error)
@@ -627,7 +631,7 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
* any allocated blocks at the end of the range.
*/
irec.fmr_device = 0;
- irec.fmr_physical = end_fsb + 1;
+ irec.fmr_physical = orig_end_fsb + 1;
irec.fmr_length = 0;
irec.fmr_owner = EXT4_FMR_OWN_FREE;
irec.fmr_flags = 0;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 5898d92ba19f..cf78776940dd 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -7117,6 +7117,8 @@ ext4_mballoc_query_range(
ext4_grpblk_t start, next;
struct ext4_buddy e4b;
int error;
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ ext4_grpblk_t start_c, end_c, next_c;
error = ext4_mb_load_buddy(sb, group, &e4b);
if (error)
@@ -7125,9 +7127,9 @@ ext4_mballoc_query_range(
ext4_lock_group(sb, group);
- start = max(e4b.bd_info->bb_first_free, first);
- if (end >= EXT4_CLUSTERS_PER_GROUP(sb))
- end = EXT4_CLUSTERS_PER_GROUP(sb) - 1;
+ start = max(EXT4_C2B(sbi, e4b.bd_info->bb_first_free), first);
+ if (end >= EXT4_BLOCKS_PER_GROUP(sb))
+ end = EXT4_BLOCKS_PER_GROUP(sb) - 1;
if (meta_formatter && start != first) {
if (start > end)
start = end;
@@ -7138,19 +7140,26 @@ ext4_mballoc_query_range(
goto out_unload;
ext4_lock_group(sb, group);
}
- while (start <= end) {
- start = mb_find_next_zero_bit(bitmap, end + 1, start);
- if (start > end)
+
+ start_c = EXT4_B2C(sbi, start);
+ end_c = EXT4_B2C(sbi, end);
+
+ while (start_c <= end_c) {
+ start_c = mb_find_next_zero_bit(bitmap, end_c + 1, start_c);
+ if (start_c > end_c)
break;
- next = mb_find_next_bit(bitmap, end + 1, start);
+ next_c = mb_find_next_bit(bitmap, end_c + 1, start_c);
ext4_unlock_group(sb, group);
+
+ start = EXT4_C2B(sbi, start_c);
+ next = EXT4_C2B(sbi, next_c);
error = formatter(sb, group, start, next - start, priv);
if (error)
goto out_unload;
ext4_lock_group(sb, group);
- start = next + 1;
+ start_c = next_c + 1;
}
ext4_unlock_group(sb, group);
--
2.49.0
Powered by blists - more mailing lists