lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 16 May 2022 18:54:11 +0200
From:   Pankaj Raghav <p.raghav@...sung.com>
To:     axboe@...nel.dk, damien.lemoal@...nsource.wdc.com,
        pankydev8@...il.com, dsterba@...e.com, hch@....de
Cc:     linux-nvme@...ts.infradead.org, linux-fsdevel@...r.kernel.org,
        linux-btrfs@...r.kernel.org, jiangbo.365@...edance.com,
        linux-block@...r.kernel.org, gost.dev@...sung.com,
        p.raghav@...sung.com, linux-kernel@...r.kernel.org,
        dm-devel@...hat.com
Subject: [PATCH v4 08/13] btrfs:zoned: make sb for npo2 zone devices align
 with sb log offsets

Superblocks for zoned devices are fixed as 2 zones at 0, 512GB and 4TB.
These are fixed at these locations so that recovery tools can reliably
retrieve the superblocks even if one of the mirror gets corrupted.

power of 2 zone sizes align at these offsets irrespective of their
value but non power of 2 zone sizes will not align.

To make sure the first zone at mirror 1 and mirror 2 align, write zero
operation is performed to move the write pointer of the first zone to
the expected offset. This operation is performed only after a zone reset
of the first zone, i.e., when the second zone that contains the sb is FULL.

Signed-off-by: Pankaj Raghav <p.raghav@...sung.com>
---
 fs/btrfs/zoned.c | 68 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 63 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 3023c871e..805aeaa76 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -760,11 +760,44 @@ int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info)
 	return 0;
 }
 
+static int fill_sb_wp_offset(struct block_device *bdev, struct blk_zone *zone,
+			     int mirror, u64 *wp_ret)
+{
+	u64 offset = 0;
+	int ret = 0;
+
+	ASSERT(!is_power_of_two_u64(zone->len));
+	ASSERT(zone->wp == zone->start);
+	ASSERT(mirror != 0);
+
+	switch (mirror) {
+	case 1:
+		div64_u64_rem(BTRFS_SB_LOG_FIRST_OFFSET >> SECTOR_SHIFT,
+			      zone->len, &offset);
+		break;
+	case 2:
+		div64_u64_rem(BTRFS_SB_LOG_SECOND_OFFSET >> SECTOR_SHIFT,
+			      zone->len, &offset);
+		break;
+	}
+
+	ret =  blkdev_issue_zeroout(bdev, zone->start, offset, GFP_NOFS, 0);
+	if (ret)
+		return ret;
+
+	zone->wp += offset;
+	zone->cond = BLK_ZONE_COND_IMP_OPEN;
+	*wp_ret = zone->wp << SECTOR_SHIFT;
+
+	return 0;
+}
+
 static int sb_log_location(struct block_device *bdev, struct blk_zone *zones,
-			   int rw, u64 *bytenr_ret)
+			   int rw, int mirror, u64 *bytenr_ret)
 {
 	u64 wp;
 	int ret;
+	bool zones_empty = false;
 
 	if (zones[0].type == BLK_ZONE_TYPE_CONVENTIONAL) {
 		*bytenr_ret = zones[0].start << SECTOR_SHIFT;
@@ -775,13 +808,31 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones,
 	if (ret != -ENOENT && ret < 0)
 		return ret;
 
+	if (ret == -ENOENT)
+		zones_empty = true;
+
 	if (rw == WRITE) {
 		struct blk_zone *reset = NULL;
+		bool is_sb_offset_write_req = false;
+		u32 reset_zone_nr = -1;
 
-		if (wp == zones[0].start << SECTOR_SHIFT)
+		if (wp == zones[0].start << SECTOR_SHIFT) {
 			reset = &zones[0];
-		else if (wp == zones[1].start << SECTOR_SHIFT)
+			reset_zone_nr = 0;
+		} else if (wp == zones[1].start << SECTOR_SHIFT) {
 			reset = &zones[1];
+			reset_zone_nr = 1;
+		}
+
+		/*
+		 * Non po2 zone sizes will not align naturally at
+		 * mirror 1 (512GB) and mirror 2 (4TB). The wp of the
+		 * 1st zone in those superblock mirrors need to be
+		 * moved to align at those offsets.
+		 */
+		is_sb_offset_write_req =
+			(zones_empty || (reset_zone_nr == 0)) && mirror &&
+			!is_power_of_2(zones[0].len);
 
 		if (reset && reset->cond != BLK_ZONE_COND_EMPTY) {
 			ASSERT(sb_zone_is_full(reset));
@@ -795,6 +846,13 @@ static int sb_log_location(struct block_device *bdev, struct blk_zone *zones,
 			reset->cond = BLK_ZONE_COND_EMPTY;
 			reset->wp = reset->start;
 		}
+
+		if (is_sb_offset_write_req) {
+			ret = fill_sb_wp_offset(bdev, &zones[0], mirror, &wp);
+			if (ret)
+				return ret;
+		}
+
 	} else if (ret != -ENOENT) {
 		/*
 		 * For READ, we want the previous one. Move write pointer to
@@ -851,7 +909,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
 	if (ret != BTRFS_NR_SB_LOG_ZONES)
 		return -EIO;
 
-	return sb_log_location(bdev, zones, rw, bytenr_ret);
+	return sb_log_location(bdev, zones, rw, mirror, bytenr_ret);
 }
 
 int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw,
@@ -877,7 +935,7 @@ int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw,
 
 	return sb_log_location(device->bdev,
 			       &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror],
-			       rw, bytenr_ret);
+			       rw, mirror, bytenr_ret);
 }
 
 static inline bool is_sb_log_zone(struct btrfs_zoned_device_info *zinfo,
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ