Message-Id: <20250512011927.2809400-12-yukuai1@huaweicloud.com>
Date: Mon, 12 May 2025 09:19:19 +0800
From: Yu Kuai <yukuai1@...weicloud.com>
To: hch@....de,
	xni@...hat.com,
	colyli@...nel.org,
	agk@...hat.com,
	snitzer@...nel.org,
	mpatocka@...hat.com,
	song@...nel.org,
	yukuai3@...wei.com
Cc: linux-kernel@...r.kernel.org,
	dm-devel@...ts.linux.dev,
	linux-raid@...r.kernel.org,
	yukuai1@...weicloud.com,
	yi.zhang@...wei.com,
	yangerkun@...wei.com,
	johnny.chenyi@...wei.com
Subject: [PATCH RFC md-6.16 v3 11/19] md/md-llbitmap: implement bitmap IO

From: Yu Kuai <yukuai3@...wei.com>

READ

While creating the bitmap, all pages will be allocated and read for the
llbitmap; there will be no further reads afterwards.

WRITE

WRITE IO is divided into blocks of the array's logical_block_size, and the
dirty state of each block is tracked independently, for example:

each page is 4k and contains 8 blocks; each block is 512 bytes and contains
512 bits:

| page0 | page1 | ... | page 31 |
|       |
|        \-----------------------\
|                                |
| block0 | block1 | ... | block7 |
|        |
|         \-----------------\
|                            |
| bit0 | bit1 | ... | bit511 |

From the IO path, if one bit is changed to Dirty or NeedSync, the
corresponding block will be marked dirty, and such a block must be written
first, before the data IO is issued. This behaviour will hurt IO
performance; to reduce the impact, if multiple bits are changed in the same
block within a short time, all bits in this block will be changed to
Dirty/NeedSync, so that there won't be any additional overhead until the
daemon clears the dirty bits.
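
To illustrate the mapping described above, here is a minimal user-space
sketch (not part of this patch): it assumes 4k pages and a 512-byte io_size
as in the example, ignores the superblock offset (BITMAP_SB_SIZE) that the
kernel code adds to pos, and uses illustrative names only:

#include <stdio.h>

#define EXAMPLE_PAGE_SIZE	4096
#define EXAMPLE_IO_SIZE		512	/* logical_block_size of the array */

int main(void)
{
	/* each bit's state occupies one byte at offset 'pos' in the bitmap */
	long pos = 5000;
	long page = pos / EXAMPLE_PAGE_SIZE;	/* cached page index   */
	long offset = pos % EXAMPLE_PAGE_SIZE;	/* byte in that page   */
	long block = offset / EXAMPLE_IO_SIZE;	/* dirty-tracked block */

	/*
	 * Marking 'pos' Dirty/NeedSync marks 'block' dirty; further
	 * writes into the same block while it is still dirty turn the
	 * whole block Dirty/NeedSync, so the flush cost is paid once
	 * until the daemon cleans the block.
	 */
	printf("pos %ld -> page %ld, block %ld (bytes %ld..%ld)\n",
	       pos, page, block,
	       page * EXAMPLE_PAGE_SIZE + block * EXAMPLE_IO_SIZE,
	       page * EXAMPLE_PAGE_SIZE + (block + 1) * EXAMPLE_IO_SIZE - 1);
	return 0;
}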

Signed-off-by: Yu Kuai <yukuai3@...wei.com>
---
 drivers/md/md-llbitmap.c | 183 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 183 insertions(+)

diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index 8ab4c77abd32..b27d10661387 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -279,3 +279,186 @@ static char state_machine[nr_llbitmap_state][nr_llbitmap_action] = {
 	[BitNeedSync] = {BitNone, BitSyncing, BitNone, BitNone, BitNone, BitNone, BitUnwritten, BitNone},
 	[BitSyncing] = {BitNone, BitSyncing, BitDirty, BitNeedSync, BitNeedSync, BitNone, BitUnwritten, BitNeedSync},
 };
+
+static bool is_raid456(struct mddev *mddev)
+{
+	return (mddev->level == 4 || mddev->level == 5 || mddev->level == 6);
+}
+
+static int llbitmap_read(struct llbitmap *llbitmap, enum llbitmap_state *state,
+			 loff_t pos)
+{
+	pos += BITMAP_SB_SIZE;
+	*state = llbitmap->barrier[pos >> PAGE_SHIFT].data[offset_in_page(pos)];
+	return 0;
+}
+
+static void llbitmap_set_page_dirty(struct llbitmap *llbitmap, int idx, int offset)
+{
+	struct llbitmap_barrier *barrier = &llbitmap->barrier[idx];
+	bool level_456 = is_raid456(llbitmap->mddev);
+	int io_size = llbitmap->io_size;
+	int bit = offset / io_size;
+	bool infectious = false;
+	int pos;
+
+	if (!test_bit(LLPageDirty, &barrier->flags))
+		set_bit(LLPageDirty, &barrier->flags);
+
+	/*
+	 * If the block is already dirty, or any other byte in the same block
+	 * is already BitDirty/BitNeedSync, mark every byte in the block dirty.
+	 */
+	if (test_and_set_bit(bit, barrier->dirty)) {
+		infectious = true;
+	} else {
+		for (pos = bit * io_size; pos < (bit + 1) * io_size;
+		     pos++) {
+			if (pos == offset)
+				continue;
+			if (barrier->data[pos] == BitDirty ||
+			    barrier->data[pos] == BitNeedSync) {
+				infectious = true;
+				break;
+			}
+		}
+	}
+
+	if (!infectious)
+		return;
+
+	for (pos = bit * io_size; pos < (bit + 1) * io_size; pos++) {
+		if (pos == offset)
+			continue;
+
+		switch (barrier->data[pos]) {
+		case BitUnwritten:
+			barrier->data[pos] = level_456 ? BitNeedSync : BitDirty;
+			break;
+		case BitClean:
+			barrier->data[pos] = BitDirty;
+			break;
+		}
+	}
+}
+
+static int llbitmap_write(struct llbitmap *llbitmap, enum llbitmap_state state,
+			  loff_t pos)
+{
+	int idx;
+	int offset;
+
+	pos += BITMAP_SB_SIZE;
+	idx = pos >> PAGE_SHIFT;
+	offset = offset_in_page(pos);
+
+	llbitmap->barrier[idx].data[offset] = state;
+	if (state == BitDirty || state == BitNeedSync)
+		llbitmap_set_page_dirty(llbitmap, idx, offset);
+	return 0;
+}
+
+static void llbitmap_free_pages(struct llbitmap *llbitmap)
+{
+	int i;
+
+	for (i = 0; i < BITMAP_MAX_PAGES; i++) {
+		struct page *page = llbitmap->pages[i];
+
+		if (!page)
+			return;
+
+		llbitmap->pages[i] = NULL;
+		__free_page(page);
+		percpu_ref_exit(&llbitmap->barrier[i].active);
+	}
+}
+
+static struct page *llbitmap_read_page(struct llbitmap *llbitmap, int idx)
+{
+	struct page *page = llbitmap->pages[idx];
+	struct mddev *mddev = llbitmap->mddev;
+	struct md_rdev *rdev;
+
+	if (page)
+		return page;
+
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
+		return ERR_PTR(-ENOMEM);
+
+	rdev_for_each(rdev, mddev) {
+		sector_t sector;
+
+		if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
+			continue;
+
+		sector = mddev->bitmap_info.offset + (idx << PAGE_SECTORS_SHIFT);
+
+		if (sync_page_io(rdev, sector, PAGE_SIZE, page, REQ_OP_READ, true))
+			return page;
+
+		md_error(mddev, rdev);
+	}
+
+	__free_page(page);
+	return ERR_PTR(-EIO);
+}
+
+static void llbitmap_write_page(struct llbitmap *llbitmap, int idx)
+{
+	struct page *page = llbitmap->pages[idx];
+	struct mddev *mddev = llbitmap->mddev;
+	struct md_rdev *rdev;
+	int bit;
+
+	for (bit = 0; bit < llbitmap->bits_per_page; bit++) {
+		struct llbitmap_barrier *barrier = &llbitmap->barrier[idx];
+
+		if (!test_and_clear_bit(bit, barrier->dirty))
+			continue;
+
+		rdev_for_each(rdev, mddev) {
+			sector_t sector;
+			sector_t bit_sector = llbitmap->io_size >> SECTOR_SHIFT;
+
+			if (rdev->raid_disk < 0 || test_bit(Faulty, &rdev->flags))
+				continue;
+
+			sector = mddev->bitmap_info.offset + rdev->sb_start +
+				 (idx << PAGE_SECTORS_SHIFT) +
+				 bit * bit_sector;
+			md_super_write(mddev, rdev, sector, llbitmap->io_size,
+				       page, bit * llbitmap->io_size);
+		}
+	}
+}
+
+static int llbitmap_cache_pages(struct llbitmap *llbitmap)
+{
+	int nr_pages = (llbitmap->chunks + BITMAP_SB_SIZE + PAGE_SIZE - 1) / PAGE_SIZE;
+	struct page *page;
+	int i = 0;
+
+	llbitmap->nr_pages = nr_pages;
+	while (i < nr_pages) {
+		page = llbitmap_read_page(llbitmap, i);
+		if (IS_ERR(page)) {
+			llbitmap_free_pages(llbitmap);
+			return PTR_ERR(page);
+		}
+
+		if (percpu_ref_init(&llbitmap->barrier[i].active, active_release,
+				    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
+			__free_page(page);
+			llbitmap_free_pages(llbitmap);
+			return -ENOMEM;
+		}
+
+		init_waitqueue_head(&llbitmap->barrier[i].wait);
+		llbitmap->barrier[i].data = page_address(page);
+		llbitmap->pages[i++] = page;
+	}
+
+	return 0;
+}
-- 
2.39.2

