[<prev] [next>] [day] [month] [year] [list]
Message-Id: <20250729090616.1008288-1-yukuai1@huaweicloud.com>
Date: Tue, 29 Jul 2025 17:06:16 +0800
From: Yu Kuai <yukuai1@...weicloud.com>
To: hch@....de,
houtao1@...wei.com,
axboe@...nel.dk
Cc: linux-block@...r.kernel.org,
linux-kernel@...r.kernel.org,
yukuai3@...wei.com,
yukuai1@...weicloud.com,
yi.zhang@...wei.com,
yangerkun@...wei.com,
johnny.chenyi@...wei.com
Subject: [PATCH v2] brd: use page reference to protect page lifetime
From: Yu Kuai <yukuai3@...wei.com>
As discussed [1], hold rcu for copying data from/to page is too heavy.
it's better to protect page with rcu around for page lookup and then
grab a reference to prevent page to be freed by discard.
[1] https://lore.kernel.org/all/eb41cab3-5946-4fe3-a1be-843dd6fca159@kernel.dk/
Signed-off-by: Yu Kuai <yukuai3@...wei.com>
---
Changes from v1:
- refer to filemap_get_entry(), use xas_load + xas_reload to fix
concurrent problems.
drivers/block/brd.c | 73 ++++++++++++++++++++++++++++-----------------
1 file changed, 46 insertions(+), 27 deletions(-)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 0c2eabe14af3..b7a0448ca928 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -44,45 +44,72 @@ struct brd_device {
};
/*
- * Look up and return a brd's page for a given sector.
+ * Look up and return a brd's page with reference grabbed for a given sector.
*/
static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
{
- return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
+ struct page *page;
+ XA_STATE(xas, &brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
+
+ rcu_read_lock();
+repeat:
+ xas_reset(&xas);
+ page = xas_load(&xas);
+ if (xas_retry(&xas, page))
+ goto repeat;
+
+ if (!page || xa_is_value(page)) {
+ page = NULL;
+ goto out;
+ }
+
+ if (!get_page_unless_zero(page))
+ goto repeat;
+
+ if (unlikely(page != xas_reload(&xas))) {
+ put_page(page);
+ goto repeat;
+ }
+out:
+ rcu_read_unlock();
+
+ return page;
}
/*
* Insert a new page for a given sector, if one does not already exist.
+ * The returned page will grab reference.
*/
static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
blk_opf_t opf)
- __releases(rcu)
- __acquires(rcu)
{
gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
struct page *page, *ret;
- rcu_read_unlock();
page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
- if (!page) {
- rcu_read_lock();
+ if (!page)
return ERR_PTR(-ENOMEM);
- }
xa_lock(&brd->brd_pages);
ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
page, gfp);
- rcu_read_lock();
- if (ret) {
+ if (!ret) {
+ brd->brd_nr_pages++;
+ get_page(page);
xa_unlock(&brd->brd_pages);
- __free_page(page);
- if (xa_is_err(ret))
- return ERR_PTR(xa_err(ret));
+ return page;
+ }
+
+ if (!xa_is_err(ret)) {
+ get_page(ret);
+ xa_unlock(&brd->brd_pages);
+ put_page(page);
return ret;
}
- brd->brd_nr_pages++;
+
xa_unlock(&brd->brd_pages);
- return page;
+ put_page(page);
+ return ERR_PTR(xa_err(ret));
}
/*
@@ -95,7 +122,7 @@ static void brd_free_pages(struct brd_device *brd)
pgoff_t idx;
xa_for_each(&brd->brd_pages, idx, page) {
- __free_page(page);
+ put_page(page);
cond_resched();
}
@@ -117,7 +144,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
- rcu_read_lock();
page = brd_lookup_page(brd, sector);
if (!page && op_is_write(opf)) {
page = brd_insert_page(brd, sector, opf);
@@ -135,13 +161,13 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
memset(kaddr, 0, bv.bv_len);
}
kunmap_local(kaddr);
- rcu_read_unlock();
bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len);
+ if (page)
+ put_page(page);
return true;
out_error:
- rcu_read_unlock();
if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT))
bio_wouldblock_error(bio);
else
@@ -149,13 +175,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
return false;
}
-static void brd_free_one_page(struct rcu_head *head)
-{
- struct page *page = container_of(head, struct page, rcu_head);
-
- __free_page(page);
-}
-
static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
{
sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
@@ -170,7 +189,7 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) {
page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
if (page) {
- call_rcu(&page->rcu_head, brd_free_one_page);
+ put_page(page);
brd->brd_nr_pages--;
}
aligned_sector += PAGE_SECTORS;
--
2.39.2
Powered by blists - more mailing lists