[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f84e1519-812b-ee19-ddb6-6670a02a0c40@huaweicloud.com>
Date: Mon, 1 Sep 2025 11:42:14 +0800
From: Yu Kuai <yukuai1@...weicloud.com>
To: Yu Kuai <yukuai1@...weicloud.com>, hch@....de, houtao1@...wei.com,
axboe@...nel.dk
Cc: linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
yi.zhang@...wei.com, yangerkun@...wei.com, johnny.chenyi@...wei.com,
"yukuai (C)" <yukuai3@...wei.com>
Subject: Re: [PATCH v3] brd: use page reference to protect page lifetime
Friendly ping ...
在 2025/08/11 14:56, Yu Kuai 写道:
> From: Yu Kuai <yukuai3@...wei.com>
>
> As discussed [1], holding the RCU read lock while copying data from/to a
> page is too heavy. It's better to hold RCU only around the page lookup and
> then grab a reference to prevent the page from being freed by discard.
>
> [1] https://lore.kernel.org/all/eb41cab3-5946-4fe3-a1be-843dd6fca159@kernel.dk/
>
> Signed-off-by: Yu Kuai <yukuai3@...wei.com>
> ---
> Changes from v2:
> - move xas_reset() to the error path;
> - remove the unnecessary xa_is_value() check;
> Changes from v1:
> - following filemap_get_entry(), use xas_load() + xas_reload() to fix
> concurrency problems.
>
> drivers/block/brd.c | 75 +++++++++++++++++++++++++++++----------------
> 1 file changed, 48 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/block/brd.c b/drivers/block/brd.c
> index 0c2eabe14af3..9778259b30d4 100644
> --- a/drivers/block/brd.c
> +++ b/drivers/block/brd.c
> @@ -44,45 +44,74 @@ struct brd_device {
> };
>
> /*
> - * Look up and return a brd's page for a given sector.
> + * Look up and return a brd's page with reference grabbed for a given sector.
> */
> static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
> {
> - return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
> + struct page *page;
> + XA_STATE(xas, &brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
> +
> + rcu_read_lock();
> +repeat:
> + page = xas_load(&xas);
> + if (xas_retry(&xas, page)) {
> + xas_reset(&xas);
> + goto repeat;
> + }
> +
> + if (!page)
> + goto out;
> +
> + if (!get_page_unless_zero(page)) {
> + xas_reset(&xas);
> + goto repeat;
> + }
> +
> + if (unlikely(page != xas_reload(&xas))) {
> + put_page(page);
> + xas_reset(&xas);
> + goto repeat;
> + }
> +out:
> + rcu_read_unlock();
> +
> + return page;
> }
>
> /*
> * Insert a new page for a given sector, if one does not already exist.
> + * A reference is grabbed on the returned page.
> */
> static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
> blk_opf_t opf)
> - __releases(rcu)
> - __acquires(rcu)
> {
> gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
> struct page *page, *ret;
>
> - rcu_read_unlock();
> page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
> - if (!page) {
> - rcu_read_lock();
> + if (!page)
> return ERR_PTR(-ENOMEM);
> - }
>
> xa_lock(&brd->brd_pages);
> ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
> page, gfp);
> - rcu_read_lock();
> - if (ret) {
> + if (!ret) {
> + brd->brd_nr_pages++;
> + get_page(page);
> + xa_unlock(&brd->brd_pages);
> + return page;
> + }
> +
> + if (!xa_is_err(ret)) {
> + get_page(ret);
> xa_unlock(&brd->brd_pages);
> - __free_page(page);
> - if (xa_is_err(ret))
> - return ERR_PTR(xa_err(ret));
> + put_page(page);
> return ret;
> }
> - brd->brd_nr_pages++;
> +
> xa_unlock(&brd->brd_pages);
> - return page;
> + put_page(page);
> + return ERR_PTR(xa_err(ret));
> }
>
> /*
> @@ -95,7 +124,7 @@ static void brd_free_pages(struct brd_device *brd)
> pgoff_t idx;
>
> xa_for_each(&brd->brd_pages, idx, page) {
> - __free_page(page);
> + put_page(page);
> cond_resched();
> }
>
> @@ -117,7 +146,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
>
> bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
>
> - rcu_read_lock();
> page = brd_lookup_page(brd, sector);
> if (!page && op_is_write(opf)) {
> page = brd_insert_page(brd, sector, opf);
> @@ -135,13 +163,13 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
> memset(kaddr, 0, bv.bv_len);
> }
> kunmap_local(kaddr);
> - rcu_read_unlock();
>
> bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len);
> + if (page)
> + put_page(page);
> return true;
>
> out_error:
> - rcu_read_unlock();
> if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT))
> bio_wouldblock_error(bio);
> else
> @@ -149,13 +177,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
> return false;
> }
>
> -static void brd_free_one_page(struct rcu_head *head)
> -{
> - struct page *page = container_of(head, struct page, rcu_head);
> -
> - __free_page(page);
> -}
> -
> static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
> {
> sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
> @@ -170,7 +191,7 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
> while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) {
> page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
> if (page) {
> - call_rcu(&page->rcu_head, brd_free_one_page);
> + put_page(page);
> brd->brd_nr_pages--;
> }
> aligned_sector += PAGE_SECTORS;
>
Powered by blists - more mailing lists