[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <fb80019b-06aa-f491-4d25-2f413d832741@huaweicloud.com>
Date: Mon, 11 Aug 2025 09:42:28 +0800
From: Yu Kuai <yukuai1@...weicloud.com>
To: Hou Tao <houtao@...weicloud.com>, Yu Kuai <yukuai1@...weicloud.com>,
hch@....de, axboe@...nel.dk
Cc: linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
yi.zhang@...wei.com, yangerkun@...wei.com, johnny.chenyi@...wei.com,
"yukuai (C)" <yukuai3@...wei.com>
Subject: Re: [PATCH v2] brd: use page reference to protect page lifetime
Hi,
在 2025/08/09 10:28, Hou Tao 写道:
>
>
> On 7/29/2025 5:06 PM, Yu Kuai wrote:
>> From: Yu Kuai <yukuai3@...wei.com>
>>
>> As discussed [1], holding the RCU read lock while copying data from/to a
>> page is too heavy. It's better to protect only the page lookup with RCU and
>> then grab a reference to prevent the page from being freed by discard.
>>
>> [1] https://lore.kernel.org/all/eb41cab3-5946-4fe3-a1be-843dd6fca159@kernel.dk/
>>
>> Signed-off-by: Yu Kuai <yukuai3@...wei.com>
>> ---
>> Changes from v1:
>> - refer to filemap_get_entry(), use xas_load + xas_reload to fix
>> concurrent problems.
>>
>> drivers/block/brd.c | 73 ++++++++++++++++++++++++++++-----------------
>> 1 file changed, 46 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/block/brd.c b/drivers/block/brd.c
>> index 0c2eabe14af3..b7a0448ca928 100644
>> --- a/drivers/block/brd.c
>> +++ b/drivers/block/brd.c
>> @@ -44,45 +44,72 @@ struct brd_device {
>> };
>>
>> /*
>> - * Look up and return a brd's page for a given sector.
>> + * Look up and return a brd's page with reference grabbed for a given sector.
>> */
>> static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
>> {
>> - return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
>> + struct page *page;
>> + XA_STATE(xas, &brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
>> +
>> + rcu_read_lock();
>> +repeat:
>> + xas_reset(&xas);
>
> Would it be better to move xas_reset() to the failing branches instead of
> adding an extra xas_reset() for the success branch?
Ok.
>> + page = xas_load(&xas);
>> + if (xas_retry(&xas, page))
>> + goto repeat;
>> +
>> + if (!page || xa_is_value(page)) {
>> + page = NULL;
>> + goto out;
>> + }
>
> brd will not store special values in the xarray, so xa_is_value() is
> unnecessary.
Yes, this is correct.
Thanks,
Kuai
>> +
>> + if (!get_page_unless_zero(page))
>> + goto repeat;
>> +
>> + if (unlikely(page != xas_reload(&xas))) {
>> + put_page(page);
>> + goto repeat;
>> + }
>> +out:
>> + rcu_read_unlock();
>> +
>> + return page;
>> }
>>
>> /*
>> * Insert a new page for a given sector, if one does not already exist.
>> + * The returned page will grab reference.
>> */
>> static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
>> blk_opf_t opf)
>> - __releases(rcu)
>> - __acquires(rcu)
>> {
>> gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
>> struct page *page, *ret;
>>
>> - rcu_read_unlock();
>> page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
>> - if (!page) {
>> - rcu_read_lock();
>> + if (!page)
>> return ERR_PTR(-ENOMEM);
>> - }
>>
>> xa_lock(&brd->brd_pages);
>> ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
>> page, gfp);
>> - rcu_read_lock();
>> - if (ret) {
>> + if (!ret) {
>> + brd->brd_nr_pages++;
>> + get_page(page);
>> xa_unlock(&brd->brd_pages);
>> - __free_page(page);
>> - if (xa_is_err(ret))
>> - return ERR_PTR(xa_err(ret));
>> + return page;
>> + }
>> +
>> + if (!xa_is_err(ret)) {
>> + get_page(ret);
>> + xa_unlock(&brd->brd_pages);
>> + put_page(page);
>> return ret;
>> }
>> - brd->brd_nr_pages++;
>> +
>> xa_unlock(&brd->brd_pages);
>> - return page;
>> + put_page(page);
>> + return ERR_PTR(xa_err(ret));
>> }
>>
>> /*
>> @@ -95,7 +122,7 @@ static void brd_free_pages(struct brd_device *brd)
>> pgoff_t idx;
>>
>> xa_for_each(&brd->brd_pages, idx, page) {
>> - __free_page(page);
>> + put_page(page);
>> cond_resched();
>> }
>>
>> @@ -117,7 +144,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
>>
>> bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
>>
>> - rcu_read_lock();
>> page = brd_lookup_page(brd, sector);
>> if (!page && op_is_write(opf)) {
>> page = brd_insert_page(brd, sector, opf);
>> @@ -135,13 +161,13 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
>> memset(kaddr, 0, bv.bv_len);
>> }
>> kunmap_local(kaddr);
>> - rcu_read_unlock();
>>
>> bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len);
>> + if (page)
>> + put_page(page);
>> return true;
>>
>> out_error:
>> - rcu_read_unlock();
>> if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT))
>> bio_wouldblock_error(bio);
>> else
>> @@ -149,13 +175,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
>> return false;
>> }
>>
>> -static void brd_free_one_page(struct rcu_head *head)
>> -{
>> - struct page *page = container_of(head, struct page, rcu_head);
>> -
>> - __free_page(page);
>> -}
>> -
>> static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
>> {
>> sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
>> @@ -170,7 +189,7 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
>> while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) {
>> page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
>> if (page) {
>> - call_rcu(&page->rcu_head, brd_free_one_page);
>> + put_page(page);
>> brd->brd_nr_pages--;
>> }
>> aligned_sector += PAGE_SECTORS;
>
> .
>
Powered by blists - more mailing lists