lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <51c0afe6-decc-67bd-0336-177a250acb3b@huaweicloud.com>
Date: Tue, 29 Jul 2025 15:52:38 +0800
From: Hou Tao <houtao@...weicloud.com>
To: Yu Kuai <yukuai1@...weicloud.com>, hch@....de, axboe@...nel.dk
Cc: linux-block@...r.kernel.org, linux-kernel@...r.kernel.org,
 yukuai3@...wei.com, yi.zhang@...wei.com, yangerkun@...wei.com,
 johnny.chenyi@...wei.com
Subject: Re: [PATCH] brd: use page reference to protect page lifetime

Hi,

On 7/29/2025 3:09 PM, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@...wei.com>
>
> As discussed [1], hold rcu for copying data from/to page is too heavy.
> it's better to protect page with rcu around for page lookup and then
> grab a reference to prevent page to be freed by discard.
>
> [1] https://lore.kernel.org/all/eb41cab3-5946-4fe3-a1be-843dd6fca159@kernel.dk/
>
> Signed-off-by: Yu Kuai <yukuai3@...wei.com>
> ---
>  drivers/block/brd.c | 56 +++++++++++++++++++++++----------------------
>  1 file changed, 29 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/block/brd.c b/drivers/block/brd.c
> index 0c2eabe14af3..ce311d054cab 100644
> --- a/drivers/block/brd.c
> +++ b/drivers/block/brd.c
> @@ -44,45 +44,55 @@ struct brd_device {
>  };
>  
>  /*
> - * Look up and return a brd's page for a given sector.
> + * Look up and return a brd's page with reference grabbed for a given sector.
>   */
>  static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
>  {
> -	return xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
> +	struct page *page;
> +
> +	rcu_read_lock();
> +	page = xa_load(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT);
> +	if (page)
> +		get_page(page);

Use get_page_unless_zero() instead? Er, it seems that even
get_page_unless_zero() is not enough, because the page may be reused.
Maybe we need to re-read the brd_pages xarray to ensure the page is
still there.
> +	rcu_read_unlock();
> +
> +	return page;
>  }
>  
>  /*
>   * Insert a new page for a given sector, if one does not already exist.
> + * The returned page will grab reference.
>   */
>  static struct page *brd_insert_page(struct brd_device *brd, sector_t sector,
>  		blk_opf_t opf)
> -	__releases(rcu)
> -	__acquires(rcu)
>  {
>  	gfp_t gfp = (opf & REQ_NOWAIT) ? GFP_NOWAIT : GFP_NOIO;
>  	struct page *page, *ret;
>  
> -	rcu_read_unlock();
>  	page = alloc_page(gfp | __GFP_ZERO | __GFP_HIGHMEM);
> -	if (!page) {
> -		rcu_read_lock();
> +	if (!page)
>  		return ERR_PTR(-ENOMEM);
> -	}
>  
>  	xa_lock(&brd->brd_pages);
>  	ret = __xa_cmpxchg(&brd->brd_pages, sector >> PAGE_SECTORS_SHIFT, NULL,
>  			page, gfp);
> -	rcu_read_lock();
> -	if (ret) {
> +	if (!ret) {
> +		brd->brd_nr_pages++;
> +		get_page(page);
>  		xa_unlock(&brd->brd_pages);

If I understand correctly, the initial ref-count after alloc_page() is 1
instead of 0, so I think the get_page(page) here and get_page(ret) below
are unnecessary and will lead to a memory leak.
> -		__free_page(page);
> -		if (xa_is_err(ret))
> -			return ERR_PTR(xa_err(ret));
> +		return page;
> +	}
> +
> +	if (!xa_is_err(ret)) {
> +		get_page(ret);
> +		xa_unlock(&brd->brd_pages);
> +		put_page(page);
>  		return ret;
>  	}
> -	brd->brd_nr_pages++;
> +
>  	xa_unlock(&brd->brd_pages);
> -	return page;
> +	put_page(page);
> +	return ERR_PTR(xa_err(ret));
>  }
>  
>  /*
> @@ -95,7 +105,7 @@ static void brd_free_pages(struct brd_device *brd)
>  	pgoff_t idx;
>  
>  	xa_for_each(&brd->brd_pages, idx, page) {
> -		__free_page(page);
> +		put_page(page);
>  		cond_resched();
>  	}
>  
> @@ -117,7 +127,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
>  
>  	bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
>  
> -	rcu_read_lock();
>  	page = brd_lookup_page(brd, sector);
>  	if (!page && op_is_write(opf)) {
>  		page = brd_insert_page(brd, sector, opf);
> @@ -135,13 +144,13 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
>  			memset(kaddr, 0, bv.bv_len);
>  	}
>  	kunmap_local(kaddr);
> -	rcu_read_unlock();
>  
>  	bio_advance_iter_single(bio, &bio->bi_iter, bv.bv_len);
> +	if (page)
> +		put_page(page);
>  	return true;
>  
>  out_error:
> -	rcu_read_unlock();
>  	if (PTR_ERR(page) == -ENOMEM && (opf & REQ_NOWAIT))
>  		bio_wouldblock_error(bio);
>  	else
> @@ -149,13 +158,6 @@ static bool brd_rw_bvec(struct brd_device *brd, struct bio *bio)
>  	return false;
>  }
>  
> -static void brd_free_one_page(struct rcu_head *head)
> -{
> -	struct page *page = container_of(head, struct page, rcu_head);
> -
> -	__free_page(page);
> -}
> -
>  static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
>  {
>  	sector_t aligned_sector = round_up(sector, PAGE_SECTORS);
> @@ -170,7 +172,7 @@ static void brd_do_discard(struct brd_device *brd, sector_t sector, u32 size)
>  	while (aligned_sector < aligned_end && aligned_sector < rd_size * 2) {
>  		page = __xa_erase(&brd->brd_pages, aligned_sector >> PAGE_SECTORS_SHIFT);
>  		if (page) {
> -			call_rcu(&page->rcu_head, brd_free_one_page);
> +			put_page(page);
>  			brd->brd_nr_pages--;
>  		}
>  		aligned_sector += PAGE_SECTORS;


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ