[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <0f525b1e-820c-4bbd-b8ba-59a16fd73ade@linux.alibaba.com>
Date: Tue, 12 Nov 2024 12:03:13 +0800
From: Gao Xiang <hsiangkao@...ux.alibaba.com>
To: Chunhai Guo <guochunhai@...o.com>, xiang@...nel.org
Cc: chao@...nel.org, huyue2@...lpad.com, jefflexu@...ux.alibaba.com,
dhavale@...gle.com, linux-erofs@...ts.ozlabs.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v5] erofs: free pclusters if no cached folio is attached
On 2024/11/12 12:11, Chunhai Guo wrote:
> Once a pcluster is fully decompressed and there are no attached cached
> folios, its corresponding `struct z_erofs_pcluster` will be freed. This
> will significantly reduce the frequency of calls to erofs_shrink_scan()
> and the memory allocated for `struct z_erofs_pcluster`.
>
> The tables below show approximately a 96% reduction in the calls to
> erofs_shrink_scan() and in the memory allocated for `struct
> z_erofs_pcluster` after applying this patch. The results were obtained
> by performing a test to copy a 4.1GB partition on ARM64 Android devices
> running the 6.6 kernel with an 8-core CPU and 12GB of memory.
>
> 1. The reduction in calls to erofs_shrink_scan():
> +-----------------+-----------+----------+---------+
> | | w/o patch | w/ patch | diff |
> +-----------------+-----------+----------+---------+
> | Average (times) | 11390 | 390 | -96.57% |
> +-----------------+-----------+----------+---------+
>
> 2. The reduction in memory released by erofs_shrink_scan():
> +-----------------+-----------+----------+---------+
> | | w/o patch | w/ patch | diff |
> +-----------------+-----------+----------+---------+
> | Average (Byte) | 133612656 | 4434552 | -96.68% |
> +-----------------+-----------+----------+---------+
>
> Signed-off-by: Chunhai Guo <guochunhai@...o.com>
> ---
> v4 -> v5:
> - modify subject to be more formal
> - `--pcl->lockref.count == 0` --> `!--pcl->lockref.count`
>
> v3 -> v4:
> - modify the patch as Gao Xiang suggested in v3.
>
> v2 -> v3:
> - rename erofs_prepare_to_release_pcluster() to __erofs_try_to_release_pcluster()
> - use trylock in z_erofs_put_pcluster() instead of erofs_try_to_release_pcluster()
>
> v1: https://lore.kernel.org/linux-erofs/588351c0-93f9-4a04-a923-15aae8b71d49@linux.alibaba.com/
> change since v1:
> - rebase this patch on the "sunset z_erofs_workgroup" series
> - remove check on pcl->partial and get rid of `be->try_free`
> - update test results based on 6.6 kernel
> ---
> fs/erofs/zdata.c | 54 ++++++++++++++++++++++++++++++++----------------
> 1 file changed, 36 insertions(+), 18 deletions(-)
>
> diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
> index 6b73a2307460..737f33d28c40 100644
> --- a/fs/erofs/zdata.c
> +++ b/fs/erofs/zdata.c
> @@ -885,14 +885,11 @@ static void z_erofs_rcu_callback(struct rcu_head *head)
> struct z_erofs_pcluster, rcu));
> }
>
> -static bool erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
> +static bool __erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
> struct z_erofs_pcluster *pcl)
> {
> - int free = false;
> -
> - spin_lock(&pcl->lockref.lock);
> if (pcl->lockref.count)
> - goto out;
> + return false;
>
> /*
> * Note that all cached folios should be detached before deleted from
> @@ -900,7 +897,7 @@ static bool erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
> * orphan old pcluster when the new one is available in the tree.
> */
> if (erofs_try_to_free_all_cached_folios(sbi, pcl))
> - goto out;
> + return false;
>
> /*
> * It's impossible to fail after the pcluster is freezed, but in order
> @@ -909,8 +906,16 @@ static bool erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
> DBG_BUGON(__xa_erase(&sbi->managed_pslots, pcl->index) != pcl);
>
> lockref_mark_dead(&pcl->lockref);
> - free = true;
> -out:
> + return true;
> +}
> +
> +static bool erofs_try_to_release_pcluster(struct erofs_sb_info *sbi,
> + struct z_erofs_pcluster *pcl)
> +{
> + bool free;
> +
> + spin_lock(&pcl->lockref.lock);
> + free = __erofs_try_to_release_pcluster(sbi, pcl);
> spin_unlock(&pcl->lockref.lock);
> if (free) {
> atomic_long_dec(&erofs_global_shrink_cnt);
> @@ -942,16 +947,25 @@ unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi,
> return freed;
> }
>
> -static void z_erofs_put_pcluster(struct z_erofs_pcluster *pcl)
> +static void z_erofs_put_pcluster(struct erofs_sb_info *sbi,
> + struct z_erofs_pcluster *pcl, bool try_free)
> {
> + bool free = false;
> +
> if (lockref_put_or_lock(&pcl->lockref))
> return;
>
> DBG_BUGON(__lockref_is_dead(&pcl->lockref));
> - if (pcl->lockref.count == 1)
> - atomic_long_inc(&erofs_global_shrink_cnt);
> - --pcl->lockref.count;
> + if (!--pcl->lockref.count) {
> + if (try_free && xa_trylock(&sbi->managed_pslots)) {
> + free = __erofs_try_to_release_pcluster(sbi, pcl);
> + xa_unlock(&sbi->managed_pslots);
> + }
> + atomic_long_add(!free, &erofs_global_shrink_cnt);
> + }
> spin_unlock(&pcl->lockref.lock);
> + if (free)
> + call_rcu(&pcl->rcu, z_erofs_rcu_callback);
> }
>
> static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
> @@ -972,7 +986,7 @@ static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
> * any longer if the pcluster isn't hosted by ourselves.
> */
> if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE)
> - z_erofs_put_pcluster(pcl);
> + z_erofs_put_pcluster(EROFS_I_SB(fe->inode), pcl, false);
>
> fe->pcl = NULL;
> }
> @@ -1274,6 +1288,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
> int i, j, jtop, err2;
> struct page *page;
> bool overlapped;
> + bool try_free = true;
>
> mutex_lock(&pcl->lock);
> be->nr_pages = PAGE_ALIGN(pcl->length + pcl->pageofs_out) >> PAGE_SHIFT;
> @@ -1332,8 +1347,10 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
> for (i = 0; i < pclusterpages; ++i) {
> page = be->compressed_pages[i];
> if (!page ||
> - erofs_folio_is_managed(sbi, page_folio(page)))
> + erofs_folio_is_managed(sbi, page_folio(page))) {
another issue:
if (!page)
continue;
if (erofs_folio_is_managed(sbi, page_folio(page))) {
try_free = false;
continue;
}
`!page` could happen if some memory allocation has failed and
we need to bail out.
Thanks,
Gao Xiang
Powered by blists - more mailing lists