Message-ID: <588351c0-93f9-4a04-a923-15aae8b71d49@linux.alibaba.com>
Date: Fri, 18 Oct 2024 09:44:05 +0800
From: Gao Xiang <hsiangkao@...ux.alibaba.com>
To: Chunhai Guo <guochunhai@...o.com>, xiang@...nel.org
Cc: chao@...nel.org, huyue2@...lpad.com, jefflexu@...ux.alibaba.com,
dhavale@...gle.com, linux-erofs@...ts.ozlabs.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH] erofs: free pcluster right after decompression if
possible
Hi Chunhai,
Thanks for the work! Please rebase it on
my "sunset z_erofs_workgroup" series.
On 2024/9/30 22:04, Chunhai Guo wrote:
> Once a pcluster is fully decompressed and there are no attached cached
> pages, its corresponding struct z_erofs_pcluster will be freed. This
Subject: free pclusters if no cached folio attached
cached folios, its corresponding `struct z_erofs_pcluster`...
> will significantly reduce the frequency of calls to erofs_shrink_scan()
> and the memory allocated for struct z_erofs_pcluster.
>
> The tables below show approximately a 95% reduction in the calls to
> erofs_shrink_scan() and in the memory allocated for struct
for `struct z_erofs_pcluster`
> z_erofs_pcluster after applying this patch. The results were obtained by
> performing a test to copy a 2.1 GB partition on ARM64 Android devices
> running the 5.15 kernel with an 8-core CPU and 8GB of memory.
I guess you could use a more recent kernel for testing instead?
>
> 1. The reduction in calls to erofs_shrink_scan():
> +-----------------+-----------+----------+---------+
> | | w/o patch | w/ patch | diff |
> +-----------------+-----------+----------+---------+
> | Average (times) | 3152 | 160 | -94.92% |
> +-----------------+-----------+----------+---------+
>
> 2. The reduction in memory released by erofs_shrink_scan():
> +-----------------+-----------+----------+---------+
> | | w/o patch | w/ patch | diff |
> +-----------------+-----------+----------+---------+
> | Average (Byte) | 44503200 | 2293760 | -94.84% |
> +-----------------+-----------+----------+---------+
>
> Signed-off-by: Chunhai Guo <guochunhai@...o.com>
> ---
> fs/erofs/internal.h | 3 ++-
> fs/erofs/zdata.c | 14 ++++++++---
> fs/erofs/zutil.c | 58 +++++++++++++++++++++++++++++----------------
> 3 files changed, 51 insertions(+), 24 deletions(-)
>
> diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
> index 4efd578d7c62..17b04bfd743f 100644
> --- a/fs/erofs/internal.h
> +++ b/fs/erofs/internal.h
> @@ -456,7 +456,8 @@ static inline void erofs_pagepool_add(struct page **pagepool, struct page *page)
> void erofs_release_pages(struct page **pagepool);
>
> #ifdef CONFIG_EROFS_FS_ZIP
> -void erofs_workgroup_put(struct erofs_workgroup *grp);
> +void erofs_workgroup_put(struct erofs_sb_info *sbi, struct erofs_workgroup *grp,
> + bool try_free);
> struct erofs_workgroup *erofs_find_workgroup(struct super_block *sb,
> pgoff_t index);
> struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
> diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
> index 8936790618c6..656fd65aec33 100644
> --- a/fs/erofs/zdata.c
> +++ b/fs/erofs/zdata.c
> @@ -888,7 +888,7 @@ static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
> * any longer if the pcluster isn't hosted by ourselves.
> */
> if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE)
> - erofs_workgroup_put(&pcl->obj);
> + erofs_workgroup_put(EROFS_I_SB(fe->inode), &pcl->obj, false);
>
> fe->pcl = NULL;
> }
> @@ -1046,6 +1046,9 @@ struct z_erofs_decompress_backend {
> struct list_head decompressed_secondary_bvecs;
> struct page **pagepool;
> unsigned int onstack_used, nr_pages;
> +
> + /* whether the pcluster can be released after its decompression */
> + bool try_free;
> };
>
> struct z_erofs_bvec_item {
> @@ -1244,12 +1247,15 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
> WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL);
> put_page(page);
> } else {
> + be->try_free = true;
> /* managed folios are still left in compressed_bvecs[] */
> for (i = 0; i < pclusterpages; ++i) {
> page = be->compressed_pages[i];
> if (!page ||
> - erofs_folio_is_managed(sbi, page_folio(page)))
> + erofs_folio_is_managed(sbi, page_folio(page))) {
> + be->try_free = false;
> continue;
> + }
> (void)z_erofs_put_shortlivedpage(be->pagepool, page);
> WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
> }
> @@ -1285,6 +1291,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
> if (be->decompressed_pages != be->onstack_pages)
> kvfree(be->decompressed_pages);
>
> + be->try_free = be->try_free && !pcl->partial;
I think there's no need to check `pcl->partial` here.
> pcl->length = 0;
> pcl->partial = true;
> pcl->multibases = false;
> @@ -1320,7 +1327,8 @@ static int z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
> if (z_erofs_is_inline_pcluster(be.pcl))
> z_erofs_free_pcluster(be.pcl);
> else
> - erofs_workgroup_put(&be.pcl->obj);
> + erofs_workgroup_put(EROFS_SB(io->sb), &be.pcl->obj,
> + be.try_free);
We could just move

	if (z_erofs_is_inline_pcluster(be.pcl))
		z_erofs_free_pcluster(be.pcl);
	else
		z_erofs_put_pcluster(be.pcl);

into the end of z_erofs_decompress_pcluster() and
get rid of `be->try_free`.
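Roughly something like the following at the very end of
z_erofs_decompress_pcluster() (an untested sketch just to show the
idea; z_erofs_put_pcluster() is the helper name after that series):

	/* untested sketch: tail of z_erofs_decompress_pcluster() */
	pcl->length = 0;
	pcl->partial = true;
	pcl->multibases = false;
	...
	mutex_unlock(&pcl->lock);

	/* moved here from z_erofs_decompress_queue(); be->try_free goes away */
	if (z_erofs_is_inline_pcluster(pcl))
		z_erofs_free_pcluster(pcl);
	else
		z_erofs_put_pcluster(pcl);
	return err;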
> }
> return err;
> }
> diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c
> index 37afe2024840..cf59ba6a2322 100644
> --- a/fs/erofs/zutil.c
> +++ b/fs/erofs/zutil.c
> @@ -285,26 +285,11 @@ static void __erofs_workgroup_free(struct erofs_workgroup *grp)
> erofs_workgroup_free_rcu(grp);
> }
>
> -void erofs_workgroup_put(struct erofs_workgroup *grp)
> -{
> - if (lockref_put_or_lock(&grp->lockref))
> - return;
> -
> - DBG_BUGON(__lockref_is_dead(&grp->lockref));
> - if (grp->lockref.count == 1)
> - atomic_long_inc(&erofs_global_shrink_cnt);
> - --grp->lockref.count;
> - spin_unlock(&grp->lockref.lock);
> -}
> -
> -static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
> +static bool erofs_prepare_to_release_workgroup(struct erofs_sb_info *sbi,
> struct erofs_workgroup *grp)
> {
> - int free = false;
> -
> - spin_lock(&grp->lockref.lock);
> if (grp->lockref.count)
> - goto out;
> + return false;
>
> /*
> * Note that all cached pages should be detached before deleted from
> @@ -312,7 +297,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
> * the orphan old workgroup when the new one is available in the tree.
> */
> if (erofs_try_to_free_all_cached_folios(sbi, grp))
> - goto out;
> + return false;
>
> /*
> * It's impossible to fail after the workgroup is freezed,
> @@ -322,14 +307,47 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
> DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp);
>
> lockref_mark_dead(&grp->lockref);
> - free = true;
> -out:
> + return true;
> +}
> +
> +static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi,
> + struct erofs_workgroup *grp)
> +{
> + bool free = false;
> +
> + /* Using trylock to avoid deadlock with erofs_workgroup_put() */
> + if (!spin_trylock(&grp->lockref.lock))
> + return free;
> + free = erofs_prepare_to_release_workgroup(sbi, grp);
> spin_unlock(&grp->lockref.lock);
> if (free)
> __erofs_workgroup_free(grp);
> return free;
> }
>
> +void erofs_workgroup_put(struct erofs_sb_info *sbi, struct erofs_workgroup *grp,
> + bool try_free)
> +{
> + bool free = false;
> +
> + if (lockref_put_or_lock(&grp->lockref))
> + return;
> +
> + DBG_BUGON(__lockref_is_dead(&grp->lockref));
> + if (--grp->lockref.count == 0) {
> + atomic_long_inc(&erofs_global_shrink_cnt);
> +
> + if (try_free) {
> + xa_lock(&sbi->managed_pslots);
> + free = erofs_prepare_to_release_workgroup(sbi, grp);
> + xa_unlock(&sbi->managed_pslots);
> + }
> + }
> + spin_unlock(&grp->lockref.lock);
> + if (free)
> + __erofs_workgroup_free(grp);
We need to wait for an RCU grace period here.
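IOW, the final free must not happen before an RCU grace period has
passed, otherwise a lockless lookup (rcu_read_lock() + xa_load()) that
has already fetched the pointer could touch freed memory. Just as a
generic sketch of that pattern (made-up names, not the erofs code):

	struct obj {
		struct rcu_head rcu;
		/* ... payload ... */
	};

	static void obj_free_rcu(struct rcu_head *head)
	{
		kfree(container_of(head, struct obj, rcu));
	}

	static void obj_release(struct xarray *xa, unsigned long index,
				struct obj *o)
	{
		xa_erase(xa, index);			/* unpublish first */
		call_rcu(&o->rcu, obj_free_rcu);	/* free after a grace period */
	}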
Thanks,
Gao Xiang