Message-ID: <Y/6K5+pC+nXvDsjq@google.com>
Date: Tue, 28 Feb 2023 15:14:47 -0800
From: Minchan Kim <minchan@...nel.org>
To: Sergey Senozhatsky <senozhatsky@...omium.org>
Cc: Andrew Morton <akpm@...ux-foundation.org>,
Yosry Ahmed <yosryahmed@...gle.com>,
linux-kernel@...r.kernel.org, linux-mm@...ck.org
Subject: Re: [PATCHv2 4/6] zsmalloc: rework compaction algorithm
On Thu, Feb 23, 2023 at 12:04:49PM +0900, Sergey Senozhatsky wrote:
> The zsmalloc compaction algorithm has the potential to
> waste some CPU cycles, particularly when compacting pages
> within the same fullness group. This is due to the way it
> selects the head page of the fullness list for source and
> destination pages, and how it reinserts those pages during
> each iteration. The algorithm may first use a page as a
> migration destination and then as a migration source,
> leading to an unnecessary back-and-forth movement of
> objects.
>
> Consider the following fullness list:
>
> PageA PageB PageC PageD PageE
>
> During the first iteration, the compaction algorithm will
> select PageA as the source and PageB as the destination.
> All of PageA's objects will be moved to PageB, and then
> PageA will be released while PageB is reinserted into the
> fullness list.
>
> PageB PageC PageD PageE
>
> During the next iteration, the compaction algorithm will
> again select the head of the list as the source and destination,
> meaning that PageB will now serve as the source and PageC as
> the destination. This will result in the objects being moved
> away from PageB, the same objects that were just moved to PageB
> in the previous iteration.
>
> To prevent this avalanche effect, the compaction algorithm
> should not reinsert the destination page between iterations.
> By doing so, the most optimal page will continue to be used
> and its usage ratio will increase, reducing internal
> fragmentation. The destination page should only be reinserted
> into the fullness list if:
> - It becomes full
> - No source page is available.
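A tiny userspace model of this policy (illustration only, not zsmalloc
code; page count, capacity and selection helpers below are made up): the
destination stays pinned until it fills up or no source page remains, so
no object is ever migrated more than once.

	/*
	 * Model: each "page" holds up to CAP objects.  Compaction drains
	 * the emptiest partial page into one pinned destination, and only
	 * unpins the destination once it is full or no source is left.
	 */
	#include <stdio.h>

	#define NPAGES	5
	#define CAP	8	/* objects per page */

	int main(void)
	{
		int inuse[NPAGES] = { 3, 4, 2, 5, 1 };
		int dst = -1, moves = 0, freed = 0;

		for (;;) {
			int i, src = -1;

			/* pick a destination once, keep it across iterations */
			if (dst < 0) {
				for (i = 0; i < NPAGES; i++)
					if (inuse[i] > 0 && inuse[i] < CAP &&
					    (dst < 0 || inuse[i] > inuse[dst]))
						dst = i;
				if (dst < 0)
					break;
			}

			/* the emptiest non-full page (other than dst) is the source */
			for (i = 0; i < NPAGES; i++)
				if (i != dst && inuse[i] > 0 && inuse[i] < CAP &&
				    (src < 0 || inuse[i] < inuse[src]))
					src = i;
			if (src < 0)
				break;

			/* migrate until the source is empty or the dest is full */
			while (inuse[src] > 0 && inuse[dst] < CAP) {
				inuse[src]--;
				inuse[dst]++;
				moves++;
			}
			if (inuse[src] == 0)
				freed++;

			/* "reinsert" (unpin) the destination only when full */
			if (inuse[dst] == CAP)
				dst = -1;
		}

		printf("moves=%d, pages freed=%d\n", moves, freed);
		return 0;
	}

For this sample distribution it prints "moves=6, pages freed=3"; with the
old head-of-list reinsertion, objects just moved into the destination could
be picked up and moved again on the following iteration.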
>
> Signed-off-by: Sergey Senozhatsky <senozhatsky@...omium.org>
> ---
> mm/zsmalloc.c | 82 ++++++++++++++++++++++++---------------------------
> 1 file changed, 38 insertions(+), 44 deletions(-)
>
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 1a92ebe338eb..eacf9e32da5c 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -1786,15 +1786,14 @@ struct zs_compact_control {
> int obj_idx;
> };
>
> -static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
> - struct zs_compact_control *cc)
> +static void migrate_zspage(struct zs_pool *pool, struct size_class *class,
> + struct zs_compact_control *cc)
> {
> unsigned long used_obj, free_obj;
> unsigned long handle;
> struct page *s_page = cc->s_page;
> struct page *d_page = cc->d_page;
> int obj_idx = cc->obj_idx;
> - int ret = 0;
>
> while (1) {
> handle = find_alloced_obj(class, s_page, &obj_idx);
> @@ -1807,10 +1806,8 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
> }
>
> /* Stop if there is no more space */
> - if (zspage_full(class, get_zspage(d_page))) {
> - ret = -ENOMEM;
> + if (zspage_full(class, get_zspage(d_page)))
> break;
> - }
>
> used_obj = handle_to_obj(handle);
> free_obj = obj_malloc(pool, get_zspage(d_page), handle);
> @@ -1823,8 +1820,6 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class,
> /* Remember last position in this iteration */
> cc->s_page = s_page;
> cc->obj_idx = obj_idx;
> -
> - return ret;
> }
>
> static struct zspage *isolate_src_zspage(struct size_class *class)
> @@ -2228,57 +2223,56 @@ static unsigned long __zs_compact(struct zs_pool *pool,
> * as well as zpage allocation/free
> */
> spin_lock(&pool->lock);
> - while ((src_zspage = isolate_src_zspage(class))) {
> - /* protect someone accessing the zspage(i.e., zs_map_object) */
> - migrate_write_lock(src_zspage);
> -
> - if (!zs_can_compact(class))
> - break;
> -
> - cc.obj_idx = 0;
> - cc.s_page = get_first_page(src_zspage);
> -
> - while ((dst_zspage = isolate_dst_zspage(class))) {
> - migrate_write_lock_nested(dst_zspage);
> -
> + while (1) {
> + if (!dst_zspage) {
> + dst_zspage = isolate_dst_zspage(class);
> + if (!dst_zspage)
> + goto out;
> + migrate_write_lock(dst_zspage);
> cc.d_page = get_first_page(dst_zspage);
> - /*
> - * If there is no more space in dst_page, resched
> - * and see if anyone had allocated another zspage.
> - */
> - if (!migrate_zspage(pool, class, &cc))
> - break;
> + }
>
> + if (!zs_can_compact(class)) {
> putback_zspage(class, dst_zspage);
> migrate_write_unlock(dst_zspage);
> - dst_zspage = NULL;
> - if (spin_is_contended(&pool->lock))
> - break;
> + goto out;
just break instead of goto
> }
>
> - /* Stop if we couldn't find slot */
> - if (dst_zspage == NULL)
> - break;
> + src_zspage = isolate_src_zspage(class);
> + if (!src_zspage) {
> + putback_zspage(class, dst_zspage);
> + migrate_write_unlock(dst_zspage);
> + goto out;
just break instead of goto
> + }
>
> - putback_zspage(class, dst_zspage);
> - migrate_write_unlock(dst_zspage);
> + migrate_write_lock_nested(src_zspage);
> +
> + cc.obj_idx = 0;
> + cc.s_page = get_first_page(src_zspage);
> + migrate_zspage(pool, class, &cc);
>
> if (putback_zspage(class, src_zspage) == ZS_INUSE_RATIO_0) {
> migrate_write_unlock(src_zspage);
> free_zspage(pool, class, src_zspage);
> pages_freed += class->pages_per_zspage;
> - } else
> + } else {
> migrate_write_unlock(src_zspage);
So here, migrate_write_unlock(src_zspage) is done in both branches, so we
could change it like this:

ret = putback_zspage(class, src_zspage);
migrate_write_unlock(src_zspage);
if (ret == ZS_INUSE_RATIO_0) {	/* i.e. the old ZS_EMPTY */
	free_zspage(pool, class, src_zspage);
	pages_freed += class->pages_per_zspage;
}
> - spin_unlock(&pool->lock);
> - cond_resched();
> - spin_lock(&pool->lock);
> - }
> + }
>
> - if (src_zspage) {
> - putback_zspage(class, src_zspage);
> - migrate_write_unlock(src_zspage);
> - }
> + if (get_fullness_group(class, dst_zspage) == ZS_INUSE_RATIO_100
> + || spin_is_contended(&pool->lock)) {
> + putback_zspage(class, dst_zspage);
> + migrate_write_unlock(dst_zspage);
> + dst_zspage = NULL;
And move the resched into this branch:

spin_unlock(&pool->lock);
cond_resched();
spin_lock(&pool->lock);
> + }
>
> + if (!dst_zspage) {
Then we could remove this conditional block here.
> + spin_unlock(&pool->lock);
> + cond_resched();
> + spin_lock(&pool->lock);
> + }
> + }
> +out:
> spin_unlock(&pool->lock);
>
> return pages_freed;
So, how about this on top of your patch?
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index eacf9e32da5c..4dfc910f5d89 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -2223,40 +2223,33 @@ static unsigned long __zs_compact(struct zs_pool *pool,
* as well as zpage allocation/free
*/
spin_lock(&pool->lock);
- while (1) {
+ while (zs_can_compact(class)) {
+ int ret;
+
if (!dst_zspage) {
dst_zspage = isolate_dst_zspage(class);
if (!dst_zspage)
- goto out;
+ break;
migrate_write_lock(dst_zspage);
cc.d_page = get_first_page(dst_zspage);
}
- if (!zs_can_compact(class)) {
- putback_zspage(class, dst_zspage);
- migrate_write_unlock(dst_zspage);
- goto out;
- }
-
src_zspage = isolate_src_zspage(class);
- if (!src_zspage) {
- putback_zspage(class, dst_zspage);
- migrate_write_unlock(dst_zspage);
- goto out;
- }
+ if (!src_zspage)
+ break;
migrate_write_lock_nested(src_zspage);
-
cc.obj_idx = 0;
cc.s_page = get_first_page(src_zspage);
+
migrate_zspage(pool, class, &cc);
+ ret = putback_zspage(class, src_zspage);
+ migrate_write_unlock(src_zspage);
- if (putback_zspage(class, src_zspage) == ZS_INUSE_RATIO_0) {
- migrate_write_unlock(src_zspage);
+ if (ret == ZS_INUSE_RATIO_0) {
free_zspage(pool, class, src_zspage);
pages_freed += class->pages_per_zspage;
- } else {
- migrate_write_unlock(src_zspage);
+ src_zspage = NULL;
}
if (get_fullness_group(class, dst_zspage) == ZS_INUSE_RATIO_100
@@ -2264,14 +2257,22 @@ static unsigned long __zs_compact(struct zs_pool *pool,
putback_zspage(class, dst_zspage);
migrate_write_unlock(dst_zspage);
dst_zspage = NULL;
- }
- if (!dst_zspage) {
spin_unlock(&pool->lock);
cond_resched();
spin_lock(&pool->lock);
}
}
+
+ if (src_zspage) {
+ putback_zspage(class, src_zspage);
+ migrate_write_unlock(src_zspage);
+ }
+
+ if (dst_zspage) {
+ putback_zspage(class, dst_zspage);
+ migrate_write_unlock(dst_zspage);
+ }
out:
spin_unlock(&pool->lock);