[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAOUHufYijvmo+-jRJWCeDEhvyf4z6EBMA=2ZOW2OBxnC2Y2qLg@mail.gmail.com>
Date: Mon, 17 Jun 2024 14:19:51 -0600
From: Yu Zhao <yuzhao@...gle.com>
To: Chengming Zhou <chengming.zhou@...ux.dev>
Cc: Minchan Kim <minchan@...nel.org>, Sergey Senozhatsky <senozhatsky@...omium.org>,
Andrew Morton <akpm@...ux-foundation.org>, Johannes Weiner <hannes@...xchg.org>,
Yosry Ahmed <yosryahmed@...gle.com>, Nhat Pham <nphamcs@...il.com>,
Takero Funaki <flintglass@...il.com>, Chengming Zhou <zhouchengming@...edance.com>, linux-mm@...ck.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2] mm/zswap: use only one pool in zswap
On Mon, Jun 17, 2024 at 2:16 PM Yu Zhao <yuzhao@...gle.com> wrote:
>
> On Mon, Jun 17, 2024 at 6:58 AM Chengming Zhou <chengming.zhou@...ux.dev> wrote:
> >
> > Zswap uses 32 pools to workaround the locking scalability problem in
> > zsmalloc,
>
> Note that zpool can have other backends (zbud, z3fold), and the
> original patch was developed (even before zswap could use zsmalloc) to
> make sure it works for all the backends.
>
> This patch makes sense now only because zsmalloc became a lot
> more popular than other backends (even though some distros still
> default to zbud).
And this might also have answered Yosry's question about the
"historical context" here [1].
[1] https://lore.kernel.org/CAJD7tkbO+ZLdhs-9BpthztZX32i8C4=QEnoiXGS7bM399nqwzg@mail.gmail.com/
> > which brings its own problems like memory waste and more
> > memory fragmentation.
> >
> > Testing results show that we can have near performance with only one
> > pool in zswap after changing zsmalloc to use per-size_class lock instead
> > of pool spinlock.
> >
> > Testing kernel build (make bzImage -j32) on tmpfs with memory.max=1GB,
> > and zswap shrinker enabled with 10GB swapfile on ext4.
> >
> > real user sys
> > 6.10.0-rc3 138.18 1241.38 1452.73
> > 6.10.0-rc3-onepool 149.45 1240.45 1844.69
> > 6.10.0-rc3-onepool-perclass 138.23 1242.37 1469.71
> >
> > Signed-off-by: Chengming Zhou <chengming.zhou@...ux.dev>
> > ---
> > mm/zswap.c | 60 +++++++++++++++++++-----------------------------------------
> > 1 file changed, 19 insertions(+), 41 deletions(-)
> >
> > diff --git a/mm/zswap.c b/mm/zswap.c
> > index e25a6808c2ed..5063c5372e51 100644
> > --- a/mm/zswap.c
> > +++ b/mm/zswap.c
> > @@ -122,9 +122,6 @@ static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
> > module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
> > uint, 0644);
> >
> > -/* Number of zpools in zswap_pool (empirically determined for scalability) */
> > -#define ZSWAP_NR_ZPOOLS 32
> > -
> > /* Enable/disable memory pressure-based shrinker. */
> > static bool zswap_shrinker_enabled = IS_ENABLED(
> > CONFIG_ZSWAP_SHRINKER_DEFAULT_ON);
> > @@ -160,7 +157,7 @@ struct crypto_acomp_ctx {
> > * needs to be verified that it's still valid in the tree.
> > */
> > struct zswap_pool {
> > - struct zpool *zpools[ZSWAP_NR_ZPOOLS];
> > + struct zpool *zpool;
> > struct crypto_acomp_ctx __percpu *acomp_ctx;
> > struct percpu_ref ref;
> > struct list_head list;
> > @@ -237,7 +234,7 @@ static inline struct xarray *swap_zswap_tree(swp_entry_t swp)
> >
> > #define zswap_pool_debug(msg, p) \
> > pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name, \
> > - zpool_get_type((p)->zpools[0]))
> > + zpool_get_type((p)->zpool))
> >
> > /*********************************
> > * pool functions
> > @@ -246,7 +243,6 @@ static void __zswap_pool_empty(struct percpu_ref *ref);
> >
> > static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> > {
> > - int i;
> > struct zswap_pool *pool;
> > char name[38]; /* 'zswap' + 32 char (max) num + \0 */
> > gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
> > @@ -267,18 +263,14 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> > if (!pool)
> > return NULL;
> >
> > - for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
> > - /* unique name for each pool specifically required by zsmalloc */
> > - snprintf(name, 38, "zswap%x",
> > - atomic_inc_return(&zswap_pools_count));
> > -
> > - pool->zpools[i] = zpool_create_pool(type, name, gfp);
> > - if (!pool->zpools[i]) {
> > - pr_err("%s zpool not available\n", type);
> > - goto error;
> > - }
> > + /* unique name for each pool specifically required by zsmalloc */
> > + snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
> > + pool->zpool = zpool_create_pool(type, name, gfp);
> > + if (!pool->zpool) {
> > + pr_err("%s zpool not available\n", type);
> > + goto error;
> > }
> > - pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));
> > + pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
> >
> > strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
> >
> > @@ -311,8 +303,7 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> > error:
> > if (pool->acomp_ctx)
> > free_percpu(pool->acomp_ctx);
> > - while (i--)
> > - zpool_destroy_pool(pool->zpools[i]);
> > + zpool_destroy_pool(pool->zpool);
> > kfree(pool);
> > return NULL;
> > }
> > @@ -361,15 +352,12 @@ static struct zswap_pool *__zswap_pool_create_fallback(void)
> >
> > static void zswap_pool_destroy(struct zswap_pool *pool)
> > {
> > - int i;
> > -
> > zswap_pool_debug("destroying", pool);
> >
> > cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
> > free_percpu(pool->acomp_ctx);
> >
> > - for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
> > - zpool_destroy_pool(pool->zpools[i]);
> > + zpool_destroy_pool(pool->zpool);
> > kfree(pool);
> > }
> >
> > @@ -464,8 +452,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
> > list_for_each_entry_rcu(pool, &zswap_pools, list) {
> > if (strcmp(pool->tfm_name, compressor))
> > continue;
> > - /* all zpools share the same type */
> > - if (strcmp(zpool_get_type(pool->zpools[0]), type))
> > + if (strcmp(zpool_get_type(pool->zpool), type))
> > continue;
> > /* if we can't get it, it's about to be destroyed */
> > if (!zswap_pool_get(pool))
> > @@ -492,12 +479,8 @@ unsigned long zswap_total_pages(void)
> > unsigned long total = 0;
> >
> > rcu_read_lock();
> > - list_for_each_entry_rcu(pool, &zswap_pools, list) {
> > - int i;
> > -
> > - for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
> > - total += zpool_get_total_pages(pool->zpools[i]);
> > - }
> > + list_for_each_entry_rcu(pool, &zswap_pools, list)
> > + total += zpool_get_total_pages(pool->zpool);
> > rcu_read_unlock();
> >
> > return total;
> > @@ -802,11 +785,6 @@ static void zswap_entry_cache_free(struct zswap_entry *entry)
> > kmem_cache_free(zswap_entry_cache, entry);
> > }
> >
> > -static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
> > -{
> > - return entry->pool->zpools[hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS))];
> > -}
> > -
> > /*
> > * Carries out the common pattern of freeing and entry's zpool allocation,
> > * freeing the entry itself, and decrementing the number of stored pages.
> > @@ -814,7 +792,7 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
> > static void zswap_entry_free(struct zswap_entry *entry)
> > {
> > zswap_lru_del(&zswap_list_lru, entry);
> > - zpool_free(zswap_find_zpool(entry), entry->handle);
> > + zpool_free(entry->pool->zpool, entry->handle);
> > zswap_pool_put(entry->pool);
> > if (entry->objcg) {
> > obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
> > @@ -939,7 +917,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
> > if (comp_ret)
> > goto unlock;
> >
> > - zpool = zswap_find_zpool(entry);
> > + zpool = entry->pool->zpool;
> > gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
> > if (zpool_malloc_support_movable(zpool))
> > gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
> > @@ -968,7 +946,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
> >
> > static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
> > {
> > - struct zpool *zpool = zswap_find_zpool(entry);
> > + struct zpool *zpool = entry->pool->zpool;
> > struct scatterlist input, output;
> > struct crypto_acomp_ctx *acomp_ctx;
> > u8 *src;
> > @@ -1467,7 +1445,7 @@ bool zswap_store(struct folio *folio)
> > return true;
> >
> > store_failed:
> > - zpool_free(zswap_find_zpool(entry), entry->handle);
> > + zpool_free(entry->pool->zpool, entry->handle);
> > put_pool:
> > zswap_pool_put(entry->pool);
> > freepage:
> > @@ -1683,7 +1661,7 @@ static int zswap_setup(void)
> > pool = __zswap_pool_create_fallback();
> > if (pool) {
> > pr_info("loaded using pool %s/%s\n", pool->tfm_name,
> > - zpool_get_type(pool->zpools[0]));
> > + zpool_get_type(pool->zpool));
> > list_add(&pool->list, &zswap_pools);
> > zswap_has_pool = true;
> > static_branch_enable(&zswap_ever_enabled);
> >
> > --
> > 2.45.2
> >
> >
Powered by blists - more mailing lists