lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAOUHufYijvmo+-jRJWCeDEhvyf4z6EBMA=2ZOW2OBxnC2Y2qLg@mail.gmail.com>
Date: Mon, 17 Jun 2024 14:19:51 -0600
From: Yu Zhao <yuzhao@...gle.com>
To: Chengming Zhou <chengming.zhou@...ux.dev>
Cc: Minchan Kim <minchan@...nel.org>, Sergey Senozhatsky <senozhatsky@...omium.org>, 
	Andrew Morton <akpm@...ux-foundation.org>, Johannes Weiner <hannes@...xchg.org>, 
	Yosry Ahmed <yosryahmed@...gle.com>, Nhat Pham <nphamcs@...il.com>, 
	Takero Funaki <flintglass@...il.com>, Chengming Zhou <zhouchengming@...edance.com>, linux-mm@...ck.org, 
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH 2/2] mm/zswap: use only one pool in zswap

On Mon, Jun 17, 2024 at 2:16 PM Yu Zhao <yuzhao@...gle.com> wrote:
>
> On Mon, Jun 17, 2024 at 6:58 AM Chengming Zhou <chengming.zhou@...ux.dev> wrote:
> >
> > Zswap uses 32 pools to work around the locking scalability problem in
> > zsmalloc,
>
> Note that zpool can have other backends (zbud, z3fold), and the
> original patch was developed (even before zswap could use zsmalloc) to
> make sure it works for all the backends.
>
> This patch makes sense now only because zsmalloc became a lot
> more popular than other backends (even though some distros still
> default to zbud).

And this might also have answered Yosry's question about the
"historical context" here [1].

[1] https://lore.kernel.org/CAJD7tkbO+ZLdhs-9BpthztZX32i8C4=QEnoiXGS7bM399nqwzg@mail.gmail.com/



> > which brings its own problems like memory waste and more
> > memory fragmentation.
> >
> > Testing results show that we can get nearly the same performance with only one
> > pool in zswap after changing zsmalloc to use per-size_class lock instead
> > of pool spinlock.
> >
> > Testing kernel build (make bzImage -j32) on tmpfs with memory.max=1GB,
> > and zswap shrinker enabled with 10GB swapfile on ext4.
> >
> >                                 real    user    sys
> > 6.10.0-rc3                      138.18  1241.38 1452.73
> > 6.10.0-rc3-onepool              149.45  1240.45 1844.69
> > 6.10.0-rc3-onepool-perclass     138.23  1242.37 1469.71
> >
> > Signed-off-by: Chengming Zhou <chengming.zhou@...ux.dev>
> > ---
> >  mm/zswap.c | 60 +++++++++++++++++++-----------------------------------------
> >  1 file changed, 19 insertions(+), 41 deletions(-)
> >
> > diff --git a/mm/zswap.c b/mm/zswap.c
> > index e25a6808c2ed..5063c5372e51 100644
> > --- a/mm/zswap.c
> > +++ b/mm/zswap.c
> > @@ -122,9 +122,6 @@ static unsigned int zswap_accept_thr_percent = 90; /* of max pool size */
> >  module_param_named(accept_threshold_percent, zswap_accept_thr_percent,
> >                    uint, 0644);
> >
> > -/* Number of zpools in zswap_pool (empirically determined for scalability) */
> > -#define ZSWAP_NR_ZPOOLS 32
> > -
> >  /* Enable/disable memory pressure-based shrinker. */
> >  static bool zswap_shrinker_enabled = IS_ENABLED(
> >                 CONFIG_ZSWAP_SHRINKER_DEFAULT_ON);
> > @@ -160,7 +157,7 @@ struct crypto_acomp_ctx {
> >   * needs to be verified that it's still valid in the tree.
> >   */
> >  struct zswap_pool {
> > -       struct zpool *zpools[ZSWAP_NR_ZPOOLS];
> > +       struct zpool *zpool;
> >         struct crypto_acomp_ctx __percpu *acomp_ctx;
> >         struct percpu_ref ref;
> >         struct list_head list;
> > @@ -237,7 +234,7 @@ static inline struct xarray *swap_zswap_tree(swp_entry_t swp)
> >
> >  #define zswap_pool_debug(msg, p)                               \
> >         pr_debug("%s pool %s/%s\n", msg, (p)->tfm_name,         \
> > -                zpool_get_type((p)->zpools[0]))
> > +                zpool_get_type((p)->zpool))
> >
> >  /*********************************
> >  * pool functions
> > @@ -246,7 +243,6 @@ static void __zswap_pool_empty(struct percpu_ref *ref);
> >
> >  static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> >  {
> > -       int i;
> >         struct zswap_pool *pool;
> >         char name[38]; /* 'zswap' + 32 char (max) num + \0 */
> >         gfp_t gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
> > @@ -267,18 +263,14 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> >         if (!pool)
> >                 return NULL;
> >
> > -       for (i = 0; i < ZSWAP_NR_ZPOOLS; i++) {
> > -               /* unique name for each pool specifically required by zsmalloc */
> > -               snprintf(name, 38, "zswap%x",
> > -                        atomic_inc_return(&zswap_pools_count));
> > -
> > -               pool->zpools[i] = zpool_create_pool(type, name, gfp);
> > -               if (!pool->zpools[i]) {
> > -                       pr_err("%s zpool not available\n", type);
> > -                       goto error;
> > -               }
> > +       /* unique name for each pool specifically required by zsmalloc */
> > +       snprintf(name, 38, "zswap%x", atomic_inc_return(&zswap_pools_count));
> > +       pool->zpool = zpool_create_pool(type, name, gfp);
> > +       if (!pool->zpool) {
> > +               pr_err("%s zpool not available\n", type);
> > +               goto error;
> >         }
> > -       pr_debug("using %s zpool\n", zpool_get_type(pool->zpools[0]));
> > +       pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
> >
> >         strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
> >
> > @@ -311,8 +303,7 @@ static struct zswap_pool *zswap_pool_create(char *type, char *compressor)
> >  error:
> >         if (pool->acomp_ctx)
> >                 free_percpu(pool->acomp_ctx);
> > -       while (i--)
> > -               zpool_destroy_pool(pool->zpools[i]);
> > +       zpool_destroy_pool(pool->zpool);
> >         kfree(pool);
> >         return NULL;
> >  }
> > @@ -361,15 +352,12 @@ static struct zswap_pool *__zswap_pool_create_fallback(void)
> >
> >  static void zswap_pool_destroy(struct zswap_pool *pool)
> >  {
> > -       int i;
> > -
> >         zswap_pool_debug("destroying", pool);
> >
> >         cpuhp_state_remove_instance(CPUHP_MM_ZSWP_POOL_PREPARE, &pool->node);
> >         free_percpu(pool->acomp_ctx);
> >
> > -       for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
> > -               zpool_destroy_pool(pool->zpools[i]);
> > +       zpool_destroy_pool(pool->zpool);
> >         kfree(pool);
> >  }
> >
> > @@ -464,8 +452,7 @@ static struct zswap_pool *zswap_pool_find_get(char *type, char *compressor)
> >         list_for_each_entry_rcu(pool, &zswap_pools, list) {
> >                 if (strcmp(pool->tfm_name, compressor))
> >                         continue;
> > -               /* all zpools share the same type */
> > -               if (strcmp(zpool_get_type(pool->zpools[0]), type))
> > +               if (strcmp(zpool_get_type(pool->zpool), type))
> >                         continue;
> >                 /* if we can't get it, it's about to be destroyed */
> >                 if (!zswap_pool_get(pool))
> > @@ -492,12 +479,8 @@ unsigned long zswap_total_pages(void)
> >         unsigned long total = 0;
> >
> >         rcu_read_lock();
> > -       list_for_each_entry_rcu(pool, &zswap_pools, list) {
> > -               int i;
> > -
> > -               for (i = 0; i < ZSWAP_NR_ZPOOLS; i++)
> > -                       total += zpool_get_total_pages(pool->zpools[i]);
> > -       }
> > +       list_for_each_entry_rcu(pool, &zswap_pools, list)
> > +               total += zpool_get_total_pages(pool->zpool);
> >         rcu_read_unlock();
> >
> >         return total;
> > @@ -802,11 +785,6 @@ static void zswap_entry_cache_free(struct zswap_entry *entry)
> >         kmem_cache_free(zswap_entry_cache, entry);
> >  }
> >
> > -static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
> > -{
> > -       return entry->pool->zpools[hash_ptr(entry, ilog2(ZSWAP_NR_ZPOOLS))];
> > -}
> > -
> >  /*
> >   * Carries out the common pattern of freeing and entry's zpool allocation,
> >   * freeing the entry itself, and decrementing the number of stored pages.
> > @@ -814,7 +792,7 @@ static struct zpool *zswap_find_zpool(struct zswap_entry *entry)
> >  static void zswap_entry_free(struct zswap_entry *entry)
> >  {
> >         zswap_lru_del(&zswap_list_lru, entry);
> > -       zpool_free(zswap_find_zpool(entry), entry->handle);
> > +       zpool_free(entry->pool->zpool, entry->handle);
> >         zswap_pool_put(entry->pool);
> >         if (entry->objcg) {
> >                 obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
> > @@ -939,7 +917,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
> >         if (comp_ret)
> >                 goto unlock;
> >
> > -       zpool = zswap_find_zpool(entry);
> > +       zpool = entry->pool->zpool;
> >         gfp = __GFP_NORETRY | __GFP_NOWARN | __GFP_KSWAPD_RECLAIM;
> >         if (zpool_malloc_support_movable(zpool))
> >                 gfp |= __GFP_HIGHMEM | __GFP_MOVABLE;
> > @@ -968,7 +946,7 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
> >
> >  static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
> >  {
> > -       struct zpool *zpool = zswap_find_zpool(entry);
> > +       struct zpool *zpool = entry->pool->zpool;
> >         struct scatterlist input, output;
> >         struct crypto_acomp_ctx *acomp_ctx;
> >         u8 *src;
> > @@ -1467,7 +1445,7 @@ bool zswap_store(struct folio *folio)
> >         return true;
> >
> >  store_failed:
> > -       zpool_free(zswap_find_zpool(entry), entry->handle);
> > +       zpool_free(entry->pool->zpool, entry->handle);
> >  put_pool:
> >         zswap_pool_put(entry->pool);
> >  freepage:
> > @@ -1683,7 +1661,7 @@ static int zswap_setup(void)
> >         pool = __zswap_pool_create_fallback();
> >         if (pool) {
> >                 pr_info("loaded using pool %s/%s\n", pool->tfm_name,
> > -                       zpool_get_type(pool->zpools[0]));
> > +                       zpool_get_type(pool->zpool));
> >                 list_add(&pool->list, &zswap_pools);
> >                 zswap_has_pool = true;
> >                 static_branch_enable(&zswap_ever_enabled);
> >
> > --
> > 2.45.2
> >
> >

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ