[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <mldy4ayvdlmdz2c6spsmbuwiekvqtnxoj2lzg2ktehmdefsees@wdi7vw7kliuq>
Date: Fri, 9 Jan 2026 16:02:51 +0000
From: Yosry Ahmed <yosry.ahmed@...ux.dev>
To: Sergey Senozhatsky <senozhatsky@...omium.org>
Cc: Herbert Xu <herbert@...dor.apana.org.au>,
Andrew Morton <akpm@...ux-foundation.org>, Nhat Pham <nphamcs@...il.com>, Minchan Kim <minchan@...nel.org>,
Johannes Weiner <hannes@...xchg.org>, Brian Geffon <bgeffon@...gle.com>, linux-kernel@...r.kernel.org,
linux-mm@...ck.org
Subject: Re: [RFC PATCH 2/2] zsmalloc: chain-length configuration should consider other metrics
On Fri, Jan 09, 2026 at 12:29:58PM +0900, Sergey Senozhatsky wrote:
> On (26/01/08 08:01), Yosry Ahmed wrote:
> > > Yeah I agree, I guess I can cook something up.
> > >
> > > For transition period we can have:
> > > - current "memcpy" API
> > > for zswap
> > >
> > > - SG-list API
> > >
> > > I can vmap either on the zram side or have new zsmalloc vmap API
> > > (alongside the memcpy and SG-list APIs).
> > >
> > > Once crypto API supports SG-list and algorithms tunables I can
> > > switch zram over from zcomp to crypto API and remove memcpy and
> > > vmap APIs from zsmalloc.
> >
> > IIUC based on Herbert's previous response, crypto and scomp already
> > support passing in a discontiguous SG-list. So for zswap, if zsmalloc
> > returns an SG-list, it will just be passed as-is to the crypto API.
>
> Oh, okay,
>
> Something like below? Not really familiar with SG-list API.
That makes two of us :P
Herbert, do you mind taking a look at this? It looks sane to me except
for one question below.
I can try to test this next week with zswap and see if it blows up.
>
> ---
> include/linux/zsmalloc.h | 4 +++
> mm/zsmalloc.c | 65 ++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 69 insertions(+)
>
> diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
> index 5565c3171007..11e614663dd3 100644
> --- a/include/linux/zsmalloc.h
> +++ b/include/linux/zsmalloc.h
> @@ -22,6 +22,7 @@ struct zs_pool_stats {
> };
>
> struct zs_pool;
> +struct scatterlist;
>
> struct zs_pool *zs_create_pool(const char *name);
> void zs_destroy_pool(struct zs_pool *pool);
> @@ -43,6 +44,9 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
> size_t mem_len, void *local_copy);
> void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
> size_t mem_len, void *handle_mem);
> +int zs_obj_read_sg_begin(struct zs_pool *pool, unsigned long handle,
> + struct scatterlist *sg, size_t mem_len);
> +void zs_obj_read_sg_end(struct zs_pool *pool, unsigned long handle);
> void zs_obj_write(struct zs_pool *pool, unsigned long handle,
> void *handle_mem, size_t mem_len);
>
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 16d5587a052a..8f7569058147 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -30,6 +30,7 @@
> #include <linux/highmem.h>
> #include <linux/string.h>
> #include <linux/slab.h>
> +#include <linux/scatterlist.h>
> #include <linux/spinlock.h>
> #include <linux/sprintf.h>
> #include <linux/shrinker.h>
> @@ -1146,6 +1147,70 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
> }
> EXPORT_SYMBOL_GPL(zs_obj_read_end);
>
> +int zs_obj_read_sg_begin(struct zs_pool *pool, unsigned long handle,
> + struct scatterlist *sg, size_t mem_len)
> +{
> + struct zspage *zspage;
> + struct zpdesc *zpdesc;
> + unsigned long obj, off;
> + unsigned int obj_idx;
> + struct size_class *class;
> +
> + /* Guarantee we can get zspage from handle safely */
> + read_lock(&pool->lock);
> + obj = handle_to_obj(handle);
> + obj_to_location(obj, &zpdesc, &obj_idx);
> + zspage = get_zspage(zpdesc);
> +
> + /* Make sure migration doesn't move any pages in this zspage */
> + zspage_read_lock(zspage);
> + read_unlock(&pool->lock);
> +
> + class = zspage_class(pool, zspage);
> + off = offset_in_page(class->size * obj_idx);
> +
> + if (!ZsHugePage(zspage))
> + off += ZS_HANDLE_SIZE;
> +
> + if (off + mem_len <= PAGE_SIZE) {
> + /* this object is contained entirely within a page */
> + sg_init_table(sg, 1);
> + sg_set_page(sg, zpdesc_page(zpdesc), mem_len, off);
> + } else {
> + size_t sizes[2];
> +
> + /* this object spans two pages */
> + sizes[0] = PAGE_SIZE - off;
> + sizes[1] = mem_len - sizes[0];
> +
> + sg_init_table(sg, 2);
> + sg_set_page(sg, zpdesc_page(zpdesc), sizes[0], off);
> +
> + zpdesc = get_next_zpdesc(zpdesc);
> + sg = sg_next(sg);
Is this stateful? `sg` is advanced with sg_next() here, so will the SG
list be handed back to the caller pointing at the second page now?
> +
> + sg_set_page(sg, zpdesc_page(zpdesc), sizes[1], 0);
> + }
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(zs_obj_read_sg_begin);
> +
> +void zs_obj_read_sg_end(struct zs_pool *pool, unsigned long handle)
> +{
> + struct zspage *zspage;
> + struct zpdesc *zpdesc;
> + unsigned long obj;
> + unsigned int obj_idx;
> +
> + obj = handle_to_obj(handle);
> + obj_to_location(obj, &zpdesc, &obj_idx);
> + zspage = get_zspage(zpdesc);
> +
> + zspage_read_unlock(zspage);
> +}
> +EXPORT_SYMBOL_GPL(zs_obj_read_sg_end);
> +
> void zs_obj_write(struct zs_pool *pool, unsigned long handle,
> void *handle_mem, size_t mem_len)
> {
> --
> 2.52.0.457.g6b5491de43-goog
>
Powered by blists - more mailing lists