lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <mldy4ayvdlmdz2c6spsmbuwiekvqtnxoj2lzg2ktehmdefsees@wdi7vw7kliuq>
Date: Fri, 9 Jan 2026 16:02:51 +0000
From: Yosry Ahmed <yosry.ahmed@...ux.dev>
To: Sergey Senozhatsky <senozhatsky@...omium.org>
Cc: Herbert Xu <herbert@...dor.apana.org.au>, 
	Andrew Morton <akpm@...ux-foundation.org>, Nhat Pham <nphamcs@...il.com>, Minchan Kim <minchan@...nel.org>, 
	Johannes Weiner <hannes@...xchg.org>, Brian Geffon <bgeffon@...gle.com>, linux-kernel@...r.kernel.org, 
	linux-mm@...ck.org
Subject: Re: [RFC PATCH 2/2] zsmalloc: chain-length configuration should
 consider other metrics

On Fri, Jan 09, 2026 at 12:29:58PM +0900, Sergey Senozhatsky wrote:
> On (26/01/08 08:01), Yosry Ahmed wrote:
> > > Yeah I agree, I guess I can cook something up.
> > > 
> > > For transition period we can have:
> > > - current "memcpy" API
> > >   for zswap
> > > 
> > > - SG-list API
> > > 
> > > I can vmap either on the zram side or have new zsmalloc vmap API
> > > (alongside the memcpy and SG-list APIs).
> > > 
> > > Once crypto API supports SG-list and algorithms tunables I can
> > > switch zram over from zcomp to crypto API and remove memcpy and
> > > vmap APIs from zsmalloc.
> > 
> > IIUC based on Herbert's previous response, crypto and scomp already
> > support passing in a discontiguous SG-list. So for zswap, if zsmalloc
> > returns an SG-list, it will just be passed as-is to the crypto API.
> 
> Oh, okay,
> 
> Something like below?  Not really familiar with SG-list API.

That makes two of us :P

Herbert, do you mind taking a look at this? It looks sane to me except
for one question below.

I can try to test this next week with zswap and see if it blows up.

> 
> ---
>  include/linux/zsmalloc.h |  4 +++
>  mm/zsmalloc.c            | 65 ++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 69 insertions(+)
> 
> diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
> index 5565c3171007..11e614663dd3 100644
> --- a/include/linux/zsmalloc.h
> +++ b/include/linux/zsmalloc.h
> @@ -22,6 +22,7 @@ struct zs_pool_stats {
>  };
>  
>  struct zs_pool;
> +struct scatterlist;
>  
>  struct zs_pool *zs_create_pool(const char *name);
>  void zs_destroy_pool(struct zs_pool *pool);
> @@ -43,6 +44,9 @@ void *zs_obj_read_begin(struct zs_pool *pool, unsigned long handle,
>  			size_t mem_len, void *local_copy);
>  void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
>  		     size_t mem_len, void *handle_mem);
> +int zs_obj_read_sg_begin(struct zs_pool *pool, unsigned long handle,
> +		   struct scatterlist *sg, size_t mem_len);
> +void zs_obj_read_sg_end(struct zs_pool *pool, unsigned long handle);
>  void zs_obj_write(struct zs_pool *pool, unsigned long handle,
>  		  void *handle_mem, size_t mem_len);
>  
> diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
> index 16d5587a052a..8f7569058147 100644
> --- a/mm/zsmalloc.c
> +++ b/mm/zsmalloc.c
> @@ -30,6 +30,7 @@
>  #include <linux/highmem.h>
>  #include <linux/string.h>
>  #include <linux/slab.h>
> +#include <linux/scatterlist.h>
>  #include <linux/spinlock.h>
>  #include <linux/sprintf.h>
>  #include <linux/shrinker.h>
> @@ -1146,6 +1147,70 @@ void zs_obj_read_end(struct zs_pool *pool, unsigned long handle,
>  }
>  EXPORT_SYMBOL_GPL(zs_obj_read_end);
>  
> +int zs_obj_read_sg_begin(struct zs_pool *pool, unsigned long handle,
> +		   struct scatterlist *sg, size_t mem_len)
> +{
> +	struct zspage *zspage;
> +	struct zpdesc *zpdesc;
> +	unsigned long obj, off;
> +	unsigned int obj_idx;
> +	struct size_class *class;
> +
> +	/* Guarantee we can get zspage from handle safely */
> +	read_lock(&pool->lock);
> +	obj = handle_to_obj(handle);
> +	obj_to_location(obj, &zpdesc, &obj_idx);
> +	zspage = get_zspage(zpdesc);
> +
> +	/* Make sure migration doesn't move any pages in this zspage */
> +	zspage_read_lock(zspage);
> +	read_unlock(&pool->lock);
> +
> +	class = zspage_class(pool, zspage);
> +	off = offset_in_page(class->size * obj_idx);
> +
> +	if (!ZsHugePage(zspage))
> +		off += ZS_HANDLE_SIZE;
> +
> +	if (off + mem_len <= PAGE_SIZE) {
> +		/* this object is contained entirely within a page */
> +		sg_init_table(sg, 1);
> +		sg_set_page(sg, zpdesc_page(zpdesc), mem_len, off);
> +	} else {
> +		size_t sizes[2];
> +
> +		/* this object spans two pages */
> +		sizes[0] = PAGE_SIZE - off;
> +		sizes[1] = mem_len - sizes[0];
> +
> +		sg_init_table(sg, 2);
> +		sg_set_page(sg, zpdesc_page(zpdesc), sizes[0], off);
> +
> +		zpdesc = get_next_zpdesc(zpdesc);
> +		sg = sg_next(sg);

Is this stateful? In other words, does sg_next() advance the SG list
itself, so that the caller gets it back pointing at the second page now?

> +
> +		sg_set_page(sg, zpdesc_page(zpdesc), sizes[1], 0);
> +	}
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(zs_obj_read_sg_begin);
> +
> +void zs_obj_read_sg_end(struct zs_pool *pool, unsigned long handle)
> +{
> +	struct zspage *zspage;
> +	struct zpdesc *zpdesc;
> +	unsigned long obj;
> +	unsigned int obj_idx;
> +
> +	obj = handle_to_obj(handle);
> +	obj_to_location(obj, &zpdesc, &obj_idx);
> +	zspage = get_zspage(zpdesc);
> +
> +	zspage_read_unlock(zspage);
> +}
> +EXPORT_SYMBOL_GPL(zs_obj_read_sg_end);
> +
>  void zs_obj_write(struct zs_pool *pool, unsigned long handle,
>  		  void *handle_mem, size_t mem_len)
>  {
> -- 
> 2.52.0.457.g6b5491de43-goog
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ