lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <hevovghpl2udhpof66oz26ulrpqcrtuwjxcakyskoeoil2wo6x@osbrncj7ifwz>
Date: Fri, 6 Feb 2026 00:39:53 +0000
From: Yosry Ahmed <yosry.ahmed@...ux.dev>
To: Jiayuan Chen <jiayuan.chen@...ux.dev>
Cc: linux-mm@...ck.org, Jiayuan Chen <jiayuan.chen@...pee.com>, 
	Johannes Weiner <hannes@...xchg.org>, Michal Hocko <mhocko@...nel.org>, 
	Roman Gushchin <roman.gushchin@...ux.dev>, Shakeel Butt <shakeel.butt@...ux.dev>, 
	Muchun Song <muchun.song@...ux.dev>, Nhat Pham <nphamcs@...il.com>, 
	Chengming Zhou <chengming.zhou@...ux.dev>, Andrew Morton <akpm@...ux-foundation.org>, 
	Nick Terrell <terrelln@...com>, David Sterba <dsterba@...e.com>, cgroups@...r.kernel.org, 
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH v1] mm: zswap: add per-memcg stat for incompressible pages

On Thu, Feb 05, 2026 at 01:30:12PM +0800, Jiayuan Chen wrote:
> From: Jiayuan Chen <jiayuan.chen@...pee.com>
> 
> The global zswap_stored_incompressible_pages counter was added in commit
> dca4437a5861 ("mm/zswap: store <PAGE_SIZE compression failed page as-is")
> to track how many pages are stored in raw (uncompressed) form in zswap.
> However, in containerized environments, knowing which cgroup is
> contributing incompressible pages is essential for effective resource
> management.
> 
> Add a new memcg stat 'zswpraw' to track incompressible pages per cgroup.
> This helps administrators and orchestrators to:
> 
> 1. Identify workloads that produce incompressible data (e.g., encrypted
>    data, already-compressed media, random data) and may not benefit from
>    zswap.
> 
> 2. Make informed decisions about workload placement - moving
>    incompressible workloads to nodes with larger swap backing devices
>    rather than relying on zswap.
> 
> 3. Debug zswap efficiency issues at the cgroup level without needing to
>    correlate global stats with individual cgroups.
> 
> While the compression ratio can be estimated from existing stats
> (zswap / zswapped * PAGE_SIZE), this doesn't distinguish between
> "uniformly poor compression" and "a few completely incompressible pages
> mixed with highly compressible ones". The zswpraw stat provides direct
> visibility into the latter case.
> 
> Changes
> -------
> 
> 1. Add zswap_is_raw() helper (include/linux/zswap.h)
>    - Abstract the PAGE_SIZE comparison logic for identifying raw entries
>    - Keep the incompressible check in one place for maintainability
> 
> 2. Add MEMCG_ZSWAP_RAW stat definition (include/linux/memcontrol.h,
>    mm/memcontrol.c)
>    - Add MEMCG_ZSWAP_RAW to memcg_stat_item enum
>    - Register in memcg_stat_items[] and memory_stats[] arrays
>    - Export as "zswpraw" in memory.stat
> 
> 3. Update statistics accounting (mm/memcontrol.c, mm/zswap.c)
>    - Track MEMCG_ZSWAP_RAW in obj_cgroup_charge/uncharge_zswap()
>    - Use zswap_is_raw() helper in zswap.c for consistency
> 
> Test
> ----
> 
> I wrote a simple test program[1] that allocates memory and compresses it
> with zstd, so kernel zswap cannot compress further.
> 
>   $ cgcreate -g memory:test
>   $ cgexec -g memory:test ./test_zswpraw &
>   $ cat /sys/fs/cgroup/test/memory.stat | grep zswp
>   zswpraw 0
>   zswpin 0
>   zswpout 0
>   zswpwb 0
> 
>   $ echo "100M" > /sys/fs/cgroup/test/memory.reclaim
>   $ cat /sys/fs/cgroup/test/memory.stat | grep zswp
>   zswpraw 104800256
>   zswpin 0
>   zswpout 51222
>   zswpwb 0
> 
>   $ pkill test_zswpraw
>   $ cat /sys/fs/cgroup/test/memory.stat | grep zswp
>   zswpraw 0
>   zswpin 1
>   zswpout 51222
>   zswpwb 0
> 
> [1] https://gist.github.com/mrpre/00432c6154250326994fbeaf62e0e6f1
> 
> Signed-off-by: Jiayuan Chen <jiayuan.chen@...pee.com>
> ---
>  include/linux/memcontrol.h | 1 +
>  include/linux/zswap.h      | 9 +++++++++
>  mm/memcontrol.c            | 6 ++++++
>  mm/zswap.c                 | 6 +++---
>  4 files changed, 19 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index b6c82c8f73e1..83d1328f81d1 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -39,6 +39,7 @@ enum memcg_stat_item {
>  	MEMCG_KMEM,
>  	MEMCG_ZSWAP_B,
>  	MEMCG_ZSWAPPED,
> +	MEMCG_ZSWAP_RAW,

Please change the name as Shakeel suggested.

>  	MEMCG_NR_STAT,
>  };
>  
> diff --git a/include/linux/zswap.h b/include/linux/zswap.h
> index 30c193a1207e..94f84b154b71 100644
> --- a/include/linux/zswap.h
> +++ b/include/linux/zswap.h
> @@ -7,6 +7,15 @@
>  
>  struct lruvec;
>  
> +/*
> + * Check if a zswap entry is stored in raw (uncompressed) form.
> + * This happens when compression doesn't reduce the size.
> + */
> +static inline bool zswap_is_raw(size_t size)

Internally as well, please rename this to zswap_is_incompressible() or
zswap_is_incomp(). Not a big fan of the helper because it doesn't add
much, but I don't feel strongly either way.

> +{
> +	return size == PAGE_SIZE;
> +}
> +
>  extern atomic_long_t zswap_stored_pages;
>  
>  #ifdef CONFIG_ZSWAP
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 007413a53b45..32fb801530a3 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -341,6 +341,7 @@ static const unsigned int memcg_stat_items[] = {
>  	MEMCG_KMEM,
>  	MEMCG_ZSWAP_B,
>  	MEMCG_ZSWAPPED,
> +	MEMCG_ZSWAP_RAW,
>  };
>  
>  #define NR_MEMCG_NODE_STAT_ITEMS ARRAY_SIZE(memcg_node_stat_items)
> @@ -1346,6 +1347,7 @@ static const struct memory_stat memory_stats[] = {
>  #ifdef CONFIG_ZSWAP
>  	{ "zswap",			MEMCG_ZSWAP_B			},
>  	{ "zswapped",			MEMCG_ZSWAPPED			},
> +	{ "zswpraw",			MEMCG_ZSWAP_RAW			},

Here as well: zswap_incompressible or zswap_incomp?

Other than the renames and doc, LGTM.

>  #endif
>  	{ "file_mapped",		NR_FILE_MAPPED			},
>  	{ "file_dirty",			NR_FILE_DIRTY			},
> @@ -5458,6 +5460,8 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
>  	memcg = obj_cgroup_memcg(objcg);
>  	mod_memcg_state(memcg, MEMCG_ZSWAP_B, size);
>  	mod_memcg_state(memcg, MEMCG_ZSWAPPED, 1);
> +	if (zswap_is_raw(size))
> +		mod_memcg_state(memcg, MEMCG_ZSWAP_RAW, 1);
>  	rcu_read_unlock();
>  }
>  
> @@ -5481,6 +5485,8 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
>  	memcg = obj_cgroup_memcg(objcg);
>  	mod_memcg_state(memcg, MEMCG_ZSWAP_B, -size);
>  	mod_memcg_state(memcg, MEMCG_ZSWAPPED, -1);
> +	if (zswap_is_raw(size))
> +		mod_memcg_state(memcg, MEMCG_ZSWAP_RAW, -1);
>  	rcu_read_unlock();
>  }
>  
> diff --git a/mm/zswap.c b/mm/zswap.c
> index 3d2d59ac3f9c..54ab4d126f64 100644
> --- a/mm/zswap.c
> +++ b/mm/zswap.c
> @@ -723,7 +723,7 @@ static void zswap_entry_free(struct zswap_entry *entry)
>  		obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
>  		obj_cgroup_put(entry->objcg);
>  	}
> -	if (entry->length == PAGE_SIZE)
> +	if (zswap_is_raw(entry->length))
>  		atomic_long_dec(&zswap_stored_incompressible_pages);
>  	zswap_entry_cache_free(entry);
>  	atomic_long_dec(&zswap_stored_pages);
> @@ -941,7 +941,7 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
>  	zs_obj_read_sg_begin(pool->zs_pool, entry->handle, input, entry->length);
>  
>  	/* zswap entries of length PAGE_SIZE are not compressed. */
> -	if (entry->length == PAGE_SIZE) {
> +	if (zswap_is_raw(entry->length)) {
>  		WARN_ON_ONCE(input->length != PAGE_SIZE);
>  		memcpy_from_sglist(kmap_local_folio(folio, 0), input, 0, PAGE_SIZE);
>  		dlen = PAGE_SIZE;
> @@ -1448,7 +1448,7 @@ static bool zswap_store_page(struct page *page,
>  		obj_cgroup_charge_zswap(objcg, entry->length);
>  	}
>  	atomic_long_inc(&zswap_stored_pages);
> -	if (entry->length == PAGE_SIZE)
> +	if (zswap_is_raw(entry->length))
>  		atomic_long_inc(&zswap_stored_incompressible_pages);
>  
>  	/*
> -- 
> 2.43.0
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ