Message-ID: <20260205053013.25134-1-jiayuan.chen@linux.dev>
Date: Thu, 5 Feb 2026 13:30:12 +0800
From: Jiayuan Chen <jiayuan.chen@...ux.dev>
To: linux-mm@...ck.org
Cc: jiayuan.chen@...ux.dev,
Jiayuan Chen <jiayuan.chen@...pee.com>,
Johannes Weiner <hannes@...xchg.org>,
Michal Hocko <mhocko@...nel.org>,
Roman Gushchin <roman.gushchin@...ux.dev>,
Shakeel Butt <shakeel.butt@...ux.dev>,
Muchun Song <muchun.song@...ux.dev>,
Yosry Ahmed <yosry.ahmed@...ux.dev>,
Nhat Pham <nphamcs@...il.com>,
Chengming Zhou <chengming.zhou@...ux.dev>,
Andrew Morton <akpm@...ux-foundation.org>,
Nick Terrell <terrelln@...com>,
David Sterba <dsterba@...e.com>,
cgroups@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [PATCH v1] mm: zswap: add per-memcg stat for incompressible pages
From: Jiayuan Chen <jiayuan.chen@...pee.com>
The global zswap_stored_incompressible_pages counter was added in commit
dca4437a5861 ("mm/zswap: store <PAGE_SIZE compression failed page as-is")
to track how many pages are stored in raw (uncompressed) form in zswap.
However, in containerized environments, knowing which cgroup is
contributing incompressible pages is essential for effective resource
management.
Add a new memcg stat 'zswpraw' to track incompressible pages per cgroup.
This helps administrators and orchestrators to:
1. Identify workloads that produce incompressible data (e.g., encrypted
data, already-compressed media, random data) and may not benefit from
zswap.
2. Make informed decisions about workload placement - moving
incompressible workloads to nodes with larger swap backing devices
rather than relying on zswap.
3. Debug zswap efficiency issues at the cgroup level without needing to
correlate global stats with individual cgroups.
While an average compression ratio can already be estimated from existing
stats (the ratio of zswap to zswapped, both reported in bytes in
memory.stat), this doesn't distinguish between "uniformly poor
compression" and "a few completely incompressible pages mixed with highly
compressible ones". The zswpraw stat provides direct visibility into the
latter case.
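As a worked illustration (hypothetical numbers): two cgroups may both
report zswap = 100 MiB against zswapped = 200 MiB, a 2:1 average. One
could be compressing every page to half size; the other could be storing
a third of its pages raw while the remaining two thirds compress 4:1
(1/3 + (2/3)/4 = 1/2, the same 2:1 average). Only the second cgroup is a
candidate for the placement decision in item 2 above, and only zswpraw
tells the two apart.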
Changes
-------
1. Add zswap_is_raw() helper (include/linux/zswap.h)
- Abstract the PAGE_SIZE comparison logic for identifying raw entries
- Keep the incompressible check in one place for maintainability
2. Add MEMCG_ZSWAP_RAW stat definition (include/linux/memcontrol.h,
mm/memcontrol.c)
- Add MEMCG_ZSWAP_RAW to memcg_stat_item enum
- Register in memcg_stat_items[] and memory_stats[] arrays
- Export as "zswpraw" in memory.stat
3. Update statistics accounting (mm/memcontrol.c, mm/zswap.c)
- Track MEMCG_ZSWAP_RAW in obj_cgroup_charge/uncharge_zswap()
- Use zswap_is_raw() helper in zswap.c for consistency
Test
----
I wrote a simple test program[1] that allocates memory and fills it with
zstd-compressed data, so kernel zswap cannot compress it any further.
$ cgcreate -g memory:test
$ cgexec -g memory:test ./test_zswpraw &
$ cat /sys/fs/cgroup/test/memory.stat | grep zswp
zswpraw 0
zswpin 0
zswpout 0
zswpwb 0
$ echo "100M" > /sys/fs/cgroup/test/memory.reclaim
$ cat /sys/fs/cgroup/test/memory.stat | grep zswp
zswpraw 104800256
zswpin 0
zswpout 51222
zswpwb 0
$ pkill test_zswpraw
$ cat /sys/fs/cgroup/test/memory.stat | grep zswp
zswpraw 0
zswpin 1
zswpout 51222
zswpwb 0
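A note on reading the numbers above (an inference from the patch: since
MEMCG_ZSWAP_RAW counts pages and memory.stat scales page-granular stats
by PAGE_SIZE, zswpraw is reported in bytes): with 4 KiB pages,
104800256 / 4096 = 25586 raw pages. Given zswpin and zswpwb are both 0
at that point, roughly 25586 of the 51222 pages swapped out, about half,
were stored uncompressed.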
[1] https://gist.github.com/mrpre/00432c6154250326994fbeaf62e0e6f1
Signed-off-by: Jiayuan Chen <jiayuan.chen@...pee.com>
---
include/linux/memcontrol.h | 1 +
include/linux/zswap.h | 9 +++++++++
mm/memcontrol.c | 6 ++++++
mm/zswap.c | 6 +++---
4 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b6c82c8f73e1..83d1328f81d1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -39,6 +39,7 @@ enum memcg_stat_item {
MEMCG_KMEM,
MEMCG_ZSWAP_B,
MEMCG_ZSWAPPED,
+ MEMCG_ZSWAP_RAW,
MEMCG_NR_STAT,
};
diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index 30c193a1207e..94f84b154b71 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -7,6 +7,15 @@
struct lruvec;
+/*
+ * Check if a zswap entry is stored in raw (uncompressed) form.
+ * This happens when compression doesn't reduce the size.
+ */
+static inline bool zswap_is_raw(size_t size)
+{
+ return size == PAGE_SIZE;
+}
+
extern atomic_long_t zswap_stored_pages;
#ifdef CONFIG_ZSWAP
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 007413a53b45..32fb801530a3 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -341,6 +341,7 @@ static const unsigned int memcg_stat_items[] = {
MEMCG_KMEM,
MEMCG_ZSWAP_B,
MEMCG_ZSWAPPED,
+ MEMCG_ZSWAP_RAW,
};
#define NR_MEMCG_NODE_STAT_ITEMS ARRAY_SIZE(memcg_node_stat_items)
@@ -1346,6 +1347,7 @@ static const struct memory_stat memory_stats[] = {
#ifdef CONFIG_ZSWAP
{ "zswap", MEMCG_ZSWAP_B },
{ "zswapped", MEMCG_ZSWAPPED },
+ { "zswpraw", MEMCG_ZSWAP_RAW },
#endif
{ "file_mapped", NR_FILE_MAPPED },
{ "file_dirty", NR_FILE_DIRTY },
@@ -5458,6 +5460,8 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
memcg = obj_cgroup_memcg(objcg);
mod_memcg_state(memcg, MEMCG_ZSWAP_B, size);
mod_memcg_state(memcg, MEMCG_ZSWAPPED, 1);
+ if (zswap_is_raw(size))
+ mod_memcg_state(memcg, MEMCG_ZSWAP_RAW, 1);
rcu_read_unlock();
}
@@ -5481,6 +5485,8 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
memcg = obj_cgroup_memcg(objcg);
mod_memcg_state(memcg, MEMCG_ZSWAP_B, -size);
mod_memcg_state(memcg, MEMCG_ZSWAPPED, -1);
+ if (zswap_is_raw(size))
+ mod_memcg_state(memcg, MEMCG_ZSWAP_RAW, -1);
rcu_read_unlock();
}
diff --git a/mm/zswap.c b/mm/zswap.c
index 3d2d59ac3f9c..54ab4d126f64 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -723,7 +723,7 @@ static void zswap_entry_free(struct zswap_entry *entry)
obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
obj_cgroup_put(entry->objcg);
}
- if (entry->length == PAGE_SIZE)
+ if (zswap_is_raw(entry->length))
atomic_long_dec(&zswap_stored_incompressible_pages);
zswap_entry_cache_free(entry);
atomic_long_dec(&zswap_stored_pages);
@@ -941,7 +941,7 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
zs_obj_read_sg_begin(pool->zs_pool, entry->handle, input, entry->length);
/* zswap entries of length PAGE_SIZE are not compressed. */
- if (entry->length == PAGE_SIZE) {
+ if (zswap_is_raw(entry->length)) {
WARN_ON_ONCE(input->length != PAGE_SIZE);
memcpy_from_sglist(kmap_local_folio(folio, 0), input, 0, PAGE_SIZE);
dlen = PAGE_SIZE;
@@ -1448,7 +1448,7 @@ static bool zswap_store_page(struct page *page,
obj_cgroup_charge_zswap(objcg, entry->length);
}
atomic_long_inc(&zswap_stored_pages);
- if (entry->length == PAGE_SIZE)
+ if (zswap_is_raw(entry->length))
atomic_long_inc(&zswap_stored_incompressible_pages);
/*
--
2.43.0