Message-ID: <20260108203755.1163107-8-gourry@gourry.net>
Date: Thu, 8 Jan 2026 15:37:54 -0500
From: Gregory Price <gourry@...rry.net>
To: linux-mm@...ck.org,
cgroups@...r.kernel.org,
linux-cxl@...r.kernel.org
Cc: linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
kernel-team@...a.com,
longman@...hat.com,
tj@...nel.org,
hannes@...xchg.org,
mkoutny@...e.com,
corbet@....net,
gregkh@...uxfoundation.org,
rafael@...nel.org,
dakr@...nel.org,
dave@...olabs.net,
jonathan.cameron@...wei.com,
dave.jiang@...el.com,
alison.schofield@...el.com,
vishal.l.verma@...el.com,
ira.weiny@...el.com,
dan.j.williams@...el.com,
akpm@...ux-foundation.org,
vbabka@...e.cz,
surenb@...gle.com,
mhocko@...e.com,
jackmanb@...gle.com,
ziy@...dia.com,
david@...nel.org,
lorenzo.stoakes@...cle.com,
Liam.Howlett@...cle.com,
rppt@...nel.org,
axelrasmussen@...gle.com,
yuanchu@...gle.com,
weixugc@...gle.com,
yury.norov@...il.com,
linux@...musvillemoes.dk,
rientjes@...gle.com,
shakeel.butt@...ux.dev,
chrisl@...nel.org,
kasong@...cent.com,
shikemeng@...weicloud.com,
nphamcs@...il.com,
bhe@...hat.com,
baohua@...nel.org,
yosry.ahmed@...ux.dev,
chengming.zhou@...ux.dev,
roman.gushchin@...ux.dev,
muchun.song@...ux.dev,
osalvador@...e.de,
matthew.brost@...el.com,
joshua.hahnjy@...il.com,
rakie.kim@...com,
byungchul@...com,
gourry@...rry.net,
ying.huang@...ux.alibaba.com,
apopple@...dia.com,
cl@...two.org,
harry.yoo@...cle.com,
zhengqi.arch@...edance.com
Subject: [RFC PATCH v3 7/8] mm/zswap: compressed ram direct integration
If a private zswap node is available, skip the software compression
step entirely: copy the page directly into a newly allocated folio on
the compressed-memory node, and store the newly allocated page as the
zswap entry->handle.

On decompress we do the opposite: copy directly from the stored page
to the destination, and free the compressed-memory page.

The driver callback is responsible for preventing runaway compression
ratio failures by checking that the allocated page is safe to use
(i.e., that the device's compression ratio limit hasn't been crossed).
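
For illustration, the store path depends on two driver hooks used in
this patch, node_private_allocated() and node_private_freed(). A
driver-side ratio check might look roughly like the sketch below; the
device structure, the page-to-device lookup, and the ratio accessors
are all hypothetical names invented for the example:

	/* Hypothetical driver callback - a sketch, not part of this patch */
	static int example_private_allocated(struct page *page)
	{
		struct example_cram_dev *dev = example_page_to_dev(page);

		/*
		 * If the device's achieved compression ratio has dropped
		 * below its configured floor, storing another page could
		 * overcommit the physical backing store: return an error
		 * so zswap falls back to software compression.
		 */
		if (example_ratio_x100(dev) < dev->min_ratio_x100)
			return -ENOSPC;
		return 0;
	}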
Signed-off-by: Gregory Price <gourry@...rry.net>
---
include/linux/zswap.h | 5 ++
mm/zswap.c | 106 +++++++++++++++++++++++++++++++++++++++++-
2 files changed, 109 insertions(+), 2 deletions(-)
diff --git a/include/linux/zswap.h b/include/linux/zswap.h
index 30c193a1207e..4b52fe447e7e 100644
--- a/include/linux/zswap.h
+++ b/include/linux/zswap.h
@@ -35,6 +35,8 @@ void zswap_lruvec_state_init(struct lruvec *lruvec);
void zswap_folio_swapin(struct folio *folio);
bool zswap_is_enabled(void);
bool zswap_never_enabled(void);
+void zswap_add_direct_node(int nid);
+void zswap_remove_direct_node(int nid);
#else
struct zswap_lruvec_state {};
@@ -69,6 +71,9 @@ static inline bool zswap_never_enabled(void)
return true;
}
+static inline void zswap_add_direct_node(int nid) {}
+static inline void zswap_remove_direct_node(int nid) {}
+
#endif
#endif /* _LINUX_ZSWAP_H */
diff --git a/mm/zswap.c b/mm/zswap.c
index de8858ff1521..aada588c957e 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -35,6 +35,7 @@
#include <linux/workqueue.h>
#include <linux/list_lru.h>
#include <linux/zsmalloc.h>
+#include <linux/node.h>
#include "swap.h"
#include "internal.h"
@@ -190,6 +191,7 @@ struct zswap_entry {
swp_entry_t swpentry;
unsigned int length;
bool referenced;
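+	/* when set, handle is a struct page *, not a zsmalloc handle */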
+ bool direct;
struct zswap_pool *pool;
unsigned long handle;
struct obj_cgroup *objcg;
@@ -199,6 +201,20 @@ struct zswap_entry {
static struct xarray *zswap_trees[MAX_SWAPFILES];
static unsigned int nr_zswap_trees[MAX_SWAPFILES];
+/* Nodemask for compressed RAM nodes used by zswap_compress_direct */
+static nodemask_t zswap_direct_nodes = NODE_MASK_NONE;
+
+void zswap_add_direct_node(int nid)
+{
+ node_set(nid, zswap_direct_nodes);
+}
+
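+/* A node stays in the direct mask until it has actually gone offline */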
+void zswap_remove_direct_node(int nid)
+{
+ if (!node_online(nid))
+ node_clear(nid, zswap_direct_nodes);
+}
+
/* RCU-protected iteration */
static LIST_HEAD(zswap_pools);
/* protects zswap_pools list modification */
@@ -716,7 +732,13 @@ static void zswap_entry_cache_free(struct zswap_entry *entry)
static void zswap_entry_free(struct zswap_entry *entry)
{
zswap_lru_del(&zswap_list_lru, entry);
- zs_free(entry->pool->zs_pool, entry->handle);
+	if (entry->direct) {
+		struct page *page = (struct page *)entry->handle;
+
+		node_private_freed(page);
+		__free_page(page);
+	} else {
+		zs_free(entry->pool->zs_pool, entry->handle);
+	}
zswap_pool_put(entry->pool);
if (entry->objcg) {
obj_cgroup_uncharge_zswap(entry->objcg, entry->length);
@@ -849,6 +871,58 @@ static void acomp_ctx_put_unlock(struct crypto_acomp_ctx *acomp_ctx)
mutex_unlock(&acomp_ctx->mutex);
}
+static struct page *zswap_compress_direct(struct page *src,
+					  struct zswap_entry *entry)
+{
+	int nid;
+	struct page *dst;
+	gfp_t gfp;
+
+	if (nodes_empty(zswap_direct_nodes))
+		return NULL;
+
+	/* Opportunistic allocation: no direct reclaim, no retries */
+	gfp = GFP_NOWAIT | __GFP_NORETRY | __GFP_HIGHMEM | __GFP_MOVABLE |
+	      __GFP_THISNODE;
+
+	for_each_node_mask(nid, zswap_direct_nodes) {
+		dst = __alloc_pages(gfp, 0, nid, &zswap_direct_nodes);
+		if (!dst)
+			continue;
+
+		/*
+		 * Check with the device driver that this page is safe to
+		 * use.  If the device reports an error (e.g., its
+		 * compression ratio limit has been crossed and the page
+		 * can't safely store data), free the page and try another
+		 * node.
+		 */
+		if (node_private_allocated(dst)) {
+			__free_page(dst);
+			continue;
+		}
+
+		/* If the copy fails at this point, just fall back */
+		if (copy_mc_highpage(dst, src)) {
+			__free_page(dst);
+			return NULL;
+		}
+
+		return dst;
+	}
+
+	return NULL;
+}
+
static bool zswap_compress(struct page *page, struct zswap_entry *entry,
struct zswap_pool *pool)
{
@@ -860,6 +934,17 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
gfp_t gfp;
u8 *dst;
bool mapped = false;
+	struct page *zpage;
+
+	/* Try to shunt directly to compressed RAM */
+	zpage = zswap_compress_direct(page, entry);
+	if (zpage) {
+		entry->handle = (unsigned long)zpage;
+		/* stored uncompressed: account a full page */
+		entry->length = PAGE_SIZE;
+		entry->direct = true;
+		return true;
+	}
+	/* Otherwise fall back to software compression */
acomp_ctx = acomp_ctx_get_cpu_lock(pool);
dst = acomp_ctx->buffer;
@@ -913,6 +998,7 @@ static bool zswap_compress(struct page *page, struct zswap_entry *entry,
zs_obj_write(pool->zs_pool, handle, dst, dlen);
entry->handle = handle;
entry->length = dlen;
+ entry->direct = false;
unlock:
if (mapped)
@@ -936,6 +1022,15 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
int decomp_ret = 0, dlen = PAGE_SIZE;
u8 *src, *obj;
+	/* Compressed RAM page: copy the contents back directly */
+	if (entry->direct) {
+		struct page *zpage = (struct page *)entry->handle;
+		struct folio *zfolio = page_folio(zpage);
+
+		memcpy_folio(folio, 0, zfolio, 0, PAGE_SIZE);
+		/* decomp_ret == 0 and dlen == PAGE_SIZE signal success */
+		goto direct_done;
+	}
+
acomp_ctx = acomp_ctx_get_cpu_lock(pool);
obj = zs_obj_read_begin(pool->zs_pool, entry->handle, acomp_ctx->buffer);
@@ -969,6 +1064,7 @@ static bool zswap_decompress(struct zswap_entry *entry, struct folio *folio)
zs_obj_read_end(pool->zs_pool, entry->handle, obj);
acomp_ctx_put_unlock(acomp_ctx);
+direct_done:
if (!decomp_ret && dlen == PAGE_SIZE)
return true;
@@ -1483,7 +1579,13 @@ static bool zswap_store_page(struct page *page,
return true;
store_failed:
- zs_free(pool->zs_pool, entry->handle);
+	if (entry->direct) {
+		struct page *freepage = (struct page *)entry->handle;
+
+		node_private_freed(freepage);
+		__free_page(freepage);
+	} else {
+		zs_free(pool->zs_pool, entry->handle);
+	}
compress_failed:
zswap_entry_cache_free(entry);
return false;
--
2.52.0