[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260108203755.1163107-9-gourry@gourry.net>
Date: Thu, 8 Jan 2026 15:37:55 -0500
From: Gregory Price <gourry@...rry.net>
To: linux-mm@...ck.org,
cgroups@...r.kernel.org,
linux-cxl@...r.kernel.org
Cc: linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
kernel-team@...a.com,
longman@...hat.com,
tj@...nel.org,
hannes@...xchg.org,
mkoutny@...e.com,
corbet@....net,
gregkh@...uxfoundation.org,
rafael@...nel.org,
dakr@...nel.org,
dave@...olabs.net,
jonathan.cameron@...wei.com,
dave.jiang@...el.com,
alison.schofield@...el.com,
vishal.l.verma@...el.com,
ira.weiny@...el.com,
dan.j.williams@...el.com,
akpm@...ux-foundation.org,
vbabka@...e.cz,
surenb@...gle.com,
mhocko@...e.com,
jackmanb@...gle.com,
ziy@...dia.com,
david@...nel.org,
lorenzo.stoakes@...cle.com,
Liam.Howlett@...cle.com,
rppt@...nel.org,
axelrasmussen@...gle.com,
yuanchu@...gle.com,
weixugc@...gle.com,
yury.norov@...il.com,
linux@...musvillemoes.dk,
rientjes@...gle.com,
shakeel.butt@...ux.dev,
chrisl@...nel.org,
kasong@...cent.com,
shikemeng@...weicloud.com,
nphamcs@...il.com,
bhe@...hat.com,
baohua@...nel.org,
yosry.ahmed@...ux.dev,
chengming.zhou@...ux.dev,
roman.gushchin@...ux.dev,
muchun.song@...ux.dev,
osalvador@...e.de,
matthew.brost@...el.com,
joshua.hahnjy@...il.com,
rakie.kim@...com,
byungchul@...com,
gourry@...rry.net,
ying.huang@...ux.alibaba.com,
apopple@...dia.com,
cl@...two.org,
harry.yoo@...cle.com,
zhengqi.arch@...edance.com
Subject: [RFC PATCH v3 8/8] drivers/cxl: add zswap private_region type
Add a sample zswap private region type, which registers itself as a valid
target node with mm/zswap. Zswap will call back into the driver on each
page allocation and free.
In cxl_zswap_page_allocated(), a real driver would check whether the current
compression ratio, compared with the worst case, makes it safe to allow new
writes; this sample always allows them.
In cxl_zswap_page_freed(), zero the page so that the device can reclaim the
capacity it used.
A device driver registering a zswap private region would need to provide
an indicator to this component of whether to allow new allocations - this
would probably be done via an interrupt setting a bit which says the
compression ratio has reached some conservative threshold.
Signed-off-by: Gregory Price <gourry@...rry.net>
---
drivers/cxl/core/private_region/Makefile | 3 +
.../cxl/core/private_region/private_region.c | 10 ++
.../cxl/core/private_region/private_region.h | 4 +
drivers/cxl/core/private_region/zswap.c | 127 ++++++++++++++++++
drivers/cxl/cxl.h | 2 +
5 files changed, 146 insertions(+)
create mode 100644 drivers/cxl/core/private_region/zswap.c
diff --git a/drivers/cxl/core/private_region/Makefile b/drivers/cxl/core/private_region/Makefile
index d17498129ba6..ba495cd3f89f 100644
--- a/drivers/cxl/core/private_region/Makefile
+++ b/drivers/cxl/core/private_region/Makefile
@@ -7,3 +7,6 @@ ccflags-y += -I$(srctree)/drivers/cxl
# Core dispatch and sysfs
obj-$(CONFIG_CXL_REGION) += private_region.o
+
+# Type-specific implementations
+obj-$(CONFIG_CXL_REGION) += zswap.o
diff --git a/drivers/cxl/core/private_region/private_region.c b/drivers/cxl/core/private_region/private_region.c
index ead48abb9fc7..da5fb3d264e1 100644
--- a/drivers/cxl/core/private_region/private_region.c
+++ b/drivers/cxl/core/private_region/private_region.c
@@ -16,6 +16,8 @@
static const char *private_type_to_string(enum cxl_private_region_type type)
{
switch (type) {
+ case CXL_PRIVATE_ZSWAP:
+ return "zswap";
default:
return "";
}
@@ -23,6 +25,8 @@ static const char *private_type_to_string(enum cxl_private_region_type type)
static enum cxl_private_region_type string_to_private_type(const char *str)
{
+ if (sysfs_streq(str, "zswap"))
+ return CXL_PRIVATE_ZSWAP;
return CXL_PRIVATE_NONE;
}
@@ -88,6 +92,9 @@ int cxl_register_private_region(struct cxl_region *cxlr)
/* Call type-specific registration which sets memtype and callbacks */
switch (cxlr->private_type) {
+ case CXL_PRIVATE_ZSWAP:
+ rc = cxl_register_zswap_region(cxlr);
+ break;
default:
dev_dbg(&cxlr->dev, "unsupported private_type: %d\n",
cxlr->private_type);
@@ -113,6 +120,9 @@ void cxl_unregister_private_region(struct cxl_region *cxlr)
/* Dispatch to type-specific cleanup */
switch (cxlr->private_type) {
+ case CXL_PRIVATE_ZSWAP:
+ cxl_unregister_zswap_region(cxlr);
+ break;
default:
break;
}
diff --git a/drivers/cxl/core/private_region/private_region.h b/drivers/cxl/core/private_region/private_region.h
index 9b34e51d8df4..84d43238dbe1 100644
--- a/drivers/cxl/core/private_region/private_region.h
+++ b/drivers/cxl/core/private_region/private_region.h
@@ -7,4 +7,8 @@ struct cxl_region;
int cxl_register_private_region(struct cxl_region *cxlr);
void cxl_unregister_private_region(struct cxl_region *cxlr);
+/* Type-specific registration - called from private_region.c dispatch */
+int cxl_register_zswap_region(struct cxl_region *cxlr);
+void cxl_unregister_zswap_region(struct cxl_region *cxlr);
+
#endif /* __CXL_PRIVATE_REGION_H__ */
diff --git a/drivers/cxl/core/private_region/zswap.c b/drivers/cxl/core/private_region/zswap.c
new file mode 100644
index 000000000000..c213abe2fad7
--- /dev/null
+++ b/drivers/cxl/core/private_region/zswap.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CXL Private Region - zswap type implementation
+ *
+ * This file implements the zswap private region type for CXL devices.
+ * It handles registration/unregistration of CXL regions as zswap
+ * compressed memory targets.
+ */
+
+#include <linux/device.h>
+#include <linux/highmem.h>
+#include <linux/node.h>
+#include <linux/zswap.h>
+#include <linux/memory_hotplug.h>
+#include "../../cxl.h"
+#include "../core.h"
+#include "private_region.h"
+
+/*
+ * CXL zswap region page_allocated callback
+ *
+ * @page: the page being reported; only its node id is read here
+ * @data: opaque callback argument, used as a struct cxl_region pointer
+ *        (presumably the region whose private_ops were registered - confirm
+ *        against node_register_private())
+ *
+ * This callback is invoked by zswap when a page is allocated from a private
+ * node to validate that the page is safe to use. For a real compressed memory
+ * device, this would check the device's compression ratio and return an error
+ * if the page cannot safely store data.
+ *
+ * Currently this is a placeholder that always succeeds. A real implementation
+ * would query the device hardware to determine if sufficient compression
+ * headroom exists.
+ *
+ * Return: 0 unconditionally in this placeholder; the only other effect is a
+ * debug message naming the node id of @page.
+ */
+static int cxl_zswap_page_allocated(struct page *page, void *data)
+{
+ struct cxl_region *cxlr = data;
+
+ /*
+ * TODO: Query the CXL device to check if this page allocation is safe.
+ *
+ * A real compressed memory device would track its compression ratio
+ * and report whether it has headroom to accept new data. If the
+ * compression ratio is too low (device is near capacity), this should
+ * return -ENOSPC to tell zswap to try another node.
+ *
+ * For now, always succeed since we're testing with regular memory.
+ */
+ dev_dbg(&cxlr->dev, "page_allocated callback for nid %d\n",
+ page_to_nid(page));
+
+ return 0;
+}
+
+/*
+ * CXL zswap region page_freed callback
+ *
+ * @page: the page being freed; its contents are overwritten with zeroes
+ * @data: opaque callback argument, used as a struct cxl_region pointer
+ *        (presumably the region whose private_ops were registered - confirm
+ *        against node_register_private())
+ *
+ * This callback is invoked when a page from a private node is being freed.
+ * We zero the page before returning it to the allocator so that the compressed
+ * memory device can reclaim capacity - zeroed pages achieve excellent
+ * compression ratios.
+ *
+ * The page is zeroed first; the debug message is emitted afterwards.
+ */
+static void cxl_zswap_page_freed(struct page *page, void *data)
+{
+ struct cxl_region *cxlr = data;
+
+ /*
+ * Zero the page to improve the device's compression ratio.
+ * Zeroed pages compress extremely well, reclaiming device capacity.
+ */
+ clear_highpage(page);
+
+ dev_dbg(&cxlr->dev, "page_freed callback for nid %d\n",
+ page_to_nid(page));
+}
+
+/*
+ * Unregister a zswap region from the zswap subsystem.
+ *
+ * @cxlr: the region to unregister
+ *
+ * This function removes the node from zswap direct nodes and unregisters
+ * the private node operations, in that order (the reverse of registration).
+ *
+ * It returns without doing anything when @cxlr is not private, when its
+ * private_ops.memtype is not NODE_MEM_ZSWAP, or when it has no resource.
+ *
+ * NOTE(review): the node id is re-derived from the resource start and used
+ * without validation. Confirm that phys_to_target_node() cannot yield
+ * NUMA_NO_NODE here and that it still resolves to the node id used at
+ * registration time.
+ *
+ * NOTE(review): private_ops.memtype is left as NODE_MEM_ZSWAP afterwards, so
+ * a second call passes the guard above and repeats the teardown. Confirm
+ * that callers never invoke this twice, or after a failed registration.
+ */
+void cxl_unregister_zswap_region(struct cxl_region *cxlr)
+{
+ int nid;
+
+ if (!cxlr->private ||
+ cxlr->private_ops.memtype != NODE_MEM_ZSWAP)
+ return;
+
+ if (!cxlr->params.res)
+ return;
+
+ nid = phys_to_target_node(cxlr->params.res->start);
+
+ zswap_remove_direct_node(nid);
+ node_unregister_private(nid, &cxlr->private_ops);
+
+ dev_dbg(&cxlr->dev, "unregistered zswap region for nid %d\n", nid);
+}
+
+/*
+ * Register a zswap region with the zswap subsystem.
+ *
+ * @cxlr: the region to register; must be private and have a resource
+ *
+ * This function sets up the memtype and the page_allocated and page_freed
+ * callbacks, registers them for the node derived from the region's resource
+ * start, and then registers that node with zswap as a direct compression
+ * target.
+ * The caller is responsible for adding the dax region after this succeeds.
+ *
+ * Return: 0 on success, -EINVAL if @cxlr is not private or has no resource,
+ * or the error returned by node_register_private().
+ *
+ * NOTE(review): the result of phys_to_target_node() is passed on without a
+ * NUMA_NO_NODE check. Confirm whether node_register_private() rejects an
+ * invalid node id, or validate it here.
+ *
+ * NOTE(review): when node_register_private() fails, private_ops keeps
+ * memtype NODE_MEM_ZSWAP and both callbacks, so the guard in
+ * cxl_unregister_zswap_region() would still pass. Confirm that the caller
+ * does not unregister after a failed registration.
+ *
+ * NOTE(review): the result of zswap_add_direct_node() is not examined. If it
+ * can fail, the private node registration above should be unwound.
+ */
+int cxl_register_zswap_region(struct cxl_region *cxlr)
+{
+ int nid, rc;
+
+ if (!cxlr->private || !cxlr->params.res)
+ return -EINVAL;
+
+ nid = phys_to_target_node(cxlr->params.res->start);
+
+ /* Register with node subsystem as zswap memory */
+ cxlr->private_ops.memtype = NODE_MEM_ZSWAP;
+ cxlr->private_ops.page_allocated = cxl_zswap_page_allocated;
+ cxlr->private_ops.page_freed = cxl_zswap_page_freed;
+ rc = node_register_private(nid, &cxlr->private_ops);
+ if (rc)
+ return rc;
+
+ /* Register this node with zswap as a direct compression target */
+ zswap_add_direct_node(nid);
+
+ dev_dbg(&cxlr->dev, "registered zswap region for nid %d\n", nid);
+ return 0;
+}
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index b276956ff88d..89d8ae4e796c 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -534,9 +534,11 @@ enum cxl_partition_mode {
/**
* enum cxl_private_region_type - CXL private region types
* @CXL_PRIVATE_NONE: No private region type set
+ * @CXL_PRIVATE_ZSWAP: Region used for zswap compressed memory
*/
enum cxl_private_region_type {
CXL_PRIVATE_NONE,
+ CXL_PRIVATE_ZSWAP,
};
/**
--
2.52.0
Powered by blists - more mailing lists