[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260114235022.3437787-4-gourry@gourry.net>
Date: Wed, 14 Jan 2026 18:50:19 -0500
From: Gregory Price <gourry@...rry.net>
To: linux-mm@...ck.org
Cc: linux-cxl@...r.kernel.org,
nvdimm@...ts.linux.dev,
linux-kernel@...r.kernel.org,
virtualization@...ts.linux.dev,
kernel-team@...a.com,
dan.j.williams@...el.com,
vishal.l.verma@...el.com,
dave.jiang@...el.com,
david@...nel.org,
mst@...hat.com,
jasowang@...hat.com,
xuanzhuo@...ux.alibaba.com,
eperezma@...hat.com,
osalvador@...e.de,
akpm@...ux-foundation.org
Subject: [PATCH v2 3/5] dax/kmem: extract hotplug/hotremove helper functions
Refactor the kmem _probe() and _remove() paths by extracting the init,
cleanup, hotplug, and hot-remove logic into separate helper functions:
- dax_kmem_init_resources: inits IO_RESOURCE w/ request_mem_region
- dax_kmem_cleanup_resources: cleans up initialized IO_RESOURCE
- dax_kmem_do_hotplug: handles memory region reservation and adding
- dax_kmem_do_hotremove: handles memory removal and resource cleanup
This is a pure refactoring with no functional change. The helpers will
enable future extensions to support more granular control over memory
hotplug operations.
We need to split hotplug/remove and init/cleanup in order to have the
resources available for hot-add. Otherwise, when probe occurs, the dax
devices are never added to sysfs because the resources are never
registered.
Signed-off-by: Gregory Price <gourry@...rry.net>
---
drivers/dax/kmem.c | 300 +++++++++++++++++++++++++++++++--------------
1 file changed, 206 insertions(+), 94 deletions(-)
diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c
index bb13d9ced2e9..3929cb8576de 100644
--- a/drivers/dax/kmem.c
+++ b/drivers/dax/kmem.c
@@ -65,14 +65,185 @@ static void kmem_put_memory_types(void)
mt_put_memory_types(&kmem_memory_types);
}
+/**
+ * dax_kmem_do_hotplug - hotplug memory for dax kmem device
+ * @dev_dax: the dev_dax instance
+ * @data: the dax_kmem_data structure with resource tracking
+ * @online_type: online policy passed to add_memory_driver_managed()
+ *
+ * Hotplugs every range of the dev_dax region that has a reserved resource
+ * tracked in @data as driver-managed system memory.
+ *
+ * Once at least one range has been added, a failure on a later range is
+ * only logged and the remaining ranges are still attempted.
+ *
+ * Returns the number of successfully hotplugged ranges, or the negative
+ * error from add_memory_driver_managed() if an add fails before any range
+ * has been added.
+ */
+static int dax_kmem_do_hotplug(struct dev_dax *dev_dax,
+			       struct dax_kmem_data *data,
+			       int online_type)
+{
+	struct device *dev = &dev_dax->dev;
+	int i, rc, added = 0;
+	mhp_t mhp_flags;
+
+	for (i = 0; i < dev_dax->nr_range; i++) {
+		struct range range;
+
+		rc = dax_kmem_range(dev_dax, i, &range);
+		if (rc)
+			continue;
+
+		/*
+		 * Skip ranges without a reserved resource. The probe loop this
+		 * was extracted from never added memory for a range whose
+		 * reservation failed, and hot-remove skips such ranges too.
+		 */
+		if (!data->res[i])
+			continue;
+
+		mhp_flags = MHP_NID_IS_MGID;
+		if (dev_dax->memmap_on_memory)
+			mhp_flags |= MHP_MEMMAP_ON_MEMORY;
+
+		/*
+		 * Ensure that future kexec'd kernels will not treat
+		 * this as RAM automatically.
+		 */
+		rc = add_memory_driver_managed(data->mgid, range.start,
+				range_len(&range), kmem_name, mhp_flags,
+				online_type);
+
+		if (rc) {
+			dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
+				 i, range.start, range.end);
+			/* Keep what was already added; fail only if nothing was. */
+			if (added)
+				continue;
+			return rc;
+		}
+		added++;
+	}
+
+	return added;
+}
+
+
+/**
+ * dax_kmem_init_resources - create memory regions for dax kmem
+ * @dev_dax: the dev_dax instance
+ * @data: the dax_kmem_data structure with resource tracking
+ *
+ * Reserves a memory region for each range of the dev_dax region that does
+ * not already have one recorded in @data->res, and records it there.
+ *
+ * Returns the number of ranges reserved by this call, or -EBUSY if a
+ * reservation fails before any range has been reserved.
+ */
+static int dax_kmem_init_resources(struct dev_dax *dev_dax,
+				   struct dax_kmem_data *data)
+{
+	struct device *dev = &dev_dax->dev;
+	int idx, reserved = 0;
+
+	for (idx = 0; idx < dev_dax->nr_range; idx++) {
+		struct resource *region;
+		struct range r;
+
+		/*
+		 * Nothing to do for ranges dax_kmem_range() rejects or for
+		 * ranges that already have a resource recorded.
+		 */
+		if (dax_kmem_range(dev_dax, idx, &r) || data->res[idx])
+			continue;
+
+		/* Region is permanently reserved if hotremove fails. */
+		region = request_mem_region(r.start, range_len(&r),
+					    data->res_name);
+		if (region) {
+			/*
+			 * Set flags appropriate for System RAM. Leave
+			 * ..._BUSY clear so that add_memory() can add a
+			 * child resource. Do not inherit flags from the
+			 * parent since it may set new flags unknown to us
+			 * that will break a later add_memory().
+			 */
+			region->flags = IORESOURCE_SYSTEM_RAM;
+			data->res[idx] = region;
+			reserved++;
+			continue;
+		}
+
+		dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n",
+			 idx, r.start, r.end);
+		/*
+		 * Carry on with a partial set once earlier ranges have been
+		 * reserved; only fail outright when nothing was.
+		 */
+		if (!reserved)
+			return -EBUSY;
+	}
+
+	return reserved;
+}
+
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+/**
+ * dax_kmem_do_hotremove - hot-remove memory for dax kmem device
+ * @dev_dax: the dev_dax instance
+ * @data: the dax_kmem_data structure with resource tracking
+ *
+ * Calls remove_memory() on every range of the dev_dax region that has a
+ * resource tracked in @data. A failed removal is logged and recorded in
+ * any_hotremove_failed. Ranges that are skipped are not counted as
+ * removed.
+ *
+ * Returns the number of successfully removed ranges.
+ */
+static int dax_kmem_do_hotremove(struct dev_dax *dev_dax,
+				 struct dax_kmem_data *data)
+{
+	struct device *dev = &dev_dax->dev;
+	int i, success = 0;
+
+	for (i = 0; i < dev_dax->nr_range; i++) {
+		struct range range;
+		int rc;
+
+		rc = dax_kmem_range(dev_dax, i, &range);
+		if (rc)
+			continue;
+
+		/* Skip ranges not currently added */
+		if (!data->res[i])
+			continue;
+
+		rc = remove_memory(range.start, range_len(&range));
+		if (rc == 0) {
+			success++;
+			continue;
+		}
+		any_hotremove_failed = true;
+		dev_err(dev, "mapping%d: %#llx-%#llx hotremove failed\n",
+			i, range.start, range.end);
+	}
+
+	return success;
+}
+#else
+/*
+ * Memory hot-remove is not built in: report the operation as unsupported.
+ * -EOPNOTSUPP is used because ENOSUPP is not a defined errno.
+ */
+static int dax_kmem_do_hotremove(struct dev_dax *dev_dax,
+				 struct dax_kmem_data *data)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+
+
+/**
+ * dax_kmem_cleanup_resources - remove the dax memory resources
+ * @dev_dax: the dev_dax instance
+ * @data: the dax_kmem_data structure with resource tracking
+ *
+ * Removes and frees every resource still tracked in @data->res and clears
+ * its slot, so ranges already cleaned up are skipped on a repeated call.
+ */
+static void dax_kmem_cleanup_resources(struct dev_dax *dev_dax,
+				       struct dax_kmem_data *data)
+{
+	int idx;
+
+	for (idx = 0; idx < dev_dax->nr_range; idx++) {
+		struct resource *res = data->res[idx];
+
+		if (res) {
+			remove_resource(res);
+			kfree(res);
+			data->res[idx] = NULL;
+		}
+	}
+}
+
static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
{
struct device *dev = &dev_dax->dev;
unsigned long total_len = 0, orig_len = 0;
struct dax_kmem_data *data;
struct memory_dev_type *mtype;
- int i, rc, mapped = 0;
- mhp_t mhp_flags;
+ int i, rc;
int numa_node;
int adist = MEMTIER_DEFAULT_DAX_ADISTANCE;
@@ -134,68 +305,26 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
goto err_reg_mgid;
data->mgid = rc;
- for (i = 0; i < dev_dax->nr_range; i++) {
- struct resource *res;
- struct range range;
-
- rc = dax_kmem_range(dev_dax, i, &range);
- if (rc)
- continue;
-
- /* Region is permanently reserved if hotremove fails. */
- res = request_mem_region(range.start, range_len(&range), data->res_name);
- if (!res) {
- dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n",
- i, range.start, range.end);
- /*
- * Once some memory has been onlined we can't
- * assume that it can be un-onlined safely.
- */
- if (mapped)
- continue;
- rc = -EBUSY;
- goto err_request_mem;
- }
- data->res[i] = res;
-
- /*
- * Set flags appropriate for System RAM. Leave ..._BUSY clear
- * so that add_memory() can add a child resource. Do not
- * inherit flags from the parent since it may set new flags
- * unknown to us that will break add_memory() below.
- */
- res->flags = IORESOURCE_SYSTEM_RAM;
-
- mhp_flags = MHP_NID_IS_MGID;
- if (dev_dax->memmap_on_memory)
- mhp_flags |= MHP_MEMMAP_ON_MEMORY;
-
- /*
- * Ensure that future kexec'd kernels will not treat
- * this as RAM automatically.
- */
- rc = add_memory_driver_managed(data->mgid, range.start,
- range_len(&range), kmem_name, mhp_flags,
- mhp_get_default_online_type());
+ dev_set_drvdata(dev, data);
- if (rc) {
- dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
- i, range.start, range.end);
- remove_resource(res);
- kfree(res);
- data->res[i] = NULL;
- if (mapped)
- continue;
- goto err_request_mem;
- }
- mapped++;
- }
+ rc = dax_kmem_init_resources(dev_dax, data);
+ if (rc < 0)
+ goto err_resources;
- dev_set_drvdata(dev, data);
+ /*
+ * Hotplug using the system default policy - this preserves backwards
+ * compatibility for existing users relying on default auto-online behavior.
+ */
+ rc = dax_kmem_do_hotplug(dev_dax, data, mhp_get_default_online_type());
+ if (rc < 0)
+ goto err_hotplug;
return 0;
-err_request_mem:
+err_hotplug:
+ dax_kmem_cleanup_resources(dev_dax, data);
+err_resources:
+ dev_set_drvdata(dev, NULL);
memory_group_unregister(data->mgid);
err_reg_mgid:
kfree(data->res_name);
@@ -209,7 +338,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
#ifdef CONFIG_MEMORY_HOTREMOVE
static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
{
- int i, success = 0;
+ int success;
int node = dev_dax->target_node;
struct device *dev = &dev_dax->dev;
struct dax_kmem_data *data = dev_get_drvdata(dev);
@@ -220,42 +349,25 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
* there is no way to hotremove this memory until reboot because device
* unbind will succeed even if we return failure.
*/
- for (i = 0; i < dev_dax->nr_range; i++) {
- struct range range;
- int rc;
-
- rc = dax_kmem_range(dev_dax, i, &range);
- if (rc)
- continue;
-
- rc = remove_memory(range.start, range_len(&range));
- if (rc == 0) {
- remove_resource(data->res[i]);
- kfree(data->res[i]);
- data->res[i] = NULL;
- success++;
- continue;
- }
- any_hotremove_failed = true;
- dev_err(dev,
- "mapping%d: %#llx-%#llx cannot be hotremoved until the next reboot\n",
- i, range.start, range.end);
+ success = dax_kmem_do_hotremove(dev_dax, data);
+ if (success < dev_dax->nr_range) {
+ dev_err(dev, "Hotplug regions stuck online until reboot\n");
+ return;
}
- if (success >= dev_dax->nr_range) {
- memory_group_unregister(data->mgid);
- kfree(data->res_name);
- kfree(data);
- dev_set_drvdata(dev, NULL);
- /*
- * Clear the memtype association on successful unplug.
- * If not, we have memory blocks left which can be
- * offlined/onlined later. We need to keep memory_dev_type
- * for that. This implies this reference will be around
- * till next reboot.
- */
- clear_node_memory_type(node, NULL);
- }
+ dax_kmem_cleanup_resources(dev_dax, data);
+ memory_group_unregister(data->mgid);
+ kfree(data->res_name);
+ kfree(data);
+ dev_set_drvdata(dev, NULL);
+ /*
+ * Clear the memtype association on successful unplug.
+ * If not, we have memory blocks left which can be
+ * offlined/onlined later. We need to keep memory_dev_type
+ * for that. This implies this reference will be around
+ * till next reboot.
+ */
+ clear_node_memory_type(node, NULL);
}
#else
static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
--
2.52.0
Powered by blists - more mailing lists