[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20251107224956.477056-3-gourry@gourry.net>
Date: Fri, 7 Nov 2025 17:49:47 -0500
From: Gregory Price <gourry@...rry.net>
To: linux-mm@...ck.org
Cc: linux-cxl@...r.kernel.org,
linux-kernel@...r.kernel.org,
nvdimm@...ts.linux.dev,
linux-fsdevel@...r.kernel.org,
cgroups@...r.kernel.org,
dave@...olabs.net,
jonathan.cameron@...wei.com,
dave.jiang@...el.com,
alison.schofield@...el.com,
vishal.l.verma@...el.com,
ira.weiny@...el.com,
dan.j.williams@...el.com,
longman@...hat.com,
akpm@...ux-foundation.org,
david@...hat.com,
lorenzo.stoakes@...cle.com,
Liam.Howlett@...cle.com,
vbabka@...e.cz,
rppt@...nel.org,
surenb@...gle.com,
mhocko@...e.com,
osalvador@...e.de,
ziy@...dia.com,
matthew.brost@...el.com,
joshua.hahnjy@...il.com,
rakie.kim@...com,
byungchul@...com,
gourry@...rry.net,
ying.huang@...ux.alibaba.com,
apopple@...dia.com,
mingo@...hat.com,
peterz@...radead.org,
juri.lelli@...hat.com,
vincent.guittot@...aro.org,
dietmar.eggemann@....com,
rostedt@...dmis.org,
bsegall@...gle.com,
mgorman@...e.de,
vschneid@...hat.com,
tj@...nel.org,
hannes@...xchg.org,
mkoutny@...e.com,
kees@...nel.org,
muchun.song@...ux.dev,
roman.gushchin@...ux.dev,
shakeel.butt@...ux.dev,
rientjes@...gle.com,
jackmanb@...gle.com,
cl@...two.org,
harry.yoo@...cle.com,
axelrasmussen@...gle.com,
yuanchu@...gle.com,
weixugc@...gle.com,
zhengqi.arch@...edance.com,
yosry.ahmed@...ux.dev,
nphamcs@...il.com,
chengming.zhou@...ux.dev,
fabio.m.de.francesco@...ux.intel.com,
rrichter@....com,
ming.li@...omail.com,
usamaarif642@...il.com,
brauner@...nel.org,
oleg@...hat.com,
namcao@...utronix.de,
escape@...ux.alibaba.com,
dongjoo.seo1@...sung.com
Subject: [RFC PATCH 2/9] memory-tiers: create default_sysram_nodes
Record the set of memory nodes present at __init time, so that hotplug
memory nodes can choose whether to expose themselves to the page
allocator at hotplug time.
Do not include non-sysram nodes in demotion targets.
Signed-off-by: Gregory Price <gourry@...rry.net>
---
include/linux/memory-tiers.h | 3 +++
mm/memory-tiers.c | 22 ++++++++++++++++++++--
2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 7a805796fcfd..3d3f3687d134 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -39,6 +39,9 @@ struct access_coordinate;
extern bool numa_demotion_enabled;
extern struct memory_dev_type *default_dram_type;
extern nodemask_t default_dram_nodes;
+extern nodemask_t default_sysram_nodelist;
+#define default_sysram_nodes (nodes_empty(default_sysram_nodelist) ? NULL : \
+ &default_sysram_nodelist)
struct memory_dev_type *alloc_memory_type(int adistance);
void put_memory_type(struct memory_dev_type *memtype);
void init_node_memory_type(int node, struct memory_dev_type *default_type);
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 0ea5c13f10a2..b2ee4f73ad54 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -44,7 +44,12 @@ static LIST_HEAD(memory_tiers);
static LIST_HEAD(default_memory_types);
static struct node_memory_type_map node_memory_types[MAX_NUMNODES];
struct memory_dev_type *default_dram_type;
-nodemask_t default_dram_nodes __initdata = NODE_MASK_NONE;
+
+/* default_dram_nodes is the list of nodes with both CPUs and RAM */
+nodemask_t default_dram_nodes = NODE_MASK_NONE;
+
+/* default_sysram_nodelist is the list of nodes with RAM at __init time */
+nodemask_t default_sysram_nodelist = NODE_MASK_NONE;
static const struct bus_type memory_tier_subsys = {
.name = "memory_tiering",
@@ -427,6 +432,14 @@ static void establish_demotion_targets(void)
disable_all_demotion_targets();
for_each_node_state(node, N_MEMORY) {
+ /*
+ * If this is not a sysram node, direct-demotion is not allowed
+ * and must be managed by special logic that understands the
+ * memory features of that particular node.
+ */
+ if (!node_isset(node, default_sysram_nodelist))
+ continue;
+
best_distance = -1;
nd = &node_demotion[node];
@@ -457,7 +470,8 @@ static void establish_demotion_targets(void)
break;
distance = node_distance(node, target);
- if (distance == best_distance || best_distance == -1) {
+ if ((distance == best_distance || best_distance == -1) &&
+ node_isset(target, default_sysram_nodelist)) {
best_distance = distance;
node_set(target, nd->preferred);
} else {
@@ -812,6 +826,7 @@ int mt_perf_to_adistance(struct access_coordinate *perf, int *adist)
}
EXPORT_SYMBOL_GPL(mt_perf_to_adistance);
+
/**
* register_mt_adistance_algorithm() - Register memory tiering abstract distance algorithm
* @nb: The notifier block which describe the algorithm
@@ -922,6 +937,9 @@ static int __init memory_tier_init(void)
nodes_and(default_dram_nodes, node_states[N_MEMORY],
node_states[N_CPU]);
+ /* Record all nodes with non-hotplugged memory as default SYSRAM nodes */
+ default_sysram_nodelist = node_states[N_MEMORY];
+
hotplug_node_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRI);
return 0;
}
--
2.51.1
Powered by blists - more mailing lists