[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20240628060925.303309-2-horen.chuang@linux.dev>
Date: Fri, 28 Jun 2024 06:09:23 +0000
From: "Ho-Ren (Jack) Chuang" <horen.chuang@...ux.dev>
To: "Jonathan Cameron" <Jonathan.Cameron@...wei.com>,
"Huang, Ying" <ying.huang@...el.com>,
"Gregory Price" <gourry.memverge@...il.com>,
aneesh.kumar@...ux.ibm.com,
mhocko@...e.com,
tj@...nel.org,
john@...alactic.com,
"Eishan Mirakhur" <emirakhur@...ron.com>,
"Vinicius Tavares Petrucci" <vtavarespetr@...ron.com>,
"Ravis OpenSrc" <Ravis.OpenSrc@...ron.com>,
"Alistair Popple" <apopple@...dia.com>,
"Srinivasulu Thanneeru" <sthanneeru@...ron.com>,
"SeongJae Park" <sj@...nel.org>,
"Rafael J. Wysocki" <rafael@...nel.org>,
Len Brown <lenb@...nel.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Dave Jiang <dave.jiang@...el.com>,
Dan Williams <dan.j.williams@...el.com>,
Jonathan Cameron <Jonathan.Cameron@...wei.com>,
"Ho-Ren (Jack) Chuang" <horen.chuang@...ux.dev>,
linux-acpi@...r.kernel.org,
linux-kernel@...r.kernel.org,
linux-mm@...ck.org
Cc: "Ho-Ren (Jack) Chuang" <horenc@...edu>,
"Ho-Ren (Jack) Chuang" <horenchuang@...edance.com>,
"Ho-Ren (Jack) Chuang" <horenchuang@...il.com>,
linux-cxl@...r.kernel.org,
qemu-devel@...gnu.org
Subject: [PATCH v2 1/1] memory tier: consolidate the initialization of memory tiers
If we simply move set_node_memory_tier() from memory_tier_init() to a
late_initcall(), HMAT will fail to register its
mt_adistance_algorithm callback function. This is because
set_node_memory_tier() would no longer run during the memory tiering
initialization phase, so the default DRAM node information that HMAT
needs would not be available yet.
Therefore, we introduce a nodemask, default_dram_nodes, to pass the
information about the default DRAM nodes to HMAT. We chose not to reuse
default_dram_type->nodes because doing so is not clean enough. Instead,
we use a __initdata variable, which is released once initialization is
complete. It records the nodes that have both CPU and memory, for HMAT
to iterate through.
Besides, since default_dram_type may be checked/used during the
initialization process of HMAT and drivers, it is better to keep the
allocation of default_dram_type in memory_tier_init().
Signed-off-by: Ho-Ren (Jack) Chuang <horenchuang@...edance.com>
Suggested-by: Jonathan Cameron <Jonathan.Cameron@...wei.com>
---
drivers/acpi/numa/hmat.c | 5 +--
include/linux/memory-tiers.h | 2 ++
mm/memory-tiers.c | 59 +++++++++++++++---------------------
3 files changed, 28 insertions(+), 38 deletions(-)
diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index 2c8ccc91ebe6..a2f9e7a4b479 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -940,10 +940,7 @@ static int hmat_set_default_dram_perf(void)
struct memory_target *target;
struct access_coordinate *attrs;
- if (!default_dram_type)
- return -EIO;
-
- for_each_node_mask(nid, default_dram_type->nodes) {
+ for_each_node_mask(nid, default_dram_nodes) {
pxm = node_to_pxm(nid);
target = find_mem_target(pxm);
if (!target)
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 0d70788558f4..fa61ad9c4d75 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -38,6 +38,7 @@ struct access_coordinate;
#ifdef CONFIG_NUMA
extern bool numa_demotion_enabled;
extern struct memory_dev_type *default_dram_type;
+extern nodemask_t default_dram_nodes __initdata;
struct memory_dev_type *alloc_memory_type(int adistance);
void put_memory_type(struct memory_dev_type *memtype);
void init_node_memory_type(int node, struct memory_dev_type *default_type);
@@ -76,6 +77,7 @@ static inline bool node_is_toptier(int node)
#define numa_demotion_enabled false
#define default_dram_type NULL
+#define default_dram_nodes NODE_MASK_NONE
/*
* CONFIG_NUMA implementation returns non NULL error.
*/
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 6632102bd5c9..a19a90c3ad36 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -43,6 +43,7 @@ static LIST_HEAD(memory_tiers);
static LIST_HEAD(default_memory_types);
static struct node_memory_type_map node_memory_types[MAX_NUMNODES];
struct memory_dev_type *default_dram_type;
+nodemask_t default_dram_nodes __initdata = NODE_MASK_NONE;
static const struct bus_type memory_tier_subsys = {
.name = "memory_tiering",
@@ -671,28 +672,38 @@ EXPORT_SYMBOL_GPL(mt_put_memory_types);
/*
* This is invoked via `late_initcall()` to initialize memory tiers for
- * CPU-less memory nodes after driver initialization, which is
- * expected to provide `adistance` algorithms.
+ * memory nodes, both with and without CPUs. After the initialization of
+ * firmware and devices, adistance algorithms are expected to be provided.
*/
static int __init memory_tier_late_init(void)
{
int nid;
+ struct memory_tier *memtier;
+ get_online_mems();
guard(mutex)(&memory_tier_lock);
+ /*
+ * Look at all the existing and uninitialized N_MEMORY nodes and
+ * add them to default memory tier or to a tier if we already have
+ * memory types assigned.
+ */
for_each_node_state(nid, N_MEMORY) {
/*
- * Some device drivers may have initialized memory tiers
- * between `memory_tier_init()` and `memory_tier_late_init()`,
- * potentially bringing online memory nodes and
- * configuring memory tiers. Exclude them here.
+ * Some device drivers may have initialized
+ * memory tiers, potentially bringing memory nodes
+ * online and configuring memory tiers.
+ * Exclude them here.
*/
if (node_memory_types[nid].memtype)
continue;
- set_node_memory_tier(nid);
+ memtier = set_node_memory_tier(nid);
+ if (IS_ERR(memtier))
+ /* Continue with memtiers we are able to setup. */
+ break;
}
-
establish_demotion_targets();
+ put_online_mems();
return 0;
}
@@ -875,8 +886,7 @@ static int __meminit memtier_hotplug_callback(struct notifier_block *self,
static int __init memory_tier_init(void)
{
- int ret, node;
- struct memory_tier *memtier;
+ int ret;
ret = subsys_virtual_register(&memory_tier_subsys, NULL);
if (ret)
@@ -887,7 +897,8 @@ static int __init memory_tier_init(void)
GFP_KERNEL);
WARN_ON(!node_demotion);
#endif
- mutex_lock(&memory_tier_lock);
+
+ guard(mutex)(&memory_tier_lock);
/*
* For now we can have 4 faster memory tiers with smaller adistance
* than default DRAM tier.
@@ -897,29 +908,9 @@ static int __init memory_tier_init(void)
if (IS_ERR(default_dram_type))
panic("%s() failed to allocate default DRAM tier\n", __func__);
- /*
- * Look at all the existing N_MEMORY nodes and add them to
- * default memory tier or to a tier if we already have memory
- * types assigned.
- */
- for_each_node_state(node, N_MEMORY) {
- if (!node_state(node, N_CPU))
- /*
- * Defer memory tier initialization on
- * CPUless numa nodes. These will be initialized
- * after firmware and devices are initialized.
- */
- continue;
-
- memtier = set_node_memory_tier(node);
- if (IS_ERR(memtier))
- /*
- * Continue with memtiers we are able to setup
- */
- break;
- }
- establish_demotion_targets();
- mutex_unlock(&memory_tier_lock);
+ /* Record nodes with memory and CPU to set default DRAM performance. */
+ nodes_and(default_dram_nodes, node_states[N_MEMORY],
+ node_states[N_CPU]);
hotplug_memory_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRI);
return 0;
--
Ho-Ren (Jack) Chuang
Powered by blists - more mailing lists