lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20231213175329.594-3-sthanneeru.opensrc@micron.com>
Date:   Wed, 13 Dec 2023 23:23:29 +0530
From:   <sthanneeru.opensrc@...ron.com>
To:     <sthanneeru.opensrc@...ron.com>, <linux-cxl@...r.kernel.org>,
        <linux-mm@...ck.org>
CC:     <sthanneeru@...ron.com>, <aneesh.kumar@...ux.ibm.com>,
        <dan.j.williams@...el.com>, <ying.huang@...el.com>,
        <gregory.price@...verge.com>, <mhocko@...e.com>, <tj@...nel.org>,
        <john@...alactic.com>, <emirakhur@...ron.com>,
        <vtavarespetr@...ron.com>, <Ravis.OpenSrc@...ron.com>,
        <Jonathan.Cameron@...wei.com>, <linux-kernel@...r.kernel.org>
Subject: [PATCH 2/2] memory tier: Support node migration between tiers

From: Srinivasulu Thanneeru <sthanneeru.opensrc@...ron.com>

Node migration allows nodes to be grouped into tiers, or moved between
tiers, based on the nodes' latency and bandwidth characteristics.
Since nodes of the same memory type can exist in different tiers and
can migrate from one tier to another, it is necessary to maintain
the set of nodes per tier, instead of deriving a tier's nodes from the
list of memory types (siblings) linked to that tier.

Signed-off-by: Srinivasulu Thanneeru <sthanneeru.opensrc@...ron.com>
---
 drivers/base/node.c          |  6 ++++
 include/linux/memory-tiers.h |  5 +++
 include/linux/node.h         |  5 +++
 mm/memory-tiers.c            | 66 +++++++++++++++++-------------------
 4 files changed, 47 insertions(+), 35 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 788176b3585a..179d9004e4f3 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -597,6 +597,7 @@ static ssize_t memtier_override_store(struct device *dev,
 		return size;
 	ret = get_memtier_adistance_offset(nid, memtier);
 	node_devices[nid]->adistance_offset = ret;
+	node_memtier_change(nid);
 
 	return size;
 }
@@ -607,6 +608,11 @@ void set_node_memtierid(int node, int memtierid)
 	node_devices[node]->memtier = memtierid;
 }
 
+int get_node_adistance_offset(int node)
+{
+	return node_devices[node]->adistance_offset;
+}
+
 static struct attribute *node_dev_attrs[] = {
 	&dev_attr_meminfo.attr,
 	&dev_attr_numastat.attr,
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 0dba8027e785..b323c2e2e417 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -54,6 +54,7 @@ int mt_set_default_dram_perf(int nid, struct node_hmem_attrs *perf,
 			     const char *source);
 int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist);
 int get_memtier_adistance_offset(int node, int memtier);
+void node_memtier_change(int node);
 #ifdef CONFIG_MIGRATION
 int next_demotion_node(int node);
 void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
@@ -142,5 +143,9 @@ static inline int mt_perf_to_adistance(struct node_hmem_attrs *perf, int *adist)
 {
 	return -EIO;
 }
+
+static inline void node_memtier_change(int node)
+{
+}
 #endif	/* CONFIG_NUMA */
 #endif  /* _LINUX_MEMORY_TIERS_H */
diff --git a/include/linux/node.h b/include/linux/node.h
index 1c4f4be39db4..da679577a271 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -141,6 +141,7 @@ extern int register_memory_node_under_compute_node(unsigned int mem_nid,
 						   unsigned int cpu_nid,
 						   unsigned access);
 extern void set_node_memtierid(int node, int memtierid);
+extern int get_node_adistance_offset(int nid);
 #else
 static inline void node_dev_init(void)
 {
@@ -171,6 +172,10 @@ static inline void unregister_memory_block_under_nodes(struct memory_block *mem_
 static inline void set_node_memtierid(int node, int memtierid)
 {
 }
+static inline int get_node_adistance_offset(int nid)
+{
+	return 0;
+}
 #endif
 
 #define to_node(device) container_of(device, struct node, dev)
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 31ed3c577836..66e1eae97e47 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -23,6 +23,8 @@ struct memory_tier {
 	struct device dev;
 	/* All the nodes that are part of all the lower memory tiers. */
 	nodemask_t lower_tier_mask;
+	/* Nodes linked to this tier*/
+	nodemask_t nodes;
 };
 
 struct demotion_nodes {
@@ -120,13 +122,7 @@ static inline struct memory_tier *to_memory_tier(struct device *device)
 
 static __always_inline nodemask_t get_memtier_nodemask(struct memory_tier *memtier)
 {
-	nodemask_t nodes = NODE_MASK_NONE;
-	struct memory_dev_type *memtype;
-
-	list_for_each_entry(memtype, &memtier->memory_types, tier_sibling)
-		nodes_or(nodes, nodes, memtype->nodes);
-
-	return nodes;
+	return memtier->nodes;
 }
 
 static void memory_tier_device_release(struct device *dev)
@@ -182,33 +178,22 @@ int get_memtier_adistance_offset(int node, int memtier)
 	return adistance_offset;
 }
 
-static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype)
+static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype,
+						   int tier_adistance)
 {
 	int ret;
 	bool found_slot = false;
 	struct memory_tier *memtier, *new_memtier;
-	int adistance = memtype->adistance;
+	int adistance;
 	unsigned int memtier_adistance_chunk_size = MEMTIER_CHUNK_SIZE;
 
 	lockdep_assert_held_once(&memory_tier_lock);
 
-	adistance = round_down(adistance, memtier_adistance_chunk_size);
-	/*
-	 * If the memtype is already part of a memory tier,
-	 * just return that.
-	 */
-	if (!list_empty(&memtype->tier_sibling)) {
-		list_for_each_entry(memtier, &memory_tiers, list) {
-			if (adistance == memtier->adistance_start)
-				return memtier;
-		}
-		WARN_ON(1);
-		return ERR_PTR(-EINVAL);
-	}
+	adistance = round_down(tier_adistance, memtier_adistance_chunk_size);
 
 	list_for_each_entry(memtier, &memory_tiers, list) {
 		if (adistance == memtier->adistance_start) {
-			goto link_memtype;
+			return memtier;
 		} else if (adistance < memtier->adistance_start) {
 			found_slot = true;
 			break;
@@ -238,11 +223,8 @@ static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memty
 		put_device(&new_memtier->dev);
 		return ERR_PTR(ret);
 	}
-	memtier = new_memtier;
 
-link_memtype:
-	list_add(&memtype->tier_sibling, &memtier->memory_types);
-	return memtier;
+	return new_memtier;
 }
 
 static struct memory_tier *__node_get_memory_tier(int node)
@@ -500,7 +482,7 @@ static struct memory_tier *set_node_memory_tier(int node)
 	struct memory_tier *memtier;
 	struct memory_dev_type *memtype;
 	pg_data_t *pgdat = NODE_DATA(node);
-
+	int tier_adistance;
 
 	lockdep_assert_held_once(&memory_tier_lock);
 
@@ -511,11 +493,15 @@ static struct memory_tier *set_node_memory_tier(int node)
 
 	memtype = node_memory_types[node].memtype;
 	node_set(node, memtype->nodes);
-	memtier = find_create_memory_tier(memtype);
+	tier_adistance = get_node_adistance_offset(node);
+	tier_adistance = memtype->adistance + tier_adistance;
+
+	memtier = find_create_memory_tier(memtype, tier_adistance);
 	if (!IS_ERR(memtier)) {
 		rcu_assign_pointer(pgdat->memtier, memtier);
 		set_node_memtierid(node, memtier->dev.id);
 	}
+	node_set(node, memtier->nodes);
 	return memtier;
 }
 
@@ -551,11 +537,9 @@ static bool clear_node_memory_tier(int node)
 		synchronize_rcu();
 		memtype = node_memory_types[node].memtype;
 		node_clear(node, memtype->nodes);
-		if (nodes_empty(memtype->nodes)) {
-			list_del_init(&memtype->tier_sibling);
-			if (list_empty(&memtier->memory_types))
-				destroy_memory_tier(memtier);
-		}
+		node_clear(node, memtier->nodes);
+		if (nodes_empty(memtier->nodes))
+			destroy_memory_tier(memtier);
 		cleared = true;
 	}
 	return cleared;
@@ -578,7 +562,6 @@ struct memory_dev_type *alloc_memory_type(int adistance)
 		return ERR_PTR(-ENOMEM);
 
 	memtype->adistance = adistance;
-	INIT_LIST_HEAD(&memtype->tier_sibling);
 	memtype->nodes  = NODE_MASK_NONE;
 	kref_init(&memtype->kref);
 	return memtype;
@@ -618,6 +601,19 @@ void clear_node_memory_type(int node, struct memory_dev_type *memtype)
 }
 EXPORT_SYMBOL_GPL(clear_node_memory_type);
 
+void node_memtier_change(int node)
+{
+	struct memory_tier *memtier;
+
+	mutex_lock(&memory_tier_lock);
+	if (clear_node_memory_tier(node))
+		establish_demotion_targets();
+	memtier = set_node_memory_tier(node);
+	if (!IS_ERR(memtier))
+		establish_demotion_targets();
+	mutex_unlock(&memory_tier_lock);
+}
+
 static void dump_hmem_attrs(struct node_hmem_attrs *attrs, const char *prefix)
 {
 	pr_info(
-- 
2.25.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ