[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220603134237.131362-10-aneesh.kumar@linux.ibm.com>
Date: Fri, 3 Jun 2022 19:12:37 +0530
From: "Aneesh Kumar K.V" <aneesh.kumar@...ux.ibm.com>
To: linux-mm@...ck.org, akpm@...ux-foundation.org
Cc: Wei Xu <weixugc@...gle.com>, Huang Ying <ying.huang@...el.com>,
Greg Thelen <gthelen@...gle.com>,
Yang Shi <shy828301@...il.com>,
Davidlohr Bueso <dave@...olabs.net>,
Tim C Chen <tim.c.chen@...el.com>,
Brice Goglin <brice.goglin@...il.com>,
Michal Hocko <mhocko@...nel.org>,
Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
Hesham Almatary <hesham.almatary@...wei.com>,
Dave Hansen <dave.hansen@...el.com>,
Jonathan Cameron <Jonathan.Cameron@...wei.com>,
Alistair Popple <apopple@...dia.com>,
Dan Williams <dan.j.williams@...el.com>,
Feng Tang <feng.tang@...el.com>,
Jagdish Gediya <jvgediya@...ux.ibm.com>,
Baolin Wang <baolin.wang@...ux.alibaba.com>,
David Rientjes <rientjes@...gle.com>,
"Aneesh Kumar K.V" <aneesh.kumar@...ux.ibm.com>
Subject: [PATCH v5 9/9] mm/demotion: Update node_is_toptier to work with memory tiers
With memory tiers support, the top tier can contain NUMA nodes for
which we want to avoid promotion-tracking NUMA faults. Update
node_is_toptier() to work with memory tiers. To avoid taking locks,
a nodemask is maintained for all demotion targets. All NUMA nodes
are top tier nodes by default; as new lower memory tiers are added,
their NUMA nodes are added to the demotion targets, thereby moving
them out of the top tier.
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@...ux.ibm.com>
---
include/linux/memory-tiers.h | 16 ++++++++++++++++
include/linux/node.h | 5 -----
mm/huge_memory.c | 1 +
mm/memory-tiers.c | 10 ++++++++++
mm/migrate.c | 1 +
mm/mprotect.c | 1 +
6 files changed, 29 insertions(+), 5 deletions(-)
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index cd6e71f702ad..32e0e6fabf02 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -16,12 +16,23 @@
#define MAX_MEMORY_TIERS 3
extern bool numa_demotion_enabled;
+extern nodemask_t demotion_target_mask;
int next_demotion_node(int node);
void node_remove_from_memory_tier(int node);
int node_get_memory_tier_id(int node);
int node_set_memory_tier(int node, int tier);
int node_reset_memory_tier(int node, int tier);
void node_get_allowed_targets(int node, nodemask_t *targets);
+
+/*
+ * By default all nodes are top tier. As we create new memory tiers
+ * below the top tier, we add their nodes to demotion_target_mask.
+ */
+static inline bool node_is_toptier(int node)
+{
+ return !node_isset(node, demotion_target_mask);
+}
+
#else
#define numa_demotion_enabled false
static inline int next_demotion_node(int node)
@@ -33,6 +44,11 @@ static inline void node_get_allowed_targets(int node, nodemask_t *targets)
{
*targets = NODE_MASK_NONE;
}
+
+static inline bool node_is_toptier(int node)
+{
+ return true;
+}
#endif /* CONFIG_TIERED_MEMORY */
#endif
diff --git a/include/linux/node.h b/include/linux/node.h
index 40d641a8bfb0..9ec680dd607f 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -185,9 +185,4 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
#define to_node(device) container_of(device, struct node, dev)
-static inline bool node_is_toptier(int node)
-{
- return node_state(node, N_CPU);
-}
-
#endif /* _LINUX_NODE_H_ */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a77c78a2b6b5..294873d4be2b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -35,6 +35,7 @@
#include <linux/numa.h>
#include <linux/page_owner.h>
#include <linux/sched/sysctl.h>
+#include <linux/memory-tiers.h>
#include <asm/tlb.h>
#include <asm/pgalloc.h>
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 592d939ec28d..df8e9910165a 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -31,6 +31,7 @@ static struct bus_type memory_tier_subsys = {
static void establish_migration_targets(void);
static DEFINE_MUTEX(memory_tier_lock);
static LIST_HEAD(memory_tiers);
+nodemask_t demotion_target_mask;
/*
* node_demotion[] examples:
@@ -541,6 +542,15 @@ static void establish_migration_targets(void)
*/
list_for_each_entry(memtier, &memory_tiers, list)
nodes_or(allowed, allowed, memtier->nodelist);
+ /*
+ * Nodes outside the first tier are demotion targets, i.e. not top
+ * tier. list_first_entry() is never NULL, so use the _or_null form.
+ */
+ memtier = list_first_entry_or_null(&memory_tiers, struct memory_tier, list);
+ if (memtier)
+ nodes_andnot(demotion_target_mask, allowed, memtier->nodelist);
+ else
+ demotion_target_mask = NODE_MASK_NONE;
/*
* Removes nodes not yet in N_MEMORY.
*/
diff --git a/mm/migrate.c b/mm/migrate.c
index 0b554625a219..78615c48fc0f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -50,6 +50,7 @@
#include <linux/memory.h>
#include <linux/random.h>
#include <linux/sched/sysctl.h>
+#include <linux/memory-tiers.h>
#include <asm/tlbflush.h>
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ba5592655ee3..92a2fc0fa88b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -31,6 +31,7 @@
#include <linux/pgtable.h>
#include <linux/sched/sysctl.h>
#include <linux/userfaultfd_k.h>
+#include <linux/memory-tiers.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
--
2.36.1
Powered by blists - more mailing lists