lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220704070612.299585-11-aneesh.kumar@linux.ibm.com>
Date:   Mon,  4 Jul 2022 12:36:10 +0530
From:   "Aneesh Kumar K.V" <aneesh.kumar@...ux.ibm.com>
To:     linux-mm@...ck.org, akpm@...ux-foundation.org
Cc:     Wei Xu <weixugc@...gle.com>, Huang Ying <ying.huang@...el.com>,
        Yang Shi <shy828301@...il.com>,
        Davidlohr Bueso <dave@...olabs.net>,
        Tim C Chen <tim.c.chen@...el.com>,
        Michal Hocko <mhocko@...nel.org>,
        Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
        Hesham Almatary <hesham.almatary@...wei.com>,
        Dave Hansen <dave.hansen@...el.com>,
        Jonathan Cameron <Jonathan.Cameron@...wei.com>,
        Alistair Popple <apopple@...dia.com>,
        Dan Williams <dan.j.williams@...el.com>,
        Johannes Weiner <hannes@...xchg.org>, jvgediya.oss@...il.com,
        "Aneesh Kumar K.V" <aneesh.kumar@...ux.ibm.com>
Subject: [PATCH v8 10/12] mm/demotion: Update node_is_toptier to work with memory tiers

With memory tiers support we can have memory only NUMA nodes
in the top tier from which we want to avoid promotion tracking NUMA
faults. Update node_is_toptier to work with memory tiers.
All NUMA nodes are by default top tier nodes. With lower memory
tiers added we consider all memory tiers above a memory tier having
CPU NUMA nodes as a top memory tier

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@...ux.ibm.com>
---
 include/linux/memory-tiers.h |  6 ++++++
 include/linux/node.h         |  5 -----
 mm/huge_memory.c             |  1 +
 mm/memory-tiers.c            | 41 ++++++++++++++++++++++++++++++++++++
 mm/migrate.c                 |  1 +
 mm/mprotect.c                |  1 +
 6 files changed, 50 insertions(+), 5 deletions(-)

diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 335d21a30b2c..ff1a08933575 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -44,6 +44,7 @@ int node_get_memory_tier_id(int node);
 int node_update_memory_tier(int node, int tier);
 struct memory_tier *node_get_memory_tier(int node);
 void node_put_memory_tier(struct memory_tier *memtier);
+bool node_is_toptier(int node);
 
 #else
 
@@ -62,5 +63,10 @@ static inline void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *target
 {
 	*targets = NODE_MASK_NONE;
 }
+
+static inline bool node_is_toptier(int node)
+{
+	return true;
+}
 #endif	/* CONFIG_NUMA */
 #endif  /* _LINUX_MEMORY_TIERS_H */
diff --git a/include/linux/node.h b/include/linux/node.h
index 40d641a8bfb0..9ec680dd607f 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -185,9 +185,4 @@ static inline void register_hugetlbfs_with_node(node_registration_func_t reg,
 
 #define to_node(device) container_of(device, struct node, dev)
 
-static inline bool node_is_toptier(int node)
-{
-	return node_state(node, N_CPU);
-}
-
 #endif /* _LINUX_NODE_H_ */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 834f288b3769..8405662646e9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -35,6 +35,7 @@
 #include <linux/numa.h>
 #include <linux/page_owner.h>
 #include <linux/sched/sysctl.h>
+#include <linux/memory-tiers.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index aecce987df7c..7204f7381a15 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -18,6 +18,7 @@ struct demotion_nodes {
 static void establish_migration_targets(void);
 static DEFINE_MUTEX(memory_tier_lock);
 static LIST_HEAD(memory_tiers);
+static int top_tier_id;
 /*
  * node_demotion[] examples:
  *
@@ -373,6 +374,31 @@ void node_put_memory_tier(struct memory_tier *memtier)
 	put_device(&memtier->dev);
 }
 
+bool node_is_toptier(int node)
+{
+	bool toptier;
+	pg_data_t *pgdat;
+	struct memory_tier *memtier;
+
+	pgdat = NODE_DATA(node);
+	if (!pgdat)
+		return false;
+
+	rcu_read_lock();
+	memtier = rcu_dereference(pgdat->memtier);
+	if (!memtier) {
+		toptier = true;
+		goto out;
+	}
+	if (memtier->dev.id >= top_tier_id)
+		toptier = true;
+	else
+		toptier = false;
+out:
+	rcu_read_unlock();
+	return toptier;
+}
+
 #ifdef CONFIG_MIGRATION
 void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
 {
@@ -545,6 +571,21 @@ static void establish_migration_targets(void)
 		} while (1);
 	}
 build_lower_tier_mask:
+	/*
+	 * Promotion is allowed from a memory tier to higher
+	 * memory tier only if the memory tier doesn't include
+	 * compute. We want to  skip promotion from a memory tier,
+	 * if any node that is  part of the memory tier have CPUs.
+	 * Once we detect such a memory tier, we consider that tier
+	 * as top tiper from which promotion is not allowed.
+	 */
+	list_for_each_entry_reverse(memtier, &memory_tiers, list) {
+		nodes_and(used, node_states[N_CPU], memtier->nodelist);
+		if (!nodes_empty(used)) {
+			top_tier_id = memtier->dev.id;
+			break;
+		}
+	}
 	/*
 	 * Now build the lower_tier mask for each node collecting node mask from
 	 * all memory tier below it. This allows us to fallback demotion page
diff --git a/mm/migrate.c b/mm/migrate.c
index c758c9c21d7d..1da81136eaaa 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -50,6 +50,7 @@
 #include <linux/memory.h>
 #include <linux/random.h>
 #include <linux/sched/sysctl.h>
+#include <linux/memory-tiers.h>
 
 #include <asm/tlbflush.h>
 
diff --git a/mm/mprotect.c b/mm/mprotect.c
index ba5592655ee3..92a2fc0fa88b 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -31,6 +31,7 @@
 #include <linux/pgtable.h>
 #include <linux/sched/sysctl.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/memory-tiers.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
-- 
2.36.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ