lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <20260108033248.2791579-3-bingjiao@google.com>
Date: Thu,  8 Jan 2026 03:32:47 +0000
From: Bing Jiao <bingjiao@...gle.com>
To: linux-mm@...ck.org
Cc: linux-kernel@...r.kernel.org, akpm@...ux-foundation.org, gourry@...rry.net, 
	longman@...hat.com, hannes@...xchg.org, mhocko@...nel.org, 
	roman.gushchin@...ux.dev, shakeel.butt@...ux.dev, muchun.song@...ux.dev, 
	tj@...nel.org, mkoutny@...e.com, david@...nel.org, zhengqi.arch@...edance.com, 
	lorenzo.stoakes@...cle.com, axelrasmussen@...gle.com, 
	chenridong@...weicloud.com, yuanchu@...gle.com, weixugc@...gle.com, 
	cgroups@...r.kernel.org, joshua.hahnjy@...il.com, bingjiao@...gle.com
Subject: [PATCH v7 2/2] mm/vmscan: select the closest preferred node in demote_folio_list()

The preferred demotion node (migration_target_control.nid) should be
the one closest to the source node, to minimize migration latency.
Currently, however, demote_folio_list() falls back to a randomly
selected allowed node whenever the preferred node returned by
next_demotion_node() is not set in mems_allowed, so the chosen target
is not necessarily the closest one.

To address this, update next_demotion_node() to also return the full
set of preferred nodes, so that the caller can pick an allowed node
from among them.
Also update demote_folio_list() to walk the demotion targets tier by
tier until a preferred node within mems_allowed is found.
This ensures that the selected demotion target is consistently
the closest available node.

Signed-off-by: Bing Jiao <bingjiao@...gle.com>
---
 include/linux/memory-tiers.h |  6 +++---
 mm/memory-tiers.c            | 11 +++++++----
 mm/vmscan.c                  | 25 ++++++++++++++++++++++---
 3 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 7a805796fcfd..87652042f2c2 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -53,11 +53,11 @@ struct memory_dev_type *mt_find_alloc_memory_type(int adist,
 						  struct list_head *memory_types);
 void mt_put_memory_types(struct list_head *memory_types);
 #ifdef CONFIG_MIGRATION
-int next_demotion_node(int node);
+int next_demotion_node(int node, nodemask_t *preferred_nodes);
 void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
 bool node_is_toptier(int node);
 #else
-static inline int next_demotion_node(int node)
+static inline int next_demotion_node(int node, nodemask_t *preferred_nodes)
 {
 	return NUMA_NO_NODE;
 }
@@ -101,7 +101,7 @@ static inline void clear_node_memory_type(int node, struct memory_dev_type *memt

 }

-static inline int next_demotion_node(int node)
+static inline int next_demotion_node(int node, nodemask_t *preferred_nodes)
 {
 	return NUMA_NO_NODE;
 }
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 864811fff409..286e4b5fa0e5 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -320,13 +320,14 @@ void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets)
 /**
  * next_demotion_node() - Get the next node in the demotion path
  * @node: The starting node to lookup the next node
+ * @preferred_nodes: The pointer to nodemask of all preferred nodes to return
  *
  * Return: node id for next memory node in the demotion path hierarchy
- * from @node; NUMA_NO_NODE if @node is terminal.  This does not keep
- * @node online or guarantee that it *continues* to be the next demotion
- * target.
+ * from @node; NUMA_NO_NODE if @node is terminal. Also returns all preferred
+ * nodes in @preferred_nodes. This does not keep @node online or guarantee
+ * that it *continues* to be the next demotion target.
  */
-int next_demotion_node(int node)
+int next_demotion_node(int node, nodemask_t *preferred_nodes)
 {
 	struct demotion_nodes *nd;
 	int target;
@@ -357,6 +358,8 @@ int next_demotion_node(int node)
 	 * target node randomly seems better until now.
 	 */
 	target = node_random(&nd->preferred);
+	if (preferred_nodes)
+		nodes_copy(*preferred_nodes, nd->preferred);
 	rcu_read_unlock();

 	return target;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 94ff5aa7c4fb..213ee75b3306 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1024,9 +1024,10 @@ static unsigned int demote_folio_list(struct list_head *demote_folios,
 				      struct pglist_data *pgdat,
 				      struct mem_cgroup *memcg)
 {
-	int target_nid = next_demotion_node(pgdat->node_id);
+	int target_nid;
 	unsigned int nr_succeeded;
 	nodemask_t allowed_mask;
+	nodemask_t preferred;

 	struct migration_target_control mtc = {
 		/*
@@ -1052,8 +1053,26 @@ static unsigned int demote_folio_list(struct list_head *demote_folios,
 	if (nodes_empty(allowed_mask))
 		return 0;

-	if (!node_isset(target_nid, allowed_mask))
-		target_nid = node_random(&allowed_mask);
+	target_nid = next_demotion_node(pgdat->node_id, &preferred);
+	while (target_nid != NUMA_NO_NODE &&
+	       !node_isset(target_nid, allowed_mask)) {
+		/* Filter out preferred nodes that are not in allowed. */
+		nodes_and(preferred, preferred, allowed_mask);
+		if (!nodes_empty(preferred)) {
+			/* Randomly select one node from preferred. */
+			target_nid = node_random(&preferred);
+			break;
+		}
+		/*
+		 * None of this tier's preferred nodes are set in allowed.
+		 * Move down one tier and fetch its preferred nodes instead.
+		 */
+		target_nid = next_demotion_node(target_nid, &preferred);
+	}
+
+	if (target_nid == NUMA_NO_NODE)
+		/* Nodes are gone (e.g., hot-unplugged). */
+		return 0;
 	mtc.nid = target_nid;

 	/* Demotion ignores all cpuset and mempolicy settings */
--
2.52.0.457.g6b5491de43-goog


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ