Message-Id: <20260120134256.2271710-6-chenridong@huaweicloud.com>
Date: Tue, 20 Jan 2026 13:42:54 +0000
From: Chen Ridong <chenridong@...weicloud.com>
To: akpm@...ux-foundation.org,
	axelrasmussen@...gle.com,
	yuanchu@...gle.com,
	weixugc@...gle.com,
	david@...nel.org,
	lorenzo.stoakes@...cle.com,
	Liam.Howlett@...cle.com,
	vbabka@...e.cz,
	rppt@...nel.org,
	surenb@...gle.com,
	mhocko@...e.com,
	corbet@....net,
	skhan@...uxfoundation.org,
	hannes@...xchg.org,
	roman.gushchin@...ux.dev,
	shakeel.butt@...ux.dev,
	muchun.song@...ux.dev,
	zhengqi.arch@...edance.com
Cc: linux-mm@...ck.org,
	linux-doc@...r.kernel.org,
	linux-kernel@...r.kernel.org,
	cgroups@...r.kernel.org,
	lujialin4@...wei.com,
	chenridong@...weicloud.com,
	ryncsn@...il.com
Subject: [RFC PATCH -next 5/7] mm/mglru: combine shrink_many into shrink_node_memcgs

From: Chen Ridong <chenridong@...wei.com>

The memcg LRU was originally introduced to improve scalability during
global reclaim, but it is only used for gen LRU global reclaim and its
implementation remains complex.

Previous patches introduced heat-level-based memcg reclaim, which is
significantly simpler. This patch switches gen LRU global reclaim to the
heat-level-based mechanism, folding shrink_many() into shrink_node_memcgs().
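
For reviewers, the net effect on the per-memcg loop in shrink_node_memcgs()
can be condensed as the sketch below. This is only an illustrative summary
assembled from the hunks that follow (helper names such as lruvec_is_sizable()
and lru_gen_should_abort_scan() are taken from the diff itself); it is not
standalone, compilable code:

	/* inside the mem_cgroup_iter() loop of shrink_node_memcgs() */
	if (lru_gen_enabled()) {
		/* skip memcgs whose lruvec is too small to bother scanning */
		if (!lruvec_is_sizable(lruvec, sc))
			continue;
		lru_gen_shrink_lruvec(lruvec, sc);
	} else {
		shrink_lruvec(lruvec, sc);
	}

	/* ... slab shrinking, vmpressure, flush_reclaim_state() unchanged ... */

	if (lru_gen_enabled() && root_reclaim(sc)) {
		/* gen LRU global reclaim decides for itself when to stop */
		if (lru_gen_should_abort_scan(lruvec, sc)) {
			mem_cgroup_iter_break(target_memcg, memcg);
			break;
		}
	} else if (partial && sc->nr_reclaimed >= sc->nr_to_reclaim) {
		/* classic path: bail once the reclaim goal has been met */
		mem_cgroup_iter_break(target_memcg, memcg);
		break;
	}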

The following results are from a 24-hour test provided by Yu Zhao [1]:

Throughput (number of requests)        before        after       Change
Total                                  22879701      25331956      +10%

Tail latency (number of requests)      before        after       Change
[128s, inf)                            19197         15628         -19%
[64s, 128s)                            4500          3815          -29%
[32s, 64s)                             14971         13755         -36%
[16s, 32s)                             46117         42942          -7%

[1] https://lore.kernel.org/all/20221220214923.1229538-1-yuzhao@google.com/
Signed-off-by: Chen Ridong <chenridong@...wei.com>
---
 mm/vmscan.c | 101 ++++++++++++----------------------------------------
 1 file changed, 22 insertions(+), 79 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 27c6fdbc9394..f806838c3cea 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4965,76 +4965,6 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
 	       MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG;
 }
 
-static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
-{
-	int op;
-	int gen;
-	int bin;
-	int first_bin;
-	struct lruvec *lruvec;
-	struct lru_gen_folio *lrugen;
-	struct mem_cgroup *memcg;
-	struct hlist_nulls_node *pos;
-
-	gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq));
-	bin = first_bin = get_random_u32_below(MEMCG_NR_BINS);
-restart:
-	op = 0;
-	memcg = NULL;
-
-	rcu_read_lock();
-
-	hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) {
-		if (op) {
-			lru_gen_rotate_memcg(lruvec, op);
-			op = 0;
-		}
-
-		mem_cgroup_put(memcg);
-		memcg = NULL;
-
-		if (gen != READ_ONCE(lrugen->gen))
-			continue;
-
-		lruvec = container_of(lrugen, struct lruvec, lrugen);
-		memcg = lruvec_memcg(lruvec);
-
-		if (!mem_cgroup_tryget(memcg)) {
-			lru_gen_release_memcg(memcg);
-			memcg = NULL;
-			continue;
-		}
-
-		rcu_read_unlock();
-
-		op = shrink_one(lruvec, sc);
-
-		rcu_read_lock();
-
-		if (lru_gen_should_abort_scan(lruvec, sc))
-			break;
-	}
-
-	rcu_read_unlock();
-
-	if (op)
-		lru_gen_rotate_memcg(lruvec, op);
-
-	mem_cgroup_put(memcg);
-
-	if (!is_a_nulls(pos))
-		return;
-
-	/* restart if raced with lru_gen_rotate_memcg() */
-	if (gen != get_nulls_value(pos))
-		goto restart;
-
-	/* try the rest of the bins of the current generation */
-	bin = get_memcg_bin(bin + 1);
-	if (bin != first_bin)
-		goto restart;
-}
-
 static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 {
 	struct blk_plug plug;
@@ -5064,6 +4994,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
 	blk_finish_plug(&plug);
 }
 
+static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc);
 static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
 {
 	struct blk_plug plug;
@@ -5093,7 +5024,7 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
 	if (mem_cgroup_disabled())
 		shrink_one(&pgdat->__lruvec, sc);
 	else
-		shrink_many(pgdat, sc);
+		shrink_node_memcgs(pgdat, sc);
 
 	if (current_is_kswapd())
 		sc->nr_reclaimed += reclaimed;
@@ -5800,6 +5731,11 @@ static bool lru_gen_should_abort_scan(struct lruvec *lruvec, struct scan_control
 {
 	return false;
 }
+
+static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
+{
+	BUILD_BUG();
+}
 #endif /* CONFIG_LRU_GEN */
 
 static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
@@ -5813,11 +5749,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 	bool proportional_reclaim;
 	struct blk_plug plug;
 
-	if (lru_gen_enabled() && !root_reclaim(sc)) {
-		lru_gen_shrink_lruvec(lruvec, sc);
-		return;
-	}
-
 	get_scan_count(lruvec, sc, nr);
 
 	/* Record the original scan target for proportional adjustments later */
@@ -6127,7 +6058,8 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 	 * For kswapd, reliable forward progress is more important
 	 * than a quick return to idle. Always do full walks.
 	 */
-	if (current_is_kswapd() || sc->memcg_full_walk)
+	if ((current_is_kswapd() && lru_gen_enabled())
+	    || sc->memcg_full_walk)
 		partial = NULL;
 
 	for (level = MEMCG_LEVEL_COLD; level < max_level; level++) {
@@ -6178,7 +6110,13 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 			reclaimed = sc->nr_reclaimed;
 			scanned = sc->nr_scanned;
 
-			shrink_lruvec(lruvec, sc);
+			if (lru_gen_enabled()) {
+				if (!lruvec_is_sizable(lruvec, sc))
+					continue;
+				lru_gen_shrink_lruvec(lruvec, sc);
+			} else
+				shrink_lruvec(lruvec, sc);
+
 			if (!memcg || memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B))
 				shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
 					    sc->priority);
@@ -6196,7 +6134,12 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 
 			flush_reclaim_state(sc);
 			/* If partial walks are allowed, bail once goal is reached */
-			if (partial && sc->nr_reclaimed >= sc->nr_to_reclaim) {
+			if (lru_gen_enabled() && root_reclaim(sc)) {
+				if (lru_gen_should_abort_scan(lruvec, sc)) {
+					mem_cgroup_iter_break(target_memcg, memcg);
+					break;
+				}
+			} else if (partial && sc->nr_reclaimed >= sc->nr_to_reclaim) {
 				mem_cgroup_iter_break(target_memcg, memcg);
 				break;
 			}
-- 
2.34.1

