Message-Id: <20260120134256.2271710-6-chenridong@huaweicloud.com>
Date: Tue, 20 Jan 2026 13:42:54 +0000
From: Chen Ridong <chenridong@...weicloud.com>
To: akpm@...ux-foundation.org,
axelrasmussen@...gle.com,
yuanchu@...gle.com,
weixugc@...gle.com,
david@...nel.org,
lorenzo.stoakes@...cle.com,
Liam.Howlett@...cle.com,
vbabka@...e.cz,
rppt@...nel.org,
surenb@...gle.com,
mhocko@...e.com,
corbet@....net,
skhan@...uxfoundation.org,
hannes@...xchg.org,
roman.gushchin@...ux.dev,
shakeel.butt@...ux.dev,
muchun.song@...ux.dev,
zhengqi.arch@...edance.com
Cc: linux-mm@...ck.org,
linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org,
cgroups@...r.kernel.org,
lujialin4@...wei.com,
chenridong@...weicloud.com,
ryncsn@...il.com
Subject: [RFC PATCH -next 5/7] mm/mglru: combine shrink_many into shrink_node_memcgs
From: Chen Ridong <chenridong@...wei.com>

The memcg LRU was originally introduced to improve scalability during
global reclaim, but it only serves MGLRU global reclaim and its
implementation is complex.

Previous patches in this series introduced heat-level-based memcg
reclaim, which is significantly simpler. Switch MGLRU global reclaim
over to the heat-level-based mechanism, folding shrink_many() into
shrink_node_memcgs().
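
With this change, global MGLRU reclaim goes through the same per-memcg
walk as conventional reclaim. The loop body in shrink_node_memcgs()
ends up with roughly the following shape (a condensed sketch of the
diff below; slab shrinking, vmpressure and statistics are omitted):

	if (lru_gen_enabled()) {
		/* skip lruvecs too small to be worth scanning */
		if (!lruvec_is_sizable(lruvec, sc))
			continue;
		lru_gen_shrink_lruvec(lruvec, sc);
	} else {
		shrink_lruvec(lruvec, sc);
	}

	/* global MGLRU reclaim aborts the walk on its own criteria */
	if (lru_gen_enabled() && root_reclaim(sc)) {
		if (lru_gen_should_abort_scan(lruvec, sc)) {
			mem_cgroup_iter_break(target_memcg, memcg);
			break;
		}
	} else if (partial && sc->nr_reclaimed >= sc->nr_to_reclaim) {
		/* partial walks bail once the reclaim goal is met */
		mem_cgroup_iter_break(target_memcg, memcg);
		break;
	}

Reusing the common walk removes the memcg LRU's generation/bin
bookkeeping and its RCU walk over pgdat->memcg_lru entirely.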

The following results are from a 24-hour test reported by Yu Zhao [1]:

  Throughput (number of requests)    before      after     Change
  Total                            22879701   25331956       +10%

  Tail latency (number of requests)  before      after     Change
  [128s, inf)                         19197      15628       -19%
  [64s, 128s)                          4500       3815       -15%
  [32s, 64s)                          14971      13755        -8%
  [16s, 32s)                          46117      42942        -7%

[1] https://lore.kernel.org/all/20221220214923.1229538-1-yuzhao@google.com/
Signed-off-by: Chen Ridong <chenridong@...wei.com>
---
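Note: with this change the global MGLRU entry point reduces to a plain
dispatch. A simplified sketch of lru_gen_shrink_node() (blk plug
handling, sc fixups and kswapd accounting elided):

	static void lru_gen_shrink_node(struct pglist_data *pgdat,
					struct scan_control *sc)
	{
		/* ... setup elided ... */

		if (mem_cgroup_disabled())
			shrink_one(&pgdat->__lruvec, sc); /* root lruvec only */
		else
			shrink_node_memcgs(pgdat, sc);    /* heat-level walk */

		/* ... teardown elided ... */
	}
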
mm/vmscan.c | 101 ++++++++++++----------------------------------------
1 file changed, 22 insertions(+), 79 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 27c6fdbc9394..f806838c3cea 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4965,76 +4965,6 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
 	       MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG;
 }
 
-static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
-{
-	int op;
-	int gen;
-	int bin;
-	int first_bin;
-	struct lruvec *lruvec;
-	struct lru_gen_folio *lrugen;
-	struct mem_cgroup *memcg;
-	struct hlist_nulls_node *pos;
-
-	gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq));
-	bin = first_bin = get_random_u32_below(MEMCG_NR_BINS);
-restart:
-	op = 0;
-	memcg = NULL;
-
-	rcu_read_lock();
-
-	hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) {
-		if (op) {
-			lru_gen_rotate_memcg(lruvec, op);
-			op = 0;
-		}
-
-		mem_cgroup_put(memcg);
-		memcg = NULL;
-
-		if (gen != READ_ONCE(lrugen->gen))
-			continue;
-
-		lruvec = container_of(lrugen, struct lruvec, lrugen);
-		memcg = lruvec_memcg(lruvec);
-
-		if (!mem_cgroup_tryget(memcg)) {
-			lru_gen_release_memcg(memcg);
-			memcg = NULL;
-			continue;
-		}
-
-		rcu_read_unlock();
-
-		op = shrink_one(lruvec, sc);
-
-		rcu_read_lock();
-
-		if (lru_gen_should_abort_scan(lruvec, sc))
-			break;
-	}
-
-	rcu_read_unlock();
-
-	if (op)
-		lru_gen_rotate_memcg(lruvec, op);
-
-	mem_cgroup_put(memcg);
-
-	if (!is_a_nulls(pos))
-		return;
-
-	/* restart if raced with lru_gen_rotate_memcg() */
-	if (gen != get_nulls_value(pos))
-		goto restart;
-
-	/* try the rest of the bins of the current generation */
-	bin = get_memcg_bin(bin + 1);
-	if (bin != first_bin)
-		goto restart;
-}
-
 static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 {
 	struct blk_plug plug;
@@ -5064,6 +4994,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 	blk_finish_plug(&plug);
 }
 
+static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc);
 static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
 {
 	struct blk_plug plug;
@@ -5093,7 +5024,7 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
 	if (mem_cgroup_disabled())
 		shrink_one(&pgdat->__lruvec, sc);
 	else
-		shrink_many(pgdat, sc);
+		shrink_node_memcgs(pgdat, sc);
 
 	if (current_is_kswapd())
 		sc->nr_reclaimed += reclaimed;
@@ -5800,6 +5731,11 @@ static bool lru_gen_should_abort_scan(struct lruvec *lruvec, struct scan_control *sc)
 {
 	return false;
 }
+
+static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
+{
+	BUILD_BUG();
+}
 
 #endif /* CONFIG_LRU_GEN */
 
@@ -5813,11 +5749,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 	bool proportional_reclaim;
 	struct blk_plug plug;
 
-	if (lru_gen_enabled() && !root_reclaim(sc)) {
-		lru_gen_shrink_lruvec(lruvec, sc);
-		return;
-	}
-
 	get_scan_count(lruvec, sc, nr);
 
 	/* Record the original scan target for proportional adjustments later */
@@ -6127,7 +6058,8 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 	 * For kswapd, reliable forward progress is more important
 	 * than a quick return to idle. Always do full walks.
 	 */
-	if (current_is_kswapd() || sc->memcg_full_walk)
+	if ((current_is_kswapd() && lru_gen_enabled()) ||
+	    sc->memcg_full_walk)
 		partial = NULL;
 
 	for (level = MEMCG_LEVEL_COLD; level < max_level; level++) {
@@ -6178,7 +6110,13 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 		reclaimed = sc->nr_reclaimed;
 		scanned = sc->nr_scanned;
 
-		shrink_lruvec(lruvec, sc);
+		if (lru_gen_enabled()) {
+			if (!lruvec_is_sizable(lruvec, sc))
+				continue;
+			lru_gen_shrink_lruvec(lruvec, sc);
+		} else
+			shrink_lruvec(lruvec, sc);
+
 		if (!memcg || memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B))
 			shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
 				    sc->priority);
@@ -6196,7 +6134,12 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 		flush_reclaim_state(sc);
 
 		/* If partial walks are allowed, bail once goal is reached */
-		if (partial && sc->nr_reclaimed >= sc->nr_to_reclaim) {
+		if (lru_gen_enabled() && root_reclaim(sc)) {
+			if (lru_gen_should_abort_scan(lruvec, sc)) {
+				mem_cgroup_iter_break(target_memcg, memcg);
+				break;
+			}
+		} else if (partial && sc->nr_reclaimed >= sc->nr_to_reclaim) {
 			mem_cgroup_iter_break(target_memcg, memcg);
 			break;
 		}
--
2.34.1