[<prev] [next>] [day] [month] [year] [list]
Message-Id: <20251119083722.1365680-1-chenridong@huaweicloud.com>
Date: Wed, 19 Nov 2025 08:37:22 +0000
From: Chen Ridong <chenridong@...weicloud.com>
To: akpm@...ux-foundation.org,
david@...nel.org,
lorenzo.stoakes@...cle.com,
Liam.Howlett@...cle.com,
vbabka@...e.cz,
rppt@...nel.org,
surenb@...gle.com,
mhocko@...e.com,
axelrasmussen@...gle.com,
yuanchu@...gle.com,
weixugc@...gle.com,
hannes@...xchg.org,
zhengqi.arch@...edance.com,
shakeel.butt@...ux.dev
Cc: linux-mm@...ck.org,
linux-kernel@...r.kernel.org,
lujialin4@...wei.com,
chenridong@...wei.com
Subject: [RFC -next] memcg: Optimize creation performance when LRU_GEN is enabled
From: Chen Ridong <chenridong@...wei.com>
With CONFIG_LRU_GEN=y and CONFIG_LRU_GEN_ENABLED=n, creating a large
number of memory cgroups (memcgs) is noticeably slow:
# time mkdir testcg_{1..10000}
real 0m7.167s
user 0m0.037s
sys 0m6.773s
# time mkdir testcg_{1..20000}
real 0m27.158s
user 0m0.079s
sys 0m26.270s
In contrast, with CONFIG_LRU_GEN=n, creation of the same number of
memcgs performs much better:
# time mkdir testcg_{1..10000}
real 0m3.386s
user 0m0.044s
sys 0m3.009s
# time mkdir testcg_{1..20000}
real 0m6.876s
user 0m0.075s
sys 0m6.121s
The root cause is that lru_gen memcg-node onlining uses
hlist_nulls_add_tail_rcu(), which must walk the entire list to find the
tail because an hlist_nulls has no tail pointer. This O(n) traversal
scales with the number of memcgs already on the list, and is paid even
when lru_gen is runtime-disabled.
Fix this by caching a per-gen, per-bin tail pointer in struct
lru_gen_memcg. Appending a new node now uses the cached tail directly,
eliminating the full list traversal.
After applying this patch, memcg creation performance with
CONFIG_LRU_GEN=y matches the CONFIG_LRU_GEN=n baseline:
# time mkdir testcg_{1..10000}
real 0m3.368s
user 0m0.025s
sys 0m3.012s
# time mkdir testcg_{1..20000}
real 0m6.742s
user 0m0.085s
sys 0m5.995s
Signed-off-by: Chen Ridong <chenridong@...wei.com>
---
include/linux/mmzone.h | 4 +++
mm/vmscan.c | 78 ++++++++++++++++++++++++++++++++++++++----
2 files changed, 75 insertions(+), 7 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4398e027f450..bdee57b35126 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -513,6 +513,8 @@ struct lru_gen_folio {
u8 gen;
/* the list segment this lru_gen_folio belongs to */
u8 seg;
+ /* the bin index this lru_gen_folio is queued on */
+ u8 bin;
/* per-node lru_gen_folio list for global reclaim */
struct hlist_nulls_node list;
};
@@ -610,6 +612,8 @@ struct lru_gen_memcg {
unsigned long nr_memcgs[MEMCG_NR_GENS];
/* per-node lru_gen_folio list for global reclaim */
struct hlist_nulls_head fifo[MEMCG_NR_GENS][MEMCG_NR_BINS];
+ /* cached tails to speed up enqueueing */
+ struct hlist_nulls_node *tails[MEMCG_NR_GENS][MEMCG_NR_BINS];
/* protects the above */
spinlock_t lock;
};
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8890f4b58673..6c2665e48f19 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4299,6 +4299,66 @@ enum {
MEMCG_LRU_YOUNG,
};
+static void memcg_lru_add_head_locked(struct pglist_data *pgdat,
+ struct lruvec *lruvec, int gen, int bin)
+{
+ struct lru_gen_memcg *memcg_lru = &pgdat->memcg_lru;
+ struct hlist_nulls_head *head = &memcg_lru->fifo[gen][bin];
+ struct hlist_nulls_node *node = &lruvec->lrugen.list;
+ bool empty = !memcg_lru->tails[gen][bin];
+
+ hlist_nulls_add_head_rcu(node, head);
+ lruvec->lrugen.bin = bin;
+
+ if (empty)
+ memcg_lru->tails[gen][bin] = node;
+}
+
+static void memcg_lru_add_tail_locked(struct pglist_data *pgdat,
+ struct lruvec *lruvec, int gen, int bin)
+{
+ struct lru_gen_memcg *memcg_lru = &pgdat->memcg_lru;
+ struct hlist_nulls_head *head = &memcg_lru->fifo[gen][bin];
+ struct hlist_nulls_node *node = &lruvec->lrugen.list;
+ struct hlist_nulls_node *tail = memcg_lru->tails[gen][bin];
+
+ if (tail) {
+ WRITE_ONCE(node->next, tail->next);
+ WRITE_ONCE(node->pprev, &tail->next);
+ rcu_assign_pointer(hlist_nulls_next_rcu(tail), node);
+ } else {
+ hlist_nulls_add_head_rcu(node, head);
+ }
+
+ memcg_lru->tails[gen][bin] = node;
+ lruvec->lrugen.bin = bin;
+}
+
+static void memcg_lru_del_locked(struct pglist_data *pgdat, struct lruvec *lruvec,
+ bool reinit)
+{
+ int gen = lruvec->lrugen.gen;
+ int bin = lruvec->lrugen.bin;
+ struct lru_gen_memcg *memcg_lru = &pgdat->memcg_lru;
+ struct hlist_nulls_head *head = &memcg_lru->fifo[gen][bin];
+ struct hlist_nulls_node *node = &lruvec->lrugen.list;
+ struct hlist_nulls_node *prev = NULL;
+
+ if (hlist_nulls_unhashed(node))
+ return;
+
+ if (memcg_lru->tails[gen][bin] == node) {
+ if (node->pprev != &head->first)
+ prev = container_of(node->pprev, struct hlist_nulls_node, next);
+ memcg_lru->tails[gen][bin] = prev;
+ }
+
+ if (reinit)
+ hlist_nulls_del_init_rcu(node);
+ else
+ hlist_nulls_del_rcu(node);
+}
+
static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
{
int seg;
@@ -4326,15 +4386,15 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
else
VM_WARN_ON_ONCE(true);
+ memcg_lru_del_locked(pgdat, lruvec, false);
+
WRITE_ONCE(lruvec->lrugen.seg, seg);
WRITE_ONCE(lruvec->lrugen.gen, new);
- hlist_nulls_del_rcu(&lruvec->lrugen.list);
-
if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD)
- hlist_nulls_add_head_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
+ memcg_lru_add_head_locked(pgdat, lruvec, new, bin);
else
- hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
+ memcg_lru_add_tail_locked(pgdat, lruvec, new, bin);
pgdat->memcg_lru.nr_memcgs[old]--;
pgdat->memcg_lru.nr_memcgs[new]++;
@@ -4365,7 +4425,7 @@ void lru_gen_online_memcg(struct mem_cgroup *memcg)
lruvec->lrugen.gen = gen;
- hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]);
+ memcg_lru_add_tail_locked(pgdat, lruvec, gen, bin);
pgdat->memcg_lru.nr_memcgs[gen]++;
spin_unlock_irq(&pgdat->memcg_lru.lock);
@@ -4399,7 +4459,7 @@ void lru_gen_release_memcg(struct mem_cgroup *memcg)
gen = lruvec->lrugen.gen;
- hlist_nulls_del_init_rcu(&lruvec->lrugen.list);
+ memcg_lru_del_locked(pgdat, lruvec, true);
pgdat->memcg_lru.nr_memcgs[gen]--;
if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
@@ -5664,8 +5724,10 @@ void lru_gen_init_pgdat(struct pglist_data *pgdat)
spin_lock_init(&pgdat->memcg_lru.lock);
for (i = 0; i < MEMCG_NR_GENS; i++) {
- for (j = 0; j < MEMCG_NR_BINS; j++)
+ for (j = 0; j < MEMCG_NR_BINS; j++) {
INIT_HLIST_NULLS_HEAD(&pgdat->memcg_lru.fifo[i][j], i);
+ pgdat->memcg_lru.tails[i][j] = NULL;
+ }
}
}
@@ -5687,6 +5749,8 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
if (mm_state)
mm_state->seq = MIN_NR_GENS;
+
+ lrugen->bin = 0;
}
#ifdef CONFIG_MEMCG
--
2.34.1
Powered by blists - more mailing lists