[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20260121123955.84806-3-jiayuan.chen@linux.dev>
Date: Wed, 21 Jan 2026 20:39:48 +0800
From: Jiayuan Chen <jiayuan.chen@...ux.dev>
To: linux-mm@...ck.org
Cc: Jiayuan Chen <jiayuan.chen@...pee.com>,
Tejun Heo <tj@...nel.org>,
Johannes Weiner <hannes@...xchg.org>,
Michal Koutný <mkoutny@...e.com>,
Jonathan Corbet <corbet@....net>,
Andrew Morton <akpm@...ux-foundation.org>,
David Hildenbrand <david@...nel.org>,
Lorenzo Stoakes <lorenzo.stoakes@...cle.com>,
"Liam R. Howlett" <Liam.Howlett@...cle.com>,
Vlastimil Babka <vbabka@...e.cz>,
Mike Rapoport <rppt@...nel.org>,
Suren Baghdasaryan <surenb@...gle.com>,
Michal Hocko <mhocko@...e.com>,
Axel Rasmussen <axelrasmussen@...gle.com>,
Yuanchu Xie <yuanchu@...gle.com>,
Wei Xu <weixugc@...gle.com>,
Roman Gushchin <roman.gushchin@...ux.dev>,
Shakeel Butt <shakeel.butt@...ux.dev>,
Muchun Song <muchun.song@...ux.dev>,
Qi Zheng <zhengqi.arch@...edance.com>,
cgroups@...r.kernel.org,
linux-doc@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: [RFC PATCH 2/3] mm/lru_gen: add memory.lru_gen interface for cgroup v2
From: Jiayuan Chen <jiayuan.chen@...pee.com>
Add a memory.lru_gen interface to cgroup v2 that allows users to
interact with MGLRU directly on a specific cgroup without needing
to know the internal memcg_id.
Read (cat memory.lru_gen):
Display lru_gen information for this memcg across all NUMA nodes.
Write (echo to memory.lru_gen):
Execute aging or eviction commands.
Format: cmd nid seq [swappiness] [opt]
cmd: '+' for aging, '-' for eviction
nid: node id
seq: generation sequence number
swappiness: optional, or 'max' for anonymous memory only
opt: force_scan for aging, nr_to_reclaim for eviction
Example:
# Show lru_gen info
cat /sys/fs/cgroup/mygroup/memory.lru_gen
# Run aging on node 0 with seq 100
echo '+ 0 100' > /sys/fs/cgroup/mygroup/memory.lru_gen
# Run eviction on node 0 with seq 99, swappiness 50, reclaim 1000 pages
echo '- 0 99 50 1000' > /sys/fs/cgroup/mygroup/memory.lru_gen
Test result:
cgcreate -g memory:test_group
// create 1GB page cache, loop access 200MB as hot pages.
cgexec -g memory:test_group ./cache_sim &
cat /sys/fs/cgroup/test_group/memory.lru_gen
node 0 (min_seq=0/0, max_seq=3)
0 48744 0 4
1 48744 0 0
2 48744 24 262144
3 48744 0 0
node 1 (min_seq=0/0, max_seq=3)
0 48744 0 0
1 48744 0 0
2 48744 1 0
3 48744 0 0
// age
echo '+ 0 3' > /sys/fs/cgroup/test_group/memory.lru_gen
cat /sys/fs/cgroup/test_group/memory.lru_gen
memcg 35 /test_group
node 0 (min_seq=1/1, max_seq=4)
1 216564 0 3
2 216564 24 262144
3 216564 0 1
4 14936 0 0
node 1 (min_seq=0/0, max_seq=3)
0 216564 0 0
1 216564 0 0
2 216564 1 0
3 216564 0 0
// age
echo '+ 0 4' > /sys/fs/cgroup/test_group/memory.lru_gen
cat /sys/fs/cgroup/test_group/memory.lru_gen
memcg 35 /test_group
node 0 (min_seq=2/2, max_seq=5)
2 266880 23 210947
3 266880 0 1
4 65252 1 51200
5 9320 0 0
node 1 (min_seq=0/0, max_seq=3)
0 266880 0 0
1 266880 0 0
2 266880 1 0
3 266880 0 0
// age
echo '+ 0 5' > /sys/fs/cgroup/test_group/memory.lru_gen
cat /sys/fs/cgroup/test_group/memory.lru_gen
node 0 (min_seq=3/3, max_seq=6)
3 318384 0 210948
4 116756 0 0
5 60824 1 51200
6 3408 23 0
node 1 (min_seq=0/0, max_seq=3)
0 318384 0 0
1 318384 0 0
2 318384 1 0
3 318384 0 0
// reclaim 200000 pages
echo '- 0 3 0 200000' > /sys/fs/cgroup/test_group/memory.lru_gen
cat /sys/fs/cgroup/test_group/memory.lru_gen
node 0 (min_seq=3/3, max_seq=6)
3 760308 0 10884
4 558680 0 0
5 502748 1 51200
6 445332 23 0
node 1 (min_seq=0/0, max_seq=3)
0 760308 0 0
1 760308 0 0
2 760308 1 0
3 760308 0 0
// reclaim 20000 pages
echo '- 0 3 0 20000' > /sys/fs/cgroup/test_group/memory.lru_gen
cat /sys/fs/cgroup/test_group/memory.lru_gen
node 0 (min_seq=3/5, max_seq=6)
3 826864 0 0
4 625236 0 0
5 569304 1 51201
6 511888 23 0
node 1 (min_seq=0/0, max_seq=3)
0 826864 0 0
1 826864 0 0
2 826864 1 0
3 826864 0 0
Signed-off-by: Jiayuan Chen <jiayuan.chen@...pee.com>
---
include/linux/mmzone.h | 16 +++++++
mm/memcontrol.c | 31 ++++++++++++++
mm/vmscan.c | 94 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 141 insertions(+)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fc5d6c88d2f0..8edb9549b435 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -625,6 +625,11 @@ void lru_gen_offline_memcg(struct mem_cgroup *memcg);
void lru_gen_release_memcg(struct mem_cgroup *memcg);
void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid);
+/*
+ * memcg interface: show lru_gen state of, and run lru_gen commands on, a
+ * single memcg. Used by the cgroup v2 "lru_gen" memory file.
+ */
+struct seq_file;
+void lru_gen_seq_show_memcg(struct seq_file *m, struct mem_cgroup *memcg);
+int lru_gen_seq_write_memcg(struct mem_cgroup *memcg, char *buf);
+
#else /* !CONFIG_LRU_GEN */
static inline void lru_gen_init_pgdat(struct pglist_data *pgdat)
@@ -664,6 +669,17 @@ static inline void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid)
{
}
+struct seq_file;
+/* !CONFIG_LRU_GEN stub: there is no lru_gen state, so nothing is printed. */
+static inline void lru_gen_seq_show_memcg(struct seq_file *m,
+ struct mem_cgroup *memcg)
+{
+}
+
+/* !CONFIG_LRU_GEN stub: commands cannot be run, always returns -EOPNOTSUPP. */
+static inline int lru_gen_seq_write_memcg(struct mem_cgroup *memcg, char *buf)
+{
+ return -EOPNOTSUPP;
+}
+
#endif /* CONFIG_LRU_GEN */
struct lruvec {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 75fc22a33b28..e2f13a69b891 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4565,6 +4565,30 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
return nbytes;
}
+#ifdef CONFIG_LRU_GEN
+/*
+ * seq_show handler of the "lru_gen" memory file: hand the memcg behind
+ * the seq_file to lru_gen_seq_show_memcg(). Always returns 0.
+ */
+static int memory_lru_gen_show(struct seq_file *m, void *v)
+{
+	lru_gen_seq_show_memcg(m, mem_cgroup_from_seq(m));
+
+	return 0;
+}
+
+/*
+ * Write handler of the "lru_gen" memory file.
+ *
+ * Passes @buf, with leading and trailing whitespace stripped, to
+ * lru_gen_seq_write_memcg() for the memcg the file belongs to.
+ *
+ * Returns @nbytes if that call returned 0, otherwise its return value.
+ */
+static ssize_t memory_lru_gen_write(struct kernfs_open_file *of, char *buf,
+				    size_t nbytes, loff_t off)
+{
+	int err;
+
+	err = lru_gen_seq_write_memcg(mem_cgroup_from_css(of_css(of)),
+				      strstrip(buf));
+	if (err)
+		return err;
+
+	return nbytes;
+}
+#endif
+
static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
@@ -4649,6 +4673,13 @@ static struct cftype memory_files[] = {
.flags = CFTYPE_NS_DELEGATABLE,
.write = memory_reclaim,
},
+#ifdef CONFIG_LRU_GEN
+ {
+ .name = "lru_gen",
+ .seq_show = memory_lru_gen_show,
+ .write = memory_lru_gen_write,
+ },
+#endif
{ } /* terminate */
};
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8ea5b67daa36..43f38f9f43c5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5668,6 +5668,100 @@ static const struct file_operations lru_gen_ro_fops = {
.release = seq_release,
};
+/******************************************************************************
+ * memcg interface
+ ******************************************************************************/
+
+/*
+ * Print the lru_gen state of @memcg to @m.
+ *
+ * The first line carries the memcg id and, when both @memcg and the
+ * temporary path buffer are available, the cgroup path (otherwise an
+ * empty string). It is followed, for every node in the N_MEMORY state,
+ * by a header with the node's min_seq/max_seq values and the output of
+ * lru_gen_print_lruvec() for that node's lruvec.
+ */
+void lru_gen_seq_show_memcg(struct seq_file *m, struct mem_cgroup *memcg)
+{
+	/* scratch buffer for the cgroup path; a failed allocation is tolerated */
+	char *pathbuf = kvmalloc(PATH_MAX, GFP_KERNEL);
+	bool use_path = memcg && pathbuf;
+	int node;
+
+#ifdef CONFIG_MEMCG
+	if (use_path)
+		cgroup_path(memcg->css.cgroup, pathbuf, PATH_MAX);
+#endif
+	seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg),
+		   use_path ? pathbuf : "");
+	kvfree(pathbuf);
+
+	for_each_node_state(node, N_MEMORY) {
+		struct lruvec *lruvec = get_lruvec(memcg, node);
+		DEFINE_MAX_SEQ(lruvec);
+		DEFINE_MIN_SEQ(lruvec);
+
+		seq_printf(m, " node %5d (min_seq=%lu/%lu, max_seq=%lu)\n",
+			   node, min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE],
+			   max_seq);
+		lru_gen_print_lruvec(m, lruvec, max_seq, min_seq, false);
+	}
+}
+
+/**
+ * lru_gen_seq_write_memcg - run lru_gen commands on a single memcg
+ * @memcg: memory cgroup the commands apply to
+ * @buf: NUL-terminated command string; it is modified while being parsed
+ *
+ * @buf holds one or more commands separated by ',', ';' or a newline, each
+ * of the form "cmd nid seq [swappiness] [opt]". swappiness is either a
+ * number or "max" (mapped to SWAPPINESS_ANON_ONLY). When swappiness or opt
+ * is omitted, it is passed to __run_cmd() as -1.
+ *
+ * Processing stops at the first command that is malformed or fails;
+ * commands before it have already been executed.
+ *
+ * Return: 0 if at least one command was given and all of them succeeded,
+ * -ENOMEM if set_mm_walk() fails, -EINVAL if the input is malformed,
+ * contains no command or names an invalid node, otherwise the error
+ * returned by kstrtouint() or __run_cmd().
+ */
+int lru_gen_seq_write_memcg(struct mem_cgroup *memcg, char *buf)
+{
+	char *cur, *next;
+	unsigned int flags;
+	struct blk_plug plug;
+	int err = -EINVAL;
+	struct scan_control sc = {
+		.may_writepage = true,
+		.may_unmap = true,
+		.may_swap = true,
+		.reclaim_idx = MAX_NR_ZONES - 1,
+		.gfp_mask = GFP_KERNEL,
+		.proactive = true,
+		.target_mem_cgroup = memcg,
+	};
+
+	set_task_reclaim_state(current, &sc.reclaim_state);
+	flags = memalloc_noreclaim_save();
+	blk_start_plug(&plug);
+	if (!set_mm_walk(NULL, true)) {
+		err = -ENOMEM;
+		goto done;
+	}
+
+	next = buf;
+	while ((cur = strsep(&next, ",;\n"))) {
+		int n, end;
+		char cmd, swap_str[5];
+		unsigned int nid, swappiness = -1;
+		unsigned long seq, opt = -1;
+		struct lruvec *lruvec;
+
+		cur = skip_spaces(cur);
+		if (!*cur)
+			continue;
+
+		/*
+		 * Each %n records how far the scan got, so cur[end] must be
+		 * the terminating NUL unless trailing garbage was given.
+		 */
+		n = sscanf(cur, "%c %u %lu %n %4s %n %lu %n", &cmd, &nid,
+			   &seq, &end, swap_str, &end, &opt, &end);
+		if (n < 3 || cur[end]) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (n > 3) {
+			if (!strcmp("max", swap_str)) {
+				swappiness = SWAPPINESS_ANON_ONLY;
+			} else {
+				/*
+				 * Record the parse error explicitly: err may
+				 * still be 0 from a previous successful
+				 * command, which would otherwise make a
+				 * malformed swappiness look like success.
+				 */
+				err = kstrtouint(swap_str, 0, &swappiness);
+				if (err)
+					break;
+			}
+		}
+
+		if (nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY)) {
+			err = -EINVAL;
+			break;
+		}
+
+		lruvec = get_lruvec(memcg, nid);
+		err = __run_cmd(cmd, lruvec, seq, &sc, swappiness, opt);
+		if (err)
+			break;
+	}
+done:
+	clear_mm_walk();
+	blk_finish_plug(&plug);
+	memalloc_noreclaim_restore(flags);
+	set_task_reclaim_state(current, NULL);
+	return err;
+}
+
/******************************************************************************
* initialization
******************************************************************************/
--
2.43.0
Powered by blists - more mailing lists