Message-ID: <20250920015526.246554-1-inwardvessel@gmail.com>
Date: Fri, 19 Sep 2025 18:55:26 -0700
From: JP Kobryn <inwardvessel@...il.com>
To: shakeel.butt@...ux.dev,
mkoutny@...e.com,
yosryahmed@...gle.com,
hannes@...xchg.org,
tj@...nel.org,
akpm@...ux-foundation.org
Cc: linux-kernel@...r.kernel.org,
cgroups@...r.kernel.org,
kernel-team@...a.com
Subject: [RFC PATCH] memcg: introduce kfuncs for fetching memcg stats
The kernel has to perform a significant amount of work when a user mode
program reads the memory.stat file of a cgroup. Aside from flushing stats,
there is overhead in the string formatting done for each stat. Some perf
data is shown below from a program that reads memory.stat 1M times:
26.75% a.out [kernel.kallsyms] [k] vsnprintf
19.88% a.out [kernel.kallsyms] [k] format_decode
12.11% a.out [kernel.kallsyms] [k] number
11.72% a.out [kernel.kallsyms] [k] string
8.46% a.out [kernel.kallsyms] [k] strlen
4.22% a.out [kernel.kallsyms] [k] seq_buf_printf
2.79% a.out [kernel.kallsyms] [k] memory_stat_format
1.49% a.out [kernel.kallsyms] [k] put_dec_trunc8
1.45% a.out [kernel.kallsyms] [k] widen_string
1.01% a.out [kernel.kallsyms] [k] memcpy_orig
As an alternative to reading memory.stat, introduce new kfuncs to allow
fetching specific memcg stats from within bpf iter/cgroup-based programs.
Reading stats in this manner avoids the overhead of the string formatting
shown above.
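
For illustration, a sketch of a cgroup iterator program that could consume
these kfuncs is shown below. It is not part of this patch; the sleepable
section name, the particular stat/event selections, and the program name
are assumptions made for the example:

  /* Hypothetical usage sketch, not included in this patch: a sleepable
   * cgroup iterator program that flushes once and then reads one node
   * stat and one vm event through the proposed kfuncs.
   */
  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  extern void cgroup_flush_memcg_stats(struct cgroup *cgrp) __ksym;
  extern unsigned long node_stat_fetch(struct cgroup *cgrp,
                                       enum node_stat_item item) __ksym;
  extern unsigned long vm_event_fetch(struct cgroup *cgrp,
                                      enum vm_event_item item) __ksym;

  SEC("iter.s/cgroup")
  int dump_memcg_stats(struct bpf_iter__cgroup *ctx)
  {
          struct seq_file *seq = ctx->meta->seq;
          struct cgroup *cgrp = ctx->cgroup;
          unsigned long anon, pgfault;

          if (!cgrp)
                  return 0;

          /* flush once, then read the flushed values directly */
          cgroup_flush_memcg_stats(cgrp);
          anon = node_stat_fetch(cgrp, NR_ANON_MAPPED);
          pgfault = vm_event_fetch(cgrp, PGFAULT);

          BPF_SEQ_PRINTF(seq, "anon %lu pgfault %lu\n", anon, pgfault);
          return 0;
  }

  char _license[] SEC("license") = "GPL";

A user space loader would attach and read this iterator to obtain the
selected stats without paying the memory.stat formatting cost shown above.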
Signed-off-by: JP Kobryn <inwardvessel@...il.com>
---
mm/memcontrol.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 67 insertions(+)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8dd7fbed5a94..aa22dc6f47ee 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -870,6 +870,73 @@ unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
}
#endif
+static inline struct mem_cgroup *mem_cgroup_from_cgroup(struct cgroup *cgrp)
+{
+ return cgrp ? mem_cgroup_from_css(cgrp->subsys[memory_cgrp_id]) : NULL;
+}
+
+__bpf_kfunc static void cgroup_flush_memcg_stats(struct cgroup *cgrp)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_cgroup(cgrp);
+
+ if (!memcg)
+ return;
+
+ mem_cgroup_flush_stats(memcg);
+}
+
+__bpf_kfunc static unsigned long node_stat_fetch(struct cgroup *cgrp,
+ enum node_stat_item item)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_cgroup(cgrp);
+
+ if (!memcg)
+ return 0;
+
+ return memcg_page_state_output(memcg, item);
+}
+
+__bpf_kfunc static unsigned long memcg_stat_fetch(struct cgroup *cgrp,
+ enum memcg_stat_item item)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_cgroup(cgrp);
+
+ if (!memcg)
+ return 0;
+
+ return memcg_page_state_output(memcg, item);
+}
+
+__bpf_kfunc static unsigned long vm_event_fetch(struct cgroup *cgrp,
+ enum vm_event_item item)
+{
+ struct mem_cgroup *memcg = mem_cgroup_from_cgroup(cgrp);
+
+ if (!memcg)
+ return 0;
+
+ return memcg_events(memcg, item);
+}
+
+BTF_KFUNCS_START(bpf_memcontrol_kfunc_ids)
+BTF_ID_FLAGS(func, cgroup_flush_memcg_stats)
+BTF_ID_FLAGS(func, node_stat_fetch)
+BTF_ID_FLAGS(func, memcg_stat_fetch)
+BTF_ID_FLAGS(func, vm_event_fetch)
+BTF_KFUNCS_END(bpf_memcontrol_kfunc_ids)
+
+static const struct btf_kfunc_id_set bpf_memcontrol_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_memcontrol_kfunc_ids,
+};
+
+static int __init bpf_memcontrol_kfunc_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING,
+ &bpf_memcontrol_kfunc_set);
+}
+late_initcall(bpf_memcontrol_kfunc_init);
+
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
{
/*
--
2.47.3