[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <20210311061752.310831-1-zhouchengming@bytedance.com>
Date: Thu, 11 Mar 2021 14:17:52 +0800
From: Chengming Zhou <zhouchengming@...edance.com>
To: tj@...nel.org, lizefan.x@...edance.com, hannes@...xchg.org,
corbet@....net, mingo@...hat.com, peterz@...radead.org,
juri.lelli@...hat.com, vincent.guittot@...aro.org
Cc: dietmar.eggemann@....com, rostedt@...dmis.org, bsegall@...gle.com,
mgorman@...e.de, bristot@...hat.com, cgroups@...r.kernel.org,
linux-doc@...r.kernel.org, linux-kernel@...r.kernel.org,
zhouchengming@...edance.com, songmuchun@...edance.com
Subject: [PATCH] cgroup-v2: Add taskstats counters in cgroup.stat
We have the netlink CGROUPSTATS_CMD_GET interface to get the taskstats
of a cgroup on v1, but there is no equivalent interface on v2, which
makes it difficult to calculate the per-cgroup cpu load in cadvisor
or to implement cgroup proc interfaces in lxcfs, such as /proc/loadavg.
Since these counters are already maintained in the psi subsystem,
this patch sums them up and exports them in the cgroup.stat interface.
Signed-off-by: Chengming Zhou <zhouchengming@...edance.com>
---
Documentation/admin-guide/cgroup-v2.rst | 9 +++++++
include/linux/psi.h | 1 +
kernel/cgroup/cgroup.c | 3 +++
kernel/sched/psi.c | 34 +++++++++++++++++++++++++
4 files changed, 47 insertions(+)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 64c62b979f2f..4184e749f687 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -923,6 +923,15 @@ All cgroup core files are prefixed with "cgroup."
A dying cgroup can consume system resources not exceeding
limits, which were active at the moment of cgroup deletion.
+ nr_iowait_tasks
+ Total number of tasks in iowait.
+
+ nr_memstall_tasks
+ Total number of tasks in memstall.
+
+ nr_running_tasks
+ Total number of runnable tasks.
+
cgroup.freeze
A read-write single value file which exists on non-root cgroups.
Allowed values are "0" and "1". The default is "0".
diff --git a/include/linux/psi.h b/include/linux/psi.h
index 7361023f3fdd..ea98239424ca 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -30,6 +30,7 @@ int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
int psi_cgroup_alloc(struct cgroup *cgrp);
void psi_cgroup_free(struct cgroup *cgrp);
void cgroup_move_task(struct task_struct *p, struct css_set *to);
+void psi_taskstat_show(struct seq_file *m, struct cgroup *cgrp);
struct psi_trigger *psi_trigger_create(struct psi_group *group,
char *buf, size_t nbytes, enum psi_res res);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 9153b20e5cc6..2724ae318a3b 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3502,6 +3502,9 @@ static int cgroup_stat_show(struct seq_file *seq, void *v)
seq_printf(seq, "nr_dying_descendants %d\n",
cgroup->nr_dying_descendants);
+#ifdef CONFIG_PSI
+ psi_taskstat_show(seq, cgroup);
+#endif
return 0;
}
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 967732c0766c..0ae8bd278ca4 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -1000,6 +1000,40 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to)
task_rq_unlock(rq, task, &rf);
}
+
+void psi_taskstat_show(struct seq_file *m, struct cgroup *cgrp)
+{ /* Sum the per-CPU PSI task counters of @cgrp's psi group and print the totals to @m. */
+ struct psi_group *group;
+ int cpu;
+ int s;
+ unsigned int taskstat[NR_PSI_TASK_COUNTS - 1] = { 0, };
+ /* NOTE(review): taskstat[] is indexed by s < NR_ONCPU below; this assumes NR_ONCPU == NR_PSI_TASK_COUNTS - 1 -- confirm against the enum. */
+ if (static_branch_likely(&psi_disabled)) /* emit nothing when the psi_disabled static key is set */
+ return;
+ /* Inode number 1 selects the global psi_system group (presumably the root cgroup -- verify); any other cgroup uses its own embedded group. */
+ group = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
+ /* Visit every possible CPU and fold its task counts into the totals. */
+ for_each_possible_cpu(cpu) {
+ struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu);
+ unsigned int tasks[NR_PSI_TASK_COUNTS];
+ unsigned int seq;
+ /* Copy this CPU's counters, repeating the copy for as long as read_seqcount_retry() asks for a retry. */
+ do {
+ seq = read_seqcount_begin(&groupc->seq);
+ memcpy(tasks, groupc->tasks, sizeof(groupc->tasks));
+ } while (read_seqcount_retry(&groupc->seq, seq));
+ /* Only the counters indexed below NR_ONCPU are accumulated. */
+ for (s = 0; s < NR_ONCPU; s++)
+ taskstat[s] += tasks[s];
+ }
+ /* Emit the three totals, one "name value" pair per line. */
+ seq_printf(m, "nr_iowait_tasks %u\n"
+ "nr_memstall_tasks %u\n"
+ "nr_running_tasks %u\n",
+ taskstat[NR_IOWAIT],
+ taskstat[NR_MEMSTALL],
+ taskstat[NR_RUNNING]);
+}
#endif /* CONFIG_CGROUPS */
int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
--
2.25.1
Powered by blists - more mailing lists