[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230823174804.23632-2-mkoutny@suse.com>
Date: Wed, 23 Aug 2023 19:48:03 +0200
From: Michal Koutný <mkoutny@...e.com>
To: cgroups@...r.kernel.org, linux-kernel@...r.kernel.org
Cc: Tejun Heo <tj@...nel.org>, Zefan Li <lizefan.x@...edance.com>,
Johannes Weiner <hannes@...xchg.org>,
Firo Yang <firo.yang@...e.com>
Subject: [PATCH 1/2] cgroup: Print v1 cgroup.procs and tasks without pidlists
pidlists are a structure that allows printing cgroup member tasks in
sorted order and with proper pidns (in)visibility.
The promise of sorted output was removed in the commit 7823da36ce8e
("cgroups: update documentation of cgroups tasks and procs files") (more
than 13 years ago at the time of writing this).
On systems that still use v1 hierarchies (e.g. systemd in non-unified
mode), pidlists are problematic because:
a) their cache unnecessarily busies workqueues (cgroup_pidlist_destroy_wq)
b) PID recycling [1] may lead to logging noise:
> seq_file: buggy .next function kernfs_seq_next did not update position index
It is possible to reuse cgroup v2 code that relies directly on the css_set
iterator without caching (effectively extracting the css_task_iter_* calls
from pidlist_array_load()).
We only need to make a provision for pidns by skipping external tasks
(instead of printing '0' like in v2).
[1] cgroup v1 code uses the PID as the iterator position; PID recycling causes
a repetition of the same PID at the end of the (`tasks`) pidlist, and
seq_file interprets this as a non-incremented index. (seq_file code
corrects it by PID++, which should be harmless unless the tasks file is
read byte by byte.)
Signed-off-by: Michal Koutný <mkoutny@...e.com>
---
kernel/cgroup/cgroup-internal.h | 5 +++++
kernel/cgroup/cgroup-v1.c | 32 ++++++++++++++++++++++++--------
kernel/cgroup/cgroup.c | 8 ++++----
3 files changed, 33 insertions(+), 12 deletions(-)
diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
index c56071f150f2..8edf7aeac159 100644
--- a/kernel/cgroup/cgroup-internal.h
+++ b/kernel/cgroup/cgroup-internal.h
@@ -264,6 +264,11 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
int __cgroup_task_count(const struct cgroup *cgrp);
int cgroup_task_count(const struct cgroup *cgrp);
+void cgroup_procs_release(struct kernfs_open_file *of);
+void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos);
+void *cgroup_procs_start(struct seq_file *s, loff_t *pos);
+void *cgroup_threads_start(struct seq_file *s, loff_t *pos);
+
/*
* rstat.c
*/
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index 83044312bc41..7c0945ccba0d 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -486,6 +486,22 @@ static int cgroup_pidlist_show(struct seq_file *s, void *v)
return 0;
}
+static int cgroup1_procs_show(struct seq_file *s, void *v)
+{
+ pid_t pid;
+
+ /* Print PID both for `tasks` file (threads) and `cgroup.procs`
+ * (processes), the latter iterates with CSS_TASK_ITER_PROCS hence we
+ * get PIDs of thread group leaders, i.e. tgids.
+ */
+ pid = task_pid_vnr(v);
+ if (!pid)
+ return SEQ_SKIP;
+
+ seq_printf(s, "%d\n", pid);
+ return 0;
+}
+
static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off,
bool threadgroup)
@@ -623,11 +639,11 @@ static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
struct cftype cgroup1_base_files[] = {
{
.name = "cgroup.procs",
- .seq_start = cgroup_pidlist_start,
- .seq_next = cgroup_pidlist_next,
- .seq_stop = cgroup_pidlist_stop,
- .seq_show = cgroup_pidlist_show,
.private = CGROUP_FILE_PROCS,
+ .release = cgroup_procs_release,
+ .seq_start = cgroup_procs_start,
+ .seq_next = cgroup_procs_next,
+ .seq_show = cgroup1_procs_show,
.write = cgroup1_procs_write,
},
{
@@ -642,11 +658,11 @@ struct cftype cgroup1_base_files[] = {
},
{
.name = "tasks",
- .seq_start = cgroup_pidlist_start,
- .seq_next = cgroup_pidlist_next,
- .seq_stop = cgroup_pidlist_stop,
- .seq_show = cgroup_pidlist_show,
.private = CGROUP_FILE_TASKS,
+ .release = cgroup_procs_release,
+ .seq_start = cgroup_threads_start,
+ .seq_next = cgroup_procs_next,
+ .seq_show = cgroup1_procs_show,
.write = cgroup1_tasks_write,
},
{
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index f55a40db065f..3c5ba2ca7852 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -4963,7 +4963,7 @@ void css_task_iter_end(struct css_task_iter *it)
put_task_struct(it->cur_task);
}
-static void cgroup_procs_release(struct kernfs_open_file *of)
+void cgroup_procs_release(struct kernfs_open_file *of)
{
struct cgroup_file_ctx *ctx = of->priv;
@@ -4971,7 +4971,7 @@ static void cgroup_procs_release(struct kernfs_open_file *of)
css_task_iter_end(&ctx->procs.iter);
}
-static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
+void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
{
struct kernfs_open_file *of = s->private;
struct cgroup_file_ctx *ctx = of->priv;
@@ -5008,7 +5008,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
return cgroup_procs_next(s, NULL, NULL);
}
-static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
+void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
{
struct cgroup *cgrp = seq_css(s)->cgroup;
@@ -5152,7 +5152,7 @@ static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
return __cgroup_procs_write(of, buf, true) ?: nbytes;
}
-static void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
+void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
{
return __cgroup_procs_start(s, pos, 0);
}
--
2.41.0
Powered by blists - more mailing lists