Message-ID: <20260121231140.832332-2-tj@kernel.org>
Date: Wed, 21 Jan 2026 13:11:07 -1000
From: Tejun Heo <tj@...nel.org>
To: linux-kernel@...r.kernel.org,
sched-ext@...ts.linux.dev
Cc: void@...ifault.com,
andrea.righi@...ux.dev,
changwoo@...lia.com,
emil@...alapatis.com,
Tejun Heo <tj@...nel.org>
Subject: [PATCH 01/34] sched_ext: Implement cgroup subtree iteration for scx_task_iter
For the planned cgroup sub-scheduler support, enable/disable operations are
going to be subtree-specific, and iterating all tasks in the system for those
operations can be unnecessarily expensive and disruptive.
cgroup already has mechanisms to perform subtree task iteration. Implement
cgroup subtree iteration for scx_task_iter (a usage sketch follows the list):
- Add an optional @cgrp argument to scx_task_iter_start() which enables cgroup
  subtree iteration.
- Make scx_task_iter use css_next_descendant_pre() and css_task_iter to
iterate all tasks in the cgroup subtree.
- Update all existing callers to pass NULL to maintain current behavior.
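For illustration only, a subtree-scoped pass is expected to mirror the existing
system-wide ones, with the caller holding cgroup_lock() and passing the subtree
root instead of NULL. This is a sketch, not part of the patch; the variable
names and the assumption that scx_task_iter_next_locked() is used unchanged in
cgroup mode are illustrative:

	struct scx_task_iter sti;
	struct task_struct *p;

	cgroup_lock();				/* prevent cgroup task migrations */
	scx_task_iter_start(&sti, cgrp);	/* passing NULL keeps current behavior */
	while ((p = scx_task_iter_next_locked(&sti))) {
		/* @p is a task in @cgrp's subtree, with its rq locked */
	}
	scx_task_iter_stop(&sti);
	cgroup_unlock();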
The two iteration mechanisms are largely independent and duplicate each other.
It's likely that scx_tasks can be removed in favor of always using cgroup
iteration if CONFIG_SCHED_CLASS_EXT is made to depend on CONFIG_CGROUPS.
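For reference, the cgroup-mode walk added to scx_task_iter_next() below boils
down to a pre-order descendant walk that drains each css's tasks in turn. This
is a simplified sketch with the rq locking and iterator bookkeeping elided, and
it assumes the caller holds cgroup_lock():

	struct cgroup_subsys_state *pos;
	struct css_task_iter it;
	struct task_struct *p;

	/* css_next_descendant_pre(NULL, root) visits @cgrp itself first */
	pos = css_next_descendant_pre(NULL, &cgrp->self);
	while (pos) {
		css_task_iter_start(pos, 0, &it);
		while ((p = css_task_iter_next(&it))) {
			/* visit @p */
		}
		css_task_iter_end(&it);
		pos = css_next_descendant_pre(pos, &cgrp->self);
	}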
Signed-off-by: Tejun Heo <tj@...nel.org>
---
kernel/sched/ext.c | 64 +++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 58 insertions(+), 6 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index bd2fed7a1c56..ba2a72752e13 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -513,14 +513,31 @@ struct scx_task_iter {
struct rq_flags rf;
u32 cnt;
bool list_locked;
+#ifdef CONFIG_CGROUPS
+ struct cgroup *cgrp;
+ struct cgroup_subsys_state *css_pos;
+ struct css_task_iter css_iter;
+#endif
};
/**
* scx_task_iter_start - Lock scx_tasks_lock and start a task iteration
* @iter: iterator to init
+ * @cgrp: Optional root of cgroup subhierarchy to iterate
+ *
+ * Initialize @iter. Once initialized, @iter must eventually be stopped with
+ * scx_task_iter_stop().
+ *
+ * If @cgrp is %NULL, scx_tasks is used for iteration and this function returns
+ * with scx_tasks_lock held and @iter->cursor inserted into scx_tasks.
+ *
+ * If @cgrp is not %NULL, @cgrp and its descendants' tasks are walked using
+ * @iter->css_iter. The caller must be holding cgroup_lock() to prevent cgroup
+ * task migrations.
*
- * Initialize @iter and return with scx_tasks_lock held. Once initialized, @iter
- * must eventually be stopped with scx_task_iter_stop().
+ * The two modes of iteration are largely independent and it's likely that
+ * scx_tasks can be removed in favor of always using cgroup iteration if
+ * CONFIG_SCHED_CLASS_EXT depends on CONFIG_CGROUPS.
*
* scx_tasks_lock and the rq lock may be released using scx_task_iter_unlock()
* between this and the first next() call or between any two next() calls. If
@@ -531,10 +548,19 @@ struct scx_task_iter {
* All tasks which existed when the iteration started are guaranteed to be
* visited as long as they are not dead.
*/
-static void scx_task_iter_start(struct scx_task_iter *iter)
+static void scx_task_iter_start(struct scx_task_iter *iter, struct cgroup *cgrp)
{
memset(iter, 0, sizeof(*iter));
+#ifdef CONFIG_CGROUPS
+ if (cgrp) {
+ lockdep_assert_held(&cgroup_mutex);
+ iter->cgrp = cgrp;
+ iter->css_pos = css_next_descendant_pre(NULL, &iter->cgrp->self);
+ css_task_iter_start(iter->css_pos, 0, &iter->css_iter);
+ return;
+ }
+#endif
raw_spin_lock_irq(&scx_tasks_lock);
iter->cursor = (struct sched_ext_entity){ .flags = SCX_TASK_CURSOR };
@@ -586,6 +612,14 @@ static void __scx_task_iter_maybe_relock(struct scx_task_iter *iter)
*/
static void scx_task_iter_stop(struct scx_task_iter *iter)
{
+#ifdef CONFIG_CGROUPS
+ if (iter->cgrp) {
+ if (iter->css_pos)
+ css_task_iter_end(&iter->css_iter);
+ __scx_task_iter_rq_unlock(iter);
+ return;
+ }
+#endif
__scx_task_iter_maybe_relock(iter);
list_del_init(&iter->cursor.tasks_node);
scx_task_iter_unlock(iter);
@@ -609,6 +643,24 @@ static struct task_struct *scx_task_iter_next(struct scx_task_iter *iter)
cond_resched();
}
+#ifdef CONFIG_CGROUPS
+ if (iter->cgrp) {
+ while (iter->css_pos) {
+ struct task_struct *p;
+
+ p = css_task_iter_next(&iter->css_iter);
+ if (p)
+ return p;
+
+ css_task_iter_end(&iter->css_iter);
+ iter->css_pos = css_next_descendant_pre(iter->css_pos,
+ &iter->cgrp->self);
+ if (iter->css_pos)
+ css_task_iter_start(iter->css_pos, 0, &iter->css_iter);
+ }
+ return NULL;
+ }
+#endif
__scx_task_iter_maybe_relock(iter);
list_for_each_entry(pos, cursor, tasks_node) {
@@ -4256,7 +4308,7 @@ static void scx_disable_workfn(struct kthread_work *work)
scx_init_task_enabled = false;
- scx_task_iter_start(&sti);
+ scx_task_iter_start(&sti, NULL);
while ((p = scx_task_iter_next_locked(&sti))) {
unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
const struct sched_class *old_class = p->sched_class;
@@ -5025,7 +5077,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
if (ret)
goto err_disable_unlock_all;
- scx_task_iter_start(&sti);
+ scx_task_iter_start(&sti, NULL);
while ((p = scx_task_iter_next_locked(&sti))) {
/*
* @p may already be dead, have lost all its usages counts and
@@ -5067,7 +5119,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
* scx_tasks_lock.
*/
percpu_down_write(&scx_fork_rwsem);
- scx_task_iter_start(&sti);
+ scx_task_iter_start(&sti, NULL);
while ((p = scx_task_iter_next_locked(&sti))) {
unsigned int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
const struct sched_class *old_class = p->sched_class;
--
2.52.0