[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250423234542.1890867-8-tj@kernel.org>
Date: Wed, 23 Apr 2025 13:44:45 -1000
From: Tejun Heo <tj@...nel.org>
To: David Vernet <void@...ifault.com>,
Andrea Righi <arighi@...dia.com>,
Changwoo Min <changwoo@...lia.com>,
linux-kernel@...r.kernel.org
Cc: Tejun Heo <tj@...nel.org>
Subject: [PATCH 07/12] sched_ext: Move global_dsqs into scx_sched
Global DSQs are going to become per scheduler instance. Move global_dsqs
into scx_sched. find_global_dsq() already takes a task_struct pointer as an
argument and should later be able to determine the scx_sched to use from
that. For now, assume scx_root.
Signed-off-by: Tejun Heo <tj@...nel.org>
---
kernel/sched/ext.c | 95 ++++++++++++++++++++++++----------------------
1 file changed, 49 insertions(+), 46 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index a086b024301f..5beb2dd868ad 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -767,7 +767,17 @@ struct scx_sched {
struct sched_ext_ops ops;
DECLARE_BITMAP(has_op, SCX_OPI_END);
+ /*
+ * Dispatch queues.
+ *
+ * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability.
+ * This is to avoid live-locking in bypass mode where all tasks are
+ * dispatched to %SCX_DSQ_GLOBAL and all CPUs consume from it. If
+ * per-node split isn't sufficient, it can be further split.
+ */
struct rhashtable dsq_hash;
+ struct scx_dispatch_q **global_dsqs;
+
bool warned_zero_slice;
atomic_t exit_kind;
@@ -998,16 +1008,6 @@ static unsigned long __percpu *scx_kick_cpus_pnt_seqs;
*/
static DEFINE_PER_CPU(struct task_struct *, direct_dispatch_task);
-/*
- * Dispatch queues.
- *
- * The global DSQ (%SCX_DSQ_GLOBAL) is split per-node for scalability. This is
- * to avoid live-locking in bypass mode where all tasks are dispatched to
- * %SCX_DSQ_GLOBAL and all CPUs consume from it. If per-node split isn't
- * sufficient, it can be further split.
- */
-static struct scx_dispatch_q **global_dsqs;
-
static const struct rhashtable_params dsq_hash_params = {
.key_len = sizeof_field(struct scx_dispatch_q, id),
.key_offset = offsetof(struct scx_dispatch_q, id),
@@ -1108,7 +1108,9 @@ static bool u32_before(u32 a, u32 b)
static struct scx_dispatch_q *find_global_dsq(struct task_struct *p)
{
- return global_dsqs[cpu_to_node(task_cpu(p))];
+ struct scx_sched *sch = scx_root;
+
+ return sch->global_dsqs[cpu_to_node(task_cpu(p))];
}
static struct scx_dispatch_q *find_user_dsq(struct scx_sched *sch, u64 dsq_id)
@@ -2785,11 +2787,11 @@ static bool consume_dispatch_q(struct rq *rq, struct scx_dispatch_q *dsq)
return false;
}
-static bool consume_global_dsq(struct rq *rq)
+static bool consume_global_dsq(struct scx_sched *sch, struct rq *rq)
{
int node = cpu_to_node(cpu_of(rq));
- return consume_dispatch_q(rq, global_dsqs[node]);
+ return consume_dispatch_q(rq, sch->global_dsqs[node]);
}
/**
@@ -3035,7 +3037,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
if (rq->scx.local_dsq.nr)
goto has_tasks;
- if (consume_global_dsq(rq))
+ if (consume_global_dsq(sch, rq))
goto has_tasks;
if (unlikely(!SCX_HAS_OP(scx_root, dispatch)) ||
@@ -3065,7 +3067,7 @@ static int balance_one(struct rq *rq, struct task_struct *prev)
}
if (rq->scx.local_dsq.nr)
goto has_tasks;
- if (consume_global_dsq(rq))
+ if (consume_global_dsq(sch, rq))
goto has_tasks;
/*
@@ -4406,6 +4408,11 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
struct scx_sched *sch = container_of(rcu_work, struct scx_sched, rcu_work);
struct rhashtable_iter rht_iter;
struct scx_dispatch_q *dsq;
+ int node;
+
+ for_each_node_state(node, N_POSSIBLE)
+ kfree(sch->global_dsqs[node]);
+ kfree(sch->global_dsqs);
rhashtable_walk_enter(&sch->dsq_hash, &rht_iter);
do {
@@ -5244,7 +5251,7 @@ static struct kthread_worker *scx_create_rt_helper(const char *name)
static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
{
struct scx_sched *sch;
- int ret;
+ int node, ret;
sch = kzalloc(sizeof(*sch), GFP_KERNEL);
if (!sch)
@@ -5260,16 +5267,40 @@ static struct scx_sched *scx_alloc_and_add_sched(struct sched_ext_ops *ops)
if (ret < 0)
goto err_free_ei;
+ sch->global_dsqs = kcalloc(nr_node_ids, sizeof(sch->global_dsqs[0]),
+ GFP_KERNEL);
+ if (!sch->global_dsqs) {
+ ret = -ENOMEM;
+ goto err_free_hash;
+ }
+
+ for_each_node_state(node, N_POSSIBLE) {
+ struct scx_dispatch_q *dsq;
+
+ dsq = kzalloc_node(sizeof(*dsq), GFP_KERNEL, node);
+ if (!dsq) {
+ ret = -ENOMEM;
+ goto err_free_gdsqs;
+ }
+
+ init_dsq(dsq, SCX_DSQ_GLOBAL);
+ sch->global_dsqs[node] = dsq;
+ }
+
atomic_set(&sch->exit_kind, SCX_EXIT_NONE);
sch->ops = *ops;
sch->kobj.kset = scx_kset;
ret = kobject_init_and_add(&sch->kobj, &scx_ktype, NULL, "root");
if (ret < 0)
- goto err_free_hash;
+ goto err_free_gdsqs;
return sch;
+err_free_gdsqs:
+ for_each_node_state(node, N_POSSIBLE)
+ kfree(sch->global_dsqs[node]);
+ kfree(sch->global_dsqs);
err_free_hash:
rhashtable_free_and_destroy(&sch->dsq_hash, NULL, NULL);
err_free_ei:
@@ -5331,7 +5362,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
struct scx_task_iter sti;
struct task_struct *p;
unsigned long timeout;
- int i, cpu, node, ret;
+ int i, cpu, ret;
if (!cpumask_equal(housekeeping_cpumask(HK_TYPE_DOMAIN),
cpu_possible_mask)) {
@@ -5358,34 +5389,6 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
}
}
- if (!global_dsqs) {
- struct scx_dispatch_q **dsqs;
-
- dsqs = kcalloc(nr_node_ids, sizeof(dsqs[0]), GFP_KERNEL);
- if (!dsqs) {
- ret = -ENOMEM;
- goto err_unlock;
- }
-
- for_each_node_state(node, N_POSSIBLE) {
- struct scx_dispatch_q *dsq;
-
- dsq = kzalloc_node(sizeof(*dsq), GFP_KERNEL, node);
- if (!dsq) {
- for_each_node_state(node, N_POSSIBLE)
- kfree(dsqs[node]);
- kfree(dsqs);
- ret = -ENOMEM;
- goto err_unlock;
- }
-
- init_dsq(dsq, SCX_DSQ_GLOBAL);
- dsqs[node] = dsq;
- }
-
- global_dsqs = dsqs;
- }
-
if (scx_enable_state() != SCX_DISABLED) {
ret = -EBUSY;
goto err_unlock;
--
2.49.0
Powered by blists - more mailing lists