Message-ID: <aFji5HozfgwzRj8L@gpd4>
Date: Mon, 23 Jun 2025 07:15:16 +0200
From: Andrea Righi <arighi@...dia.com>
To: Henry Huang <henry.hj@...group.com>
Cc: changwoo@...lia.com, tj@...nel.org, void@...ifault.com,
谈鉴锋 <henry.tjf@...group.com>,
"Yan Yan(cailing)" <yanyan.yan@...group.com>,
linux-kernel@...r.kernel.org, sched-ext@...ts.linux.dev
Subject: Re: [PATCH v2] sched_ext: include SCX_OPS_TRACK_MIGRATION

Hi Henry,

On Mon, Jun 23, 2025 at 12:32:20PM +0800, Henry Huang wrote:
> Some BPF schedulers need to take action when a task migrates between
> CPUs, for example to update a per-CPU map. If SCX_OPS_TRACK_MIGRATION
> is set, the runnable/quiescent ops are called regardless of whether
> the task is migrating.
>
> Signed-off-by: Henry Huang <henry.hj@...group.com>

Looks good. One last thing: since we're exposing ENQUEUE_MIGRATING and
DEQUEUE_MIGRATING to BPF, can you introduce some scx enums, similar to
what we're doing with DEQUEUE_SLEEP? Something like the following.

With that, you can add my:

Reviewed-by: Andrea Righi <arighi@...dia.com>

Thanks,
-Andrea

 kernel/sched/ext.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 2f0ab232a2786..bf04cde71b34a 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -918,8 +918,11 @@ enum scx_enq_flags {
 };
 
 enum scx_deq_flags {
+	/* expose select ENQUEUE_* flags as enums */
+	SCX_ENQ_MIGRATING	= ENQUEUE_MIGRATING,
 	/* expose select DEQUEUE_* flags as enums */
 	SCX_DEQ_SLEEP		= DEQUEUE_SLEEP,
+	SCX_DEQ_MIGRATING	= DEQUEUE_MIGRATING,
 
 	/* high 32bits are SCX specific */
@@ -2385,10 +2388,10 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags
 	add_nr_running(rq, 1);
 
 	if (task_on_rq_migrating(p))
-		enq_flags |= ENQUEUE_MIGRATING;
+		enq_flags |= SCX_ENQ_MIGRATING;
 
 	if (SCX_HAS_OP(sch, runnable) &&
-	    ((sch->ops.flags & SCX_OPS_TRACK_MIGRATION) || !(enq_flags & ENQUEUE_MIGRATING)))
+	    ((sch->ops.flags & SCX_OPS_TRACK_MIGRATION) || !(enq_flags & SCX_ENQ_MIGRATING)))
 		SCX_CALL_OP_TASK(sch, SCX_KF_REST, runnable, rq, p, enq_flags);
 
 	if (enq_flags & SCX_ENQ_WAKEUP)
@@ -2471,7 +2474,7 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
 	}
 
 	if (task_on_rq_migrating(p))
-		deq_flags |= DEQUEUE_MIGRATING;
+		deq_flags |= SCX_DEQ_MIGRATING;
 
 	ops_dequeue(rq, p, deq_flags);
 
@@ -2493,7 +2496,7 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
 	}
 
 	if (SCX_HAS_OP(sch, quiescent) &&
-	    ((sch->ops.flags & SCX_OPS_TRACK_MIGRATION) || !(deq_flags & DEQUEUE_MIGRATING)))
+	    ((sch->ops.flags & SCX_OPS_TRACK_MIGRATION) || !(deq_flags & SCX_DEQ_MIGRATING)))
 		SCX_CALL_OP_TASK(sch, SCX_KF_REST, quiescent, rq, p, deq_flags);
 
 	if (deq_flags & SCX_DEQ_SLEEP)
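
As an aside, here's a minimal (untested, hypothetical) sketch of the kind
of scheduler this enables: names are made up, most of the boilerplate a
real scheduler needs is omitted, and it only shows SCX_OPS_TRACK_MIGRATION
plus the new enums keeping a per-CPU runnable counter accurate across
migrations:

#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

/* one per-CPU slot counting how many tasks are runnable locally */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, u64);
} nr_local SEC(".maps");

static void nr_local_add(s64 delta)
{
	u32 key = 0;
	u64 *cnt = bpf_map_lookup_elem(&nr_local, &key);

	if (cnt)
		*cnt += delta;
}

void BPF_STRUCT_OPS(migtrack_runnable, struct task_struct *p, u64 enq_flags)
{
	/*
	 * With SCX_OPS_TRACK_MIGRATION this also fires when @p migrates
	 * in, with SCX_ENQ_MIGRATING set in @enq_flags.
	 */
	nr_local_add(1);
}

void BPF_STRUCT_OPS(migtrack_quiescent, struct task_struct *p, u64 deq_flags)
{
	/* SCX_DEQ_MIGRATING: @p is leaving this CPU, not going to sleep */
	nr_local_add(-1);
}

SEC(".struct_ops.link")
struct sched_ext_ops migtrack_ops = {
	.runnable	= (void *)migtrack_runnable,
	.quiescent	= (void *)migtrack_quiescent,
	.flags		= SCX_OPS_TRACK_MIGRATION,
	.name		= "migtrack",
};

Without the flag those two callbacks are skipped on migration, so the
counter would drift as tasks move between CPUs. (Note that both
ops.runnable() and ops.quiescent() are implemented, as validate_ops()
below requires.)
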
> ---
> kernel/sched/ext.c | 24 ++++++++++++++++++++++--
> 1 file changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index b498d86..42c5251 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -161,6 +161,12 @@ enum scx_ops_flags {
>  	SCX_OPS_BUILTIN_IDLE_PER_NODE	= 1LLU << 6,
>  
>  	/*
> +	 * If set, runnable/quiescent ops would be called whether the task is
> +	 * doing migration or not.
> +	 */
> +	SCX_OPS_TRACK_MIGRATION		= 1LLU << 7,
> +
> +	/*
>  	 * CPU cgroup support flags
>  	 */
>  	SCX_OPS_HAS_CGROUP_WEIGHT	= 1LLU << 16,	/* DEPRECATED, will be removed on 6.18 */
> @@ -172,6 +178,7 @@ enum scx_ops_flags {
>  				  SCX_OPS_ALLOW_QUEUED_WAKEUP |
>  				  SCX_OPS_SWITCH_PARTIAL |
>  				  SCX_OPS_BUILTIN_IDLE_PER_NODE |
> +				  SCX_OPS_TRACK_MIGRATION |
>  				  SCX_OPS_HAS_CGROUP_WEIGHT,
>  
>  	/* high 8 bits are internal, don't include in SCX_OPS_ALL_FLAGS */
> @@ -2390,7 +2397,11 @@ static void enqueue_task_scx(struct rq *rq, struct task_struct *p, int enq_flags
>  	rq->scx.nr_running++;
>  	add_nr_running(rq, 1);
>  
> -	if (SCX_HAS_OP(sch, runnable) && !task_on_rq_migrating(p))
> +	if (task_on_rq_migrating(p))
> +		enq_flags |= ENQUEUE_MIGRATING;
> +
> +	if (SCX_HAS_OP(sch, runnable) &&
> +	    ((sch->ops.flags & SCX_OPS_TRACK_MIGRATION) || !(enq_flags & ENQUEUE_MIGRATING)))
>  		SCX_CALL_OP_TASK(sch, SCX_KF_REST, runnable, rq, p, enq_flags);
>  
>  	if (enq_flags & SCX_ENQ_WAKEUP)
> @@ -2463,6 +2474,9 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
>  		return true;
>  	}
>  
> +	if (task_on_rq_migrating(p))
> +		deq_flags |= DEQUEUE_MIGRATING;
> +
>  	ops_dequeue(rq, p, deq_flags);
>  
>  	/*
> @@ -2482,7 +2496,8 @@ static bool dequeue_task_scx(struct rq *rq, struct task_struct *p, int deq_flags
>  		SCX_CALL_OP_TASK(sch, SCX_KF_REST, stopping, rq, p, false);
>  	}
>  
> -	if (SCX_HAS_OP(sch, quiescent) && !task_on_rq_migrating(p))
> +	if (SCX_HAS_OP(sch, quiescent) &&
> +	    ((sch->ops.flags & SCX_OPS_TRACK_MIGRATION) || !(deq_flags & DEQUEUE_MIGRATING)))
>  		SCX_CALL_OP_TASK(sch, SCX_KF_REST, quiescent, rq, p, deq_flags);
>  
>  	if (deq_flags & SCX_DEQ_SLEEP)
> @@ -5495,6 +5510,11 @@ static int validate_ops(struct scx_sched *sch, const struct sched_ext_ops *ops)
>  		return -EINVAL;
>  	}
>  
> +	if ((ops->flags & SCX_OPS_TRACK_MIGRATION) && (!ops->runnable || !ops->quiescent)) {
> +		scx_error(sch, "SCX_OPS_TRACK_MIGRATION requires ops.runnable() and ops.quiescent() to be implemented");
> +		return -EINVAL;
> +	}
> +
>  	if (ops->flags & SCX_OPS_HAS_CGROUP_WEIGHT)
>  		pr_warn("SCX_OPS_HAS_CGROUP_WEIGHT is deprecated and a noop\n");
>  
> --
> Henry
>