Message-ID: <20250207064052.399227-2-changwoo@igalia.com>
Date: Fri, 7 Feb 2025 15:40:51 +0900
From: Changwoo Min <changwoo@...lia.com>
To: tj@...nel.org,
void@...ifault.com,
arighi@...dia.com
Cc: kernel-dev@...lia.com,
linux-kernel@...r.kernel.org,
Changwoo Min <changwoo@...lia.com>
Subject: [PATCH v2 1/2] sched_ext: Add an event, SCX_EV_ENQ_SLICE_DFL

Add a core event, SCX_EV_ENQ_SLICE_DFL, which counts how many tasks
have been enqueued (or pick_task-ed or select_cpu-ed) with the default
time slice (SCX_SLICE_DFL).

Unintentionally scheduling a task with SCX_SLICE_DFL can be a source
of latency spikes because SCX_SLICE_DFL is relatively long (20 msec).
Thus, a soaring SCX_EV_ENQ_SLICE_DFL value can be a sign of BPF
scheduler bugs that cause latency spikes, especially when
ops.select_cpu() is provided.
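
As an illustration, a BPF scheduler could poll the counter through the
scx_bpf_events() kfunc (extended at the end of this patch) and flag
sudden growth. This is only a sketch; the prev_slice_dfl bookkeeping
variable and the check_slice_dfl() helper are hypothetical, not part
of this patch:

	static u64 prev_slice_dfl;	/* hypothetical bookkeeping */

	static void check_slice_dfl(void)
	{
		struct scx_event_stats events;

		/* aggregate the per-CPU counters system-wide */
		scx_bpf_events(&events, sizeof(events));
		if (events.SCX_EV_ENQ_SLICE_DFL > prev_slice_dfl)
			bpf_printk("SCX_EV_ENQ_SLICE_DFL grew to %llu",
				   events.SCX_EV_ENQ_SLICE_DFL);
		prev_slice_dfl = events.SCX_EV_ENQ_SLICE_DFL;
	}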

__scx_add_event() is used since the caller holds an rq lock or
p->pi_lock, so preemption has already been disabled.
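
For reference, a sketch of the difference between the two helpers,
modeled on the per-CPU event counters in ext.c (not the verbatim
definitions):

	/* safe from any context: this_cpu_add() is preemption-safe */
	#define scx_add_event(name, cnt) \
		this_cpu_add(event_stats_cpu.name, (cnt))

	/*
	 * The caller must already have preemption disabled, e.g. by
	 * holding an rq lock or p->pi_lock, so the cheaper
	 * __this_cpu_add() primitive can be used.
	 */
	#define __scx_add_event(name, cnt) \
		__this_cpu_add(event_stats_cpu.name, (cnt))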
Signed-off-by: Changwoo Min <changwoo@...lia.com>
---
 kernel/sched/ext.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 8a9a30895381..5ef90d9bcdd2 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -1468,6 +1468,12 @@ struct scx_event_stats {
 	 */
 	u64 SCX_EV_ENQ_SKIP_EXITING;
 
+	/*
+	 * The total number of tasks enqueued (or pick_task-ed) with a
+	 * default time slice (SCX_SLICE_DFL).
+	 */
+	u64 SCX_EV_ENQ_SLICE_DFL;
+
 	/*
 	 * The total duration of bypass modes in nanoseconds.
 	 */
@@ -2134,6 +2140,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
 	 */
 	touch_core_sched(rq, p);
 	p->scx.slice = SCX_SLICE_DFL;
+	__scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
 local_norefill:
 	dispatch_enqueue(&rq->scx.local_dsq, p, enq_flags);
 	return;
@@ -2141,6 +2148,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
 global:
 	touch_core_sched(rq, p);	/* see the comment in local: */
 	p->scx.slice = SCX_SLICE_DFL;
+	__scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
 	dispatch_enqueue(find_global_dsq(p), p, enq_flags);
 }
@@ -3202,8 +3210,10 @@ static struct task_struct *pick_task_scx(struct rq *rq)
 	 */
 	if (keep_prev) {
 		p = prev;
-		if (!p->scx.slice)
+		if (!p->scx.slice) {
 			p->scx.slice = SCX_SLICE_DFL;
+			__scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
+		}
 	} else {
 		p = first_local_task(rq);
 		if (!p) {
@@ -3219,6 +3229,7 @@ static struct task_struct *pick_task_scx(struct rq *rq)
 				scx_warned_zero_slice = true;
 			}
 			p->scx.slice = SCX_SLICE_DFL;
+			__scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
 		}
 	}
@@ -3306,6 +3317,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
 		if (found) {
 			p->scx.slice = SCX_SLICE_DFL;
 			p->scx.ddsp_dsq_id = SCX_DSQ_LOCAL;
+			__scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
 		}
 
 		if (rq_bypass)
@@ -5023,6 +5035,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
 	scx_dump_event(s, &events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE);
 	scx_dump_event(s, &events, SCX_EV_DISPATCH_KEEP_LAST);
 	scx_dump_event(s, &events, SCX_EV_ENQ_SKIP_EXITING);
+	scx_dump_event(s, &events, SCX_EV_ENQ_SLICE_DFL);
 	scx_dump_event(s, &events, SCX_EV_BYPASS_DURATION);
 	scx_dump_event(s, &events, SCX_EV_BYPASS_DISPATCH);
 	scx_dump_event(s, &events, SCX_EV_BYPASS_ACTIVATE);
@@ -7163,6 +7176,7 @@ __bpf_kfunc void scx_bpf_events(struct scx_event_stats *events,
 		scx_agg_event(&e_sys, e_cpu, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE);
 		scx_agg_event(&e_sys, e_cpu, SCX_EV_DISPATCH_KEEP_LAST);
 		scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SKIP_EXITING);
+		scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SLICE_DFL);
 		scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_DURATION);
 		scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_DISPATCH);
 		scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_ACTIVATE);
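
A note on the last hunk: scx_agg_event() folds each CPU's counters
into a single system-wide view, so adding the SCX_EV_ENQ_SLICE_DFL
line there is what makes the new event visible to scx_bpf_events()
callers. Conceptually it is just a per-field add, something like
(a sketch, not the verbatim helper):

	#define scx_agg_event(agg, cpu, name) ((agg)->name += (cpu)->name)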
--
2.48.1