[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Z6WlegLUWJjLD7eZ@gpd3>
Date: Fri, 7 Feb 2025 07:17:30 +0100
From: Andrea Righi <arighi@...dia.com>
To: Changwoo Min <changwoo@...lia.com>
Cc: tj@...nel.org, void@...ifault.com, kernel-dev@...lia.com,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH 1/2] sched_ext: Add an event, SCX_EV_ENQ_SLICE_DFL
Hi Changwoo,
On Fri, Feb 07, 2025 at 12:13:37PM +0900, Changwoo Min wrote:
> Add a core event, SCX_EV_ENQ_SLICE_DFL, which represents how many
> tasks have been enqueued (or pick_task-ed) with a default time slice
> (SCX_SLICE_DFL).
>
> Unintentionally scheduling a task with SCX_SLICE_DFL can be a source
> of latency spikes because SCX_SLICE_DFL is relatively long (20 msec).
> Thus, a rapidly increasing SCX_EV_ENQ_SLICE_DFL value can be a sign of
> BPF scheduler bugs that cause latency spikes.
>
> __scx_add_event() is used since the caller holds an rq lock,
> so preemption has already been disabled.
We may want to consider select_task_rq_scx() as well, when ops.select_cpu()
is not implemented (or during rq_bypass).
In this case, if scx_select_cpu_dfl() finds an idle CPU, we implicitly
dispatch the task to the local DSQ with SCX_SLICE_DFL.
Thanks,
-Andrea
>
> Signed-off-by: Changwoo Min <changwoo@...lia.com>
> ---
> kernel/sched/ext.c | 15 ++++++++++++++-
> 1 file changed, 14 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 8a9a30895381..1077df9280bb 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -1468,6 +1468,12 @@ struct scx_event_stats {
> */
> u64 SCX_EV_ENQ_SKIP_EXITING;
>
> + /*
> + * The total number of tasks enqueued (or pick_task-ed) with a
> + * default time slice (SCX_SLICE_DFL).
> + */
> + u64 SCX_EV_ENQ_SLICE_DFL;
> +
> /*
> * The total duration of bypass modes in nanoseconds.
> */
> @@ -2134,6 +2140,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
> */
> touch_core_sched(rq, p);
> p->scx.slice = SCX_SLICE_DFL;
> + __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
> local_norefill:
> dispatch_enqueue(&rq->scx.local_dsq, p, enq_flags);
> return;
> @@ -2141,6 +2148,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
> global:
> touch_core_sched(rq, p); /* see the comment in local: */
> p->scx.slice = SCX_SLICE_DFL;
> + __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
> dispatch_enqueue(find_global_dsq(p), p, enq_flags);
> }
>
> @@ -3202,8 +3210,10 @@ static struct task_struct *pick_task_scx(struct rq *rq)
> */
> if (keep_prev) {
> p = prev;
> - if (!p->scx.slice)
> + if (!p->scx.slice) {
> p->scx.slice = SCX_SLICE_DFL;
> + __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
> + }
> } else {
> p = first_local_task(rq);
> if (!p) {
> @@ -3219,6 +3229,7 @@ static struct task_struct *pick_task_scx(struct rq *rq)
> scx_warned_zero_slice = true;
> }
> p->scx.slice = SCX_SLICE_DFL;
> + __scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
> }
> }
>
> @@ -5023,6 +5034,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
> scx_dump_event(s, &events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE);
> scx_dump_event(s, &events, SCX_EV_DISPATCH_KEEP_LAST);
> scx_dump_event(s, &events, SCX_EV_ENQ_SKIP_EXITING);
> + scx_dump_event(s, &events, SCX_EV_ENQ_SLICE_DFL);
> scx_dump_event(s, &events, SCX_EV_BYPASS_DURATION);
> scx_dump_event(s, &events, SCX_EV_BYPASS_DISPATCH);
> scx_dump_event(s, &events, SCX_EV_BYPASS_ACTIVATE);
> @@ -7163,6 +7175,7 @@ __bpf_kfunc void scx_bpf_events(struct scx_event_stats *events,
> scx_agg_event(&e_sys, e_cpu, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE);
> scx_agg_event(&e_sys, e_cpu, SCX_EV_DISPATCH_KEEP_LAST);
> scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SKIP_EXITING);
> + scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SLICE_DFL);
> scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_DURATION);
> scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_DISPATCH);
> scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_ACTIVATE);
> --
> 2.48.1
>
Powered by blists - more mailing lists