lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Z6WlegLUWJjLD7eZ@gpd3>
Date: Fri, 7 Feb 2025 07:17:30 +0100
From: Andrea Righi <arighi@...dia.com>
To: Changwoo Min <changwoo@...lia.com>
Cc: tj@...nel.org, void@...ifault.com, kernel-dev@...lia.com,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH 1/2] sched_ext: Add an event, SCX_EV_ENQ_SLICE_DFL

Hi Changwoo,

On Fri, Feb 07, 2025 at 12:13:37PM +0900, Changwoo Min wrote:
> Add a core event, SCX_EV_ENQ_SLICE_DFL, which represents how many
> tasks have been enqueued (or pick_task-ed) with a default time slice
> (SCX_SLICE_DFL).
> 
> Scheduling a task with SCX_SLICE_DFL unintentionally would be a source
> of latency spikes because SCX_SLICE_DFL is relatively long (20 msec).
> Thus, a soaring SCX_EV_ENQ_SLICE_DFL value would be a sign of BPF
> scheduler bugs, causing latency spikes.
> 
> __scx_add_event() is used since the caller holds an rq lock,
> so preemption has already been disabled.

We may want to consider select_task_rq_scx() as well, when ops.select_cpu()
is not implemented (or during rq_bypass).

In this case, if scx_select_cpu_dfl() finds an idle CPU, we implicitly
dispatch the task to the local DSQ with SCX_SLICE_DFL.

Thanks,
-Andrea

> 
> Signed-off-by: Changwoo Min <changwoo@...lia.com>
> ---
>  kernel/sched/ext.c | 15 ++++++++++++++-
>  1 file changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 8a9a30895381..1077df9280bb 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -1468,6 +1468,12 @@ struct scx_event_stats {
>  	 */
>  	u64		SCX_EV_ENQ_SKIP_EXITING;
>  
> +	/*
> +	 * The total number of tasks enqueued (or pick_task-ed) with a
> +	 * default time slice (SCX_SLICE_DFL).
> +	 */
> +	u64		SCX_EV_ENQ_SLICE_DFL;
> +
>  	/*
>  	 * The total duration of bypass modes in nanoseconds.
>  	 */
> @@ -2134,6 +2140,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
>  	 */
>  	touch_core_sched(rq, p);
>  	p->scx.slice = SCX_SLICE_DFL;
> +	__scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
>  local_norefill:
>  	dispatch_enqueue(&rq->scx.local_dsq, p, enq_flags);
>  	return;
> @@ -2141,6 +2148,7 @@ static void do_enqueue_task(struct rq *rq, struct task_struct *p, u64 enq_flags,
>  global:
>  	touch_core_sched(rq, p);	/* see the comment in local: */
>  	p->scx.slice = SCX_SLICE_DFL;
> +	__scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
>  	dispatch_enqueue(find_global_dsq(p), p, enq_flags);
>  }
>  
> @@ -3202,8 +3210,10 @@ static struct task_struct *pick_task_scx(struct rq *rq)
>  	 */
>  	if (keep_prev) {
>  		p = prev;
> -		if (!p->scx.slice)
> +		if (!p->scx.slice) {
>  			p->scx.slice = SCX_SLICE_DFL;
> +			__scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
> +		}
>  	} else {
>  		p = first_local_task(rq);
>  		if (!p) {
> @@ -3219,6 +3229,7 @@ static struct task_struct *pick_task_scx(struct rq *rq)
>  				scx_warned_zero_slice = true;
>  			}
>  			p->scx.slice = SCX_SLICE_DFL;
> +			__scx_add_event(SCX_EV_ENQ_SLICE_DFL, 1);
>  		}
>  	}
>  
> @@ -5023,6 +5034,7 @@ static void scx_dump_state(struct scx_exit_info *ei, size_t dump_len)
>  	scx_dump_event(s, &events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE);
>  	scx_dump_event(s, &events, SCX_EV_DISPATCH_KEEP_LAST);
>  	scx_dump_event(s, &events, SCX_EV_ENQ_SKIP_EXITING);
> +	scx_dump_event(s, &events, SCX_EV_ENQ_SLICE_DFL);
>  	scx_dump_event(s, &events, SCX_EV_BYPASS_DURATION);
>  	scx_dump_event(s, &events, SCX_EV_BYPASS_DISPATCH);
>  	scx_dump_event(s, &events, SCX_EV_BYPASS_ACTIVATE);
> @@ -7163,6 +7175,7 @@ __bpf_kfunc void scx_bpf_events(struct scx_event_stats *events,
>  		scx_agg_event(&e_sys, e_cpu, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE);
>  		scx_agg_event(&e_sys, e_cpu, SCX_EV_DISPATCH_KEEP_LAST);
>  		scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SKIP_EXITING);
> +		scx_agg_event(&e_sys, e_cpu, SCX_EV_ENQ_SLICE_DFL);
>  		scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_DURATION);
>  		scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_DISPATCH);
>  		scx_agg_event(&e_sys, e_cpu, SCX_EV_BYPASS_ACTIVATE);
> -- 
> 2.48.1
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ