lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CABFh=a4FndcB1-Fimh68uQqXtHCXkGt4vgLb1SmCVW7D7nU-aQ@mail.gmail.com>
Date: Mon, 10 Nov 2025 16:56:19 -0500
From: Emil Tsalapatis <linux-lists@...alapatis.com>
To: Tejun Heo <tj@...nel.org>
Cc: David Vernet <void@...ifault.com>, Andrea Righi <andrea.righi@...ux.dev>, 
	Changwoo Min <changwoo@...lia.com>, Dan Schatzberg <schatzberg.dan@...il.com>, 
	Emil Tsalapatis <etsal@...a.com>, sched-ext@...ts.linux.dev, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 02/14] sched_ext: Make slice values tunable and use
 shorter slice in bypass mode

On Mon, Nov 10, 2025 at 3:57 PM Tejun Heo <tj@...nel.org> wrote:
>
> There have been reported cases of bypass mode not making forward progress fast
> enough. The 20ms default slice is unnecessarily long for bypass mode where the
> primary goal is ensuring all tasks can make forward progress.
>
> Introduce SCX_SLICE_BYPASS set to 5ms and make the scheduler automatically
> switch to it when entering bypass mode. Also make the bypass slice value
> tunable through the slice_bypass_us module parameter (adjustable between 100us
> and 100ms) to make it easier to test whether slice durations are a factor in
> problem cases.
>
> v2: Removed slice_dfl_us module parameter. Fixed typos (Andrea).
>
> Cc: Dan Schatzberg <schatzberg.dan@...il.com>
> Cc: Emil Tsalapatis <etsal@...a.com>
> Cc: Andrea Righi <andrea.righi@...ux.dev>
> Signed-off-by: Tejun Heo <tj@...nel.org>
> ---

Reviewed-by: Emil Tsalapatis <emil@...alapatis.com>

>  include/linux/sched/ext.h | 11 +++++++++++
>  kernel/sched/ext.c        | 34 +++++++++++++++++++++++++++++++---
>  2 files changed, 42 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h
> index eb776b094d36..60285c3d07cf 100644
> --- a/include/linux/sched/ext.h
> +++ b/include/linux/sched/ext.h
> @@ -17,7 +17,18 @@
>  enum scx_public_consts {
>         SCX_OPS_NAME_LEN        = 128,
>
> +       /*
> +        * %SCX_SLICE_DFL is used to refill slices when the BPF scheduler fails
> +        * to set the slice for a task that is selected for execution.
> +        * %SCX_EV_REFILL_SLICE_DFL counts the number of times the default slice
> +        * refill has been triggered.
> +        *
> +        * %SCX_SLICE_BYPASS is used as the slice for all tasks in the bypass
> +        * mode. As making forward progress for all tasks is the main goal of
> +        * the bypass mode, a shorter slice is used.
> +        */
>         SCX_SLICE_DFL           = 20 * 1000000, /* 20ms */
> +       SCX_SLICE_BYPASS        =  5 * 1000000, /*  5ms */
>         SCX_SLICE_INF           = U64_MAX,      /* infinite, implies nohz */
>  };
>
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index cf8d86a2585c..abf2075f174f 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -143,6 +143,32 @@ static struct scx_dump_data scx_dump_data = {
>  /* /sys/kernel/sched_ext interface */
>  static struct kset *scx_kset;
>
> +/*
> + * Parameters that can be adjusted through /sys/module/sched_ext/parameters.
> + * There usually is no reason to modify these as normal scheduler operation
> + * shouldn't be affected by them. The knobs are primarily for debugging.
> + */
> +static u64 scx_slice_dfl = SCX_SLICE_DFL;
> +static unsigned int scx_slice_bypass_us = SCX_SLICE_BYPASS / NSEC_PER_USEC;
> +
> +static int set_slice_us(const char *val, const struct kernel_param *kp)
> +{
> +       return param_set_uint_minmax(val, kp, 100, 100 * USEC_PER_MSEC);
> +}
> +
> +static const struct kernel_param_ops slice_us_param_ops = {
> +       .set = set_slice_us,
> +       .get = param_get_uint,
> +};
> +
> +#undef MODULE_PARAM_PREFIX
> +#define MODULE_PARAM_PREFIX    "sched_ext."
> +
> +module_param_cb(slice_bypass_us, &slice_us_param_ops, &scx_slice_bypass_us, 0600);
> +MODULE_PARM_DESC(slice_bypass_us, "bypass slice in microseconds, applied on [un]load (100us to 100ms)");
> +
> +#undef MODULE_PARAM_PREFIX
> +
>  #define CREATE_TRACE_POINTS
>  #include <trace/events/sched_ext.h>
>
> @@ -919,7 +945,7 @@ static void dsq_mod_nr(struct scx_dispatch_q *dsq, s32 delta)
>
>  static void refill_task_slice_dfl(struct scx_sched *sch, struct task_struct *p)
>  {
> -       p->scx.slice = SCX_SLICE_DFL;
> +       p->scx.slice = scx_slice_dfl;
>         __scx_add_event(sch, SCX_EV_REFILL_SLICE_DFL, 1);
>  }
>
> @@ -2892,7 +2918,7 @@ void init_scx_entity(struct sched_ext_entity *scx)
>         INIT_LIST_HEAD(&scx->runnable_node);
>         scx->runnable_at = jiffies;
>         scx->ddsp_dsq_id = SCX_DSQ_INVALID;
> -       scx->slice = SCX_SLICE_DFL;
> +       scx->slice = scx_slice_dfl;
>  }
>
>  void scx_pre_fork(struct task_struct *p)
> @@ -3770,6 +3796,7 @@ static void scx_bypass(bool bypass)
>                 WARN_ON_ONCE(scx_bypass_depth <= 0);
>                 if (scx_bypass_depth != 1)
>                         goto unlock;
> +               scx_slice_dfl = scx_slice_bypass_us * NSEC_PER_USEC;
>                 bypass_timestamp = ktime_get_ns();
>                 if (sch)
>                         scx_add_event(sch, SCX_EV_BYPASS_ACTIVATE, 1);
> @@ -3778,6 +3805,7 @@ static void scx_bypass(bool bypass)
>                 WARN_ON_ONCE(scx_bypass_depth < 0);
>                 if (scx_bypass_depth != 0)
>                         goto unlock;
> +               scx_slice_dfl = SCX_SLICE_DFL;
>                 if (sch)
>                         scx_add_event(sch, SCX_EV_BYPASS_DURATION,
>                                       ktime_get_ns() - bypass_timestamp);
> @@ -4776,7 +4804,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
>                         queue_flags |= DEQUEUE_CLASS;
>
>                 scoped_guard (sched_change, p, queue_flags) {
> -                       p->scx.slice = SCX_SLICE_DFL;
> +                       p->scx.slice = scx_slice_dfl;
>                         p->sched_class = new_class;
>                 }
>         }
> --
> 2.51.2
>
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ