Message-ID: <aRGiWTDEK16ge301@gpd4>
Date: Mon, 10 Nov 2025 09:29:13 +0100
From: Andrea Righi <andrea.righi@...ux.dev>
To: Tejun Heo <tj@...nel.org>
Cc: David Vernet <void@...ifault.com>, Changwoo Min <changwoo@...lia.com>,
Dan Schatzberg <schatzberg.dan@...il.com>,
Emil Tsalapatis <etsal@...a.com>, sched-ext@...ts.linux.dev,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH 08/13] sched_ext: Refactor lockup handlers into
handle_lockup()
On Sun, Nov 09, 2025 at 08:31:07AM -1000, Tejun Heo wrote:
> scx_rcu_cpu_stall() and scx_softlockup() share the same pattern: check,
> under the RCU read lock, whether the scheduler is enabled and trigger an
> error if so. Extract the common pattern into a handle_lockup() helper. Add
> a scx_verror() macro and use guard(rcu)().
>
> This simplifies both handlers, reduces code duplication, and prepares for
> hardlockup handling.
>
> Cc: Dan Schatzberg <schatzberg.dan@...il.com>
> Cc: Emil Tsalapatis <etsal@...a.com>
> Signed-off-by: Tejun Heo <tj@...nel.org>
Reviewed-by: Andrea Righi <arighi@...dia.com>
Thanks,
-Andrea
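
For reference, a minimal sketch of the scope-based locking shape the patch
switches to. guard(rcu)() comes from <linux/cleanup.h>; the helper name
below is illustrative, not the actual kernel code:

	/*
	 * guard(rcu)() takes the RCU read lock and arranges for
	 * rcu_read_unlock() to run automatically when the enclosing
	 * scope exits, so each early return no longer needs a matching
	 * explicit unlock (or a goto out_unlock label).
	 */
	static bool scheduler_is_enabled(void)	/* hypothetical helper */
	{
		struct scx_sched *sch;

		guard(rcu)();	/* rcu_read_lock(); unlock on any return */

		sch = rcu_dereference(scx_root);
		if (!sch)
			return false;	/* implicit rcu_read_unlock() */

		switch (scx_enable_state()) {
		case SCX_ENABLING:
		case SCX_ENABLED:
			return true;	/* implicit rcu_read_unlock() */
		default:
			return false;
		}
	}
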
> ---
> kernel/sched/ext.c | 65 ++++++++++++++++++----------------------------
> 1 file changed, 25 insertions(+), 40 deletions(-)
>
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 033c8b8e88e8..5c75b0125dfe 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -195,6 +195,7 @@ static __printf(4, 5) bool scx_exit(struct scx_sched *sch,
> }
>
> #define scx_error(sch, fmt, args...) scx_exit((sch), SCX_EXIT_ERROR, 0, fmt, ##args)
> +#define scx_verror(sch, fmt, args) scx_vexit((sch), SCX_EXIT_ERROR, 0, fmt, args)
>
> #define SCX_HAS_OP(sch, op) test_bit(SCX_OP_IDX(op), (sch)->has_op)
>
> @@ -3653,39 +3654,40 @@ bool scx_allow_ttwu_queue(const struct task_struct *p)
> return false;
> }
>
> -/**
> - * scx_rcu_cpu_stall - sched_ext RCU CPU stall handler
> - *
> - * While there are various reasons why RCU CPU stalls can occur on a system
> - * that may not be caused by the current BPF scheduler, try kicking out the
> - * current scheduler in an attempt to recover the system to a good state before
> - * issuing panics.
> - */
> -bool scx_rcu_cpu_stall(void)
> +static __printf(1, 2) bool handle_lockup(const char *fmt, ...)
> {
> struct scx_sched *sch;
> + va_list args;
>
> - rcu_read_lock();
> + guard(rcu)();
>
> sch = rcu_dereference(scx_root);
> - if (unlikely(!sch)) {
> - rcu_read_unlock();
> + if (unlikely(!sch))
> return false;
> - }
>
> switch (scx_enable_state()) {
> case SCX_ENABLING:
> case SCX_ENABLED:
> - break;
> + va_start(args, fmt);
> + scx_verror(sch, fmt, args);
> + va_end(args);
> + return true;
> default:
> - rcu_read_unlock();
> return false;
> }
> +}
>
> - scx_error(sch, "RCU CPU stall detected!");
> - rcu_read_unlock();
> -
> - return true;
> +/**
> + * scx_rcu_cpu_stall - sched_ext RCU CPU stall handler
> + *
> + * While there are various reasons why RCU CPU stalls can occur on a system
> + * that may not be caused by the current BPF scheduler, try kicking out the
> + * current scheduler in an attempt to recover the system to a good state before
> + * issuing panics.
> + */
> +bool scx_rcu_cpu_stall(void)
> +{
> + return handle_lockup("RCU CPU stall detected!");
> }
>
> /**
> @@ -3700,28 +3702,11 @@ bool scx_rcu_cpu_stall(void)
> */
> void scx_softlockup(u32 dur_s)
> {
> - struct scx_sched *sch;
> -
> - rcu_read_lock();
> -
> - sch = rcu_dereference(scx_root);
> - if (unlikely(!sch))
> - goto out_unlock;
> -
> - switch (scx_enable_state()) {
> - case SCX_ENABLING:
> - case SCX_ENABLED:
> - break;
> - default:
> - goto out_unlock;
> - }
> -
> - printk_deferred(KERN_ERR "sched_ext: Soft lockup - CPU%d stuck for %us, disabling \"%s\"\n",
> - smp_processor_id(), dur_s, scx_root->ops.name);
> + if (!handle_lockup("soft lockup - CPU %d stuck for %us", smp_processor_id(), dur_s))
> + return;
>
> - scx_error(sch, "soft lockup - CPU#%d stuck for %us", smp_processor_id(), dur_s);
> -out_unlock:
> - rcu_read_unlock();
> + printk_deferred(KERN_ERR "sched_ext: Soft lockup - CPU %d stuck for %us, disabling BPF scheduler\n",
> + smp_processor_id(), dur_s);
> }
>
> /**
> --
> 2.51.1
>
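As a side note, the scx_error()/scx_verror() split follows the standard
printf()/vprintf() pairing: a v-variant that consumes a va_list, plus a
variadic front end that only packages its arguments. A minimal standalone
sketch of that shape (illustrative userspace names, not the kernel code):

	#include <stdarg.h>
	#include <stdio.h>

	/* The v-variant does the real work on an already-built va_list ... */
	static void report_verror(const char *fmt, va_list args)
	{
		vfprintf(stderr, fmt, args);
		fputc('\n', stderr);
	}

	/* ... and the variadic front end only packages its arguments.
	 * handle_lockup() has the same shape: va_start()/va_end() around
	 * a single call into the v-variant (scx_verror() -> scx_vexit()). */
	static __attribute__((format(printf, 1, 2)))
	void report_error(const char *fmt, ...)
	{
		va_list args;

		va_start(args, fmt);
		report_verror(fmt, args);
		va_end(args);
	}

	int main(void)
	{
		report_error("soft lockup - CPU %d stuck for %us", 3, 26);
		return 0;
	}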