Message-ID: <ZsTwoWJQcnsJhYbe@slm.duckdns.org>
Date: Tue, 20 Aug 2024 09:38:09 -1000
From: Tejun Heo <tj@...nel.org>
To: Aboorva Devarajan <aboorvad@...ux.ibm.com>
Cc: void@...ifault.com, linux-kernel@...r.kernel.org
Subject: Re: [sched_ext/for-6.11]: Issue with BPF Scheduler during CPU Hotplug
On Tue, Aug 20, 2024 at 12:33:34PM +0530, Aboorva Devarajan wrote:
> On Tue, 2024-08-13 at 09:54 -1000, Tejun Heo wrote:
> > Hello,
> >
> > On Sat, Aug 10, 2024 at 11:54:24PM +0530, Aboorva Devarajan wrote:
> > ...
>
> > Can you trigger sysrq-t when the system is stuck? Also, can you see whether
> > the problem is reproducible on x86 w/ the wq changes applied?
>
> Hi Tejun,
>
> I haven't been able to trigger sysrq-t so far because the system becomes
> unresponsive; I will get back to you if I can trigger it via the console.
>
> And yes, this issue also occurs on x86 when applying the proposed workqueue
> patch on top of the recent sched-ext for-6.12 tree. However, it takes
> significantly longer to hit the issue, and the stack trace differs
> a bit.
I think this *should* fix the problem, but it is rather ugly. The locking
order is such that there's no good way out. Maybe the solution is to make
cpu_hotplug_disable() more useful. Anyway, can you test this one?

Thanks.
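
For illustration only, here is a minimal userspace sketch of the
trylock-and-retry pattern the patch uses (this is not the kernel code;
pthread rwlocks stand in for scx_fork_rwsem and cpu_hotplug_lock):

	/*
	 * Sketch: take lock A, then try lock B without blocking. If B is
	 * contended, drop A entirely, wait on B once so its holder can make
	 * progress, and retry. This avoids blocking on B while holding A.
	 */
	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	/* stand-ins for scx_fork_rwsem and cpu_hotplug_lock */
	static pthread_rwlock_t fork_rwsem = PTHREAD_RWLOCK_INITIALIZER;
	static pthread_rwlock_t hotplug_lock = PTHREAD_RWLOCK_INITIALIZER;

	static void block_fork_hotplug_sketch(void)
	{
		while (true) {
			pthread_rwlock_wrlock(&fork_rwsem);
			/* second lock taken only via trylock */
			if (pthread_rwlock_tryrdlock(&hotplug_lock) == 0)
				return;
			/* contended: back off fully, let the holder finish */
			pthread_rwlock_unlock(&fork_rwsem);
			pthread_rwlock_rdlock(&hotplug_lock);
			pthread_rwlock_unlock(&hotplug_lock);
		}
	}

	static void unblock_fork_hotplug_sketch(void)
	{
		pthread_rwlock_unlock(&hotplug_lock);
		pthread_rwlock_unlock(&fork_rwsem);
	}

	int main(void)
	{
		block_fork_hotplug_sketch();
		printf("both locks held\n");
		unblock_fork_hotplug_sketch();
		return 0;
	}

The point of the pattern is that the second lock is never acquired by
blocking while the first is held; on contention both are released so the
other side can run to completion before the retry.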
---
kernel/sched/ext.c | 30 ++++++++++++++++++++++--------
1 file changed, 22 insertions(+), 8 deletions(-)
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3990,6 +3990,24 @@ static const char *scx_exit_reason(enum
}
}
+static void block_fork_hotplug(void)
+{
+ while (true) {
+ percpu_down_write(&scx_fork_rwsem);
+ if (cpus_read_trylock())
+ return;
+ percpu_up_write(&scx_fork_rwsem);
+ cpus_read_lock();
+ cpus_read_unlock();
+ }
+}
+
+static void unblock_fork_hotplug(void)
+{
+ cpus_read_unlock();
+ percpu_up_write(&scx_fork_rwsem);
+}
+
static void scx_ops_disable_workfn(struct kthread_work *work)
{
struct scx_exit_info *ei = scx_exit_info;
@@ -4045,8 +4063,7 @@ static void scx_ops_disable_workfn(struc
* Avoid racing against fork. See scx_ops_enable() for explanation on
* the locking order.
*/
- percpu_down_write(&scx_fork_rwsem);
- cpus_read_lock();
+ block_fork_hotplug();
spin_lock_irq(&scx_tasks_lock);
scx_task_iter_init(&sti);
@@ -4090,8 +4107,7 @@ static void scx_ops_disable_workfn(struc
static_branch_disable_cpuslocked(&scx_builtin_idle_enabled);
synchronize_rcu();
- cpus_read_unlock();
- percpu_up_write(&scx_fork_rwsem);
+ unblock_fork_hotplug();
if (ei->kind >= SCX_EXIT_ERROR) {
pr_err("sched_ext: BPF scheduler \"%s\" disabled (%s)\n",
@@ -4657,8 +4673,7 @@ static int scx_ops_enable(struct sched_e
*
* scx_fork_rwsem --> pernet_ops_rwsem --> cpu_hotplug_lock
*/
- percpu_down_write(&scx_fork_rwsem);
- cpus_read_lock();
+ block_fork_hotplug();
check_hotplug_seq(ops);
@@ -4765,8 +4780,7 @@ static int scx_ops_enable(struct sched_e
spin_unlock_irq(&scx_tasks_lock);
preempt_enable();
- cpus_read_unlock();
- percpu_up_write(&scx_fork_rwsem);
+ unblock_fork_hotplug();
/* see above ENABLING transition for the explanation on exiting with 0 */
if (!scx_ops_tryset_enable_state(SCX_OPS_ENABLED, SCX_OPS_ENABLING)) {