Message-ID: <20250125091657.203445-1-arighi@nvidia.com>
Date: Sat, 25 Jan 2025 10:16:57 +0100
From: Andrea Righi <arighi@...dia.com>
To: Tejun Heo <tj@...nel.org>,
David Vernet <void@...ifault.com>,
Changwoo Min <changwoo@...lia.com>
Cc: linux-kernel@...r.kernel.org
Subject: [PATCH v4] sched_ext: Fix lock imbalance in dispatch_to_local_dsq()
While performing the rq locking dance in dispatch_to_local_dsq(), we may
trigger the following lock imbalance condition, in particular when
multiple tasks are rapidly changing CPU affinity (e.g., when running
`stress-ng --race-sched 0`):

[ 13.413579] =====================================
[ 13.413660] WARNING: bad unlock balance detected!
[ 13.413729] 6.13.0-virtme #15 Not tainted
[ 13.413792] -------------------------------------
[ 13.413859] kworker/1:1/80 is trying to release lock (&rq->__lock) at:
[ 13.413954] [<ffffffff873c6c48>] dispatch_to_local_dsq+0x108/0x1a0
[ 13.414111] but there are no more locks to release!
[ 13.414176]
[ 13.414176] other info that might help us debug this:
[ 13.414258] 1 lock held by kworker/1:1/80:
[ 13.414318] #0: ffff8b66feb41698 (&rq->__lock){-.-.}-{2:2}, at: raw_spin_rq_lock_nested+0x20/0x90
[ 13.414612]
[ 13.414612] stack backtrace:
[ 13.415255] CPU: 1 UID: 0 PID: 80 Comm: kworker/1:1 Not tainted 6.13.0-virtme #15
[ 13.415505] Workqueue: 0x0 (events)
[ 13.415567] Sched_ext: dsp_local_on (enabled+all), task: runnable_at=-2ms
[ 13.415570] Call Trace:
[ 13.415700] <TASK>
[ 13.415744] dump_stack_lvl+0x78/0xe0
[ 13.415806] ? dispatch_to_local_dsq+0x108/0x1a0
[ 13.415884] print_unlock_imbalance_bug+0x11b/0x130
[ 13.415965] ? dispatch_to_local_dsq+0x108/0x1a0
[ 13.416226] lock_release+0x231/0x2c0
[ 13.416326] _raw_spin_unlock+0x1b/0x40
[ 13.416422] dispatch_to_local_dsq+0x108/0x1a0
[ 13.416554] flush_dispatch_buf+0x199/0x1d0
[ 13.416652] balance_one+0x194/0x370
[ 13.416751] balance_scx+0x61/0x1e0
[ 13.416848] prev_balance+0x43/0xb0
[ 13.416947] __pick_next_task+0x6b/0x1b0
[ 13.417052] __schedule+0x20d/0x1740

This happens because dispatch_to_local_dsq() is racing with
dispatch_dequeue(): when the latter wins, the task is not moved, yet we
incorrectly assume that it has been moved to dst_rq, so the unlock in
the epilogue targets dst_rq's lock even though we are still holding
src_rq's lock instead.

Fix by explicitly tracking the rq that is currently locked.
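
For reference, below is a simplified userspace sketch of the locking
pattern the fix adopts. This is not the kernel code: struct rq is
reduced to a bare mutex and move_task() is a hypothetical stand-in for
move_remote_task_to_local_dsq(). The point is only that the epilogue
must release whichever lock is actually held, not the one we assumed:

#include <pthread.h>
#include <stdio.h>

struct rq { pthread_mutex_t lock; };

/*
 * Hypothetical helper: "moves the task" and returns the rq whose lock
 * it now holds (drops src_rq->lock, takes dst_rq->lock).
 */
static struct rq *move_task(struct rq *src_rq, struct rq *dst_rq)
{
	pthread_mutex_unlock(&src_rq->lock);
	pthread_mutex_lock(&dst_rq->lock);
	return dst_rq;
}

/* Enter and leave with rq->lock held, never unlocking a lock we don't hold. */
static void dispatch(struct rq *rq, struct rq *src_rq, struct rq *dst_rq,
		     int dequeue_won_the_race)
{
	struct rq *locked = rq;			/* the lock currently held */

	/* switch to src_rq->lock */
	if (locked != src_rq) {
		pthread_mutex_unlock(&locked->lock);
		locked = src_rq;
		pthread_mutex_lock(&src_rq->lock);
	}

	if (!dequeue_won_the_race)
		locked = move_task(src_rq, dst_rq);
	/* else: dequeue won, the task stays put and we still hold src_rq->lock */

	/* switch back to rq->lock, releasing whichever lock we ended up with */
	if (locked != rq) {
		pthread_mutex_unlock(&locked->lock);
		pthread_mutex_lock(&rq->lock);
	}
}

int main(void)
{
	struct rq rq = { PTHREAD_MUTEX_INITIALIZER };
	struct rq src = { PTHREAD_MUTEX_INITIALIZER };
	struct rq dst = { PTHREAD_MUTEX_INITIALIZER };

	pthread_mutex_lock(&rq.lock);	/* caller enters with rq locked */
	dispatch(&rq, &src, &dst, 1);	/* dequeue won: task not moved */
	dispatch(&rq, &src, &dst, 0);	/* dispatch won: task moved to dst */
	pthread_mutex_unlock(&rq.lock);	/* rq still held on return */
	printf("lock balance preserved\n");
	return 0;
}
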
Fixes: 4d3ca89bdd31 ("sched_ext: Refactor consume_remote_task()")
Signed-off-by: Andrea Righi <arighi@...dia.com>
---
kernel/sched/ext.c | 110 ++++++++++++++++++++++++++++-----------------
1 file changed, 68 insertions(+), 42 deletions(-)
ChangeLog v3 -> v4:
- small refactoring to fix an unused variable build warning on UP
ChangeLog v2 -> v3:
- keep track of the currently locked rq in dispatch_to_local_dsq()
ChangeLog v1 -> v2:
- more comments to clarify the race with dequeue
- rebase to tip
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 4493a3c419c9..a290052478ed 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -2253,9 +2253,11 @@ static void move_local_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
* @dst_rq: rq to move the task into, locked on return
*
* Move @p which is currently on @src_rq to @dst_rq's local DSQ.
+ *
+ * Return the rq where @p has been moved.
*/
-static void move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
- struct rq *src_rq, struct rq *dst_rq)
+static struct rq *move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
+ struct rq *src_rq, struct rq *dst_rq)
{
lockdep_assert_rq_held(src_rq);
@@ -2277,6 +2279,8 @@ static void move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
dst_rq->scx.extra_enq_flags = enq_flags;
activate_task(dst_rq, p, 0);
dst_rq->scx.extra_enq_flags = 0;
+
+ return dst_rq;
}
/*
@@ -2387,7 +2391,13 @@ static bool consume_remote_task(struct rq *this_rq, struct task_struct *p,
}
}
#else /* CONFIG_SMP */
-static inline void move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags, struct rq *src_rq, struct rq *dst_rq) { WARN_ON_ONCE(1); }
+static inline struct rq *
+move_remote_task_to_local_dsq(struct task_struct *p, u64 enq_flags,
+ struct rq *src_rq, struct rq *dst_rq)
+{
+ WARN_ON_ONCE(1);
+ return src_rq;
+}
static inline bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq, bool trigger_error) { return false; }
static inline bool consume_remote_task(struct rq *this_rq, struct task_struct *p, struct scx_dispatch_q *dsq, struct rq *task_rq) { return false; }
#endif /* CONFIG_SMP */
@@ -2538,38 +2548,12 @@ static bool consume_global_dsq(struct rq *rq)
return consume_dispatch_q(rq, global_dsqs[node]);
}
-/**
- * dispatch_to_local_dsq - Dispatch a task to a local dsq
- * @rq: current rq which is locked
- * @dst_dsq: destination DSQ
- * @p: task to dispatch
- * @enq_flags: %SCX_ENQ_*
- *
- * We're holding @rq lock and want to dispatch @p to @dst_dsq which is a local
- * DSQ. This function performs all the synchronization dancing needed because
- * local DSQs are protected with rq locks.
- *
- * The caller must have exclusive ownership of @p (e.g. through
- * %SCX_OPSS_DISPATCHING).
- */
-static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
- struct task_struct *p, u64 enq_flags)
+#ifdef CONFIG_SMP
+static void move_to_remote_dsq(struct rq *rq, struct rq *src_rq, struct rq *dst_rq,
+ struct task_struct *p, u64 enq_flags)
{
- struct rq *src_rq = task_rq(p);
- struct rq *dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq);
+ struct rq *locked_rq = rq;
- /*
- * We're synchronized against dequeue through DISPATCHING. As @p can't
- * be dequeued, its task_rq and cpus_allowed are stable too.
- *
- * If dispatching to @rq that @p is already on, no lock dancing needed.
- */
- if (rq == src_rq && rq == dst_rq) {
- dispatch_enqueue(dst_dsq, p, enq_flags | SCX_ENQ_CLEAR_OPSS);
- return;
- }
-
-#ifdef CONFIG_SMP
if (unlikely(!task_can_run_on_remote_rq(p, dst_rq, true))) {
dispatch_enqueue(find_global_dsq(p), p,
enq_flags | SCX_ENQ_CLEAR_OPSS);
@@ -2593,12 +2577,16 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
atomic_long_set_release(&p->scx.ops_state, SCX_OPSS_NONE);
/* switch to @src_rq lock */
- if (rq != src_rq) {
- raw_spin_rq_unlock(rq);
+ if (locked_rq != src_rq) {
+ raw_spin_rq_unlock(locked_rq);
+ locked_rq = src_rq;
raw_spin_rq_lock(src_rq);
}
- /* task_rq couldn't have changed if we're still the holding cpu */
+ /*
+ * If p->scx.holding_cpu still matches the current CPU, task_rq(p)
+ * has not changed and we can safely move the task to @dst_rq.
+ */
if (likely(p->scx.holding_cpu == raw_smp_processor_id()) &&
!WARN_ON_ONCE(src_rq != task_rq(p))) {
/*
@@ -2610,8 +2598,8 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
p->scx.holding_cpu = -1;
dispatch_enqueue(&dst_rq->scx.local_dsq, p, enq_flags);
} else {
- move_remote_task_to_local_dsq(p, enq_flags,
- src_rq, dst_rq);
+ locked_rq = move_remote_task_to_local_dsq(p, enq_flags,
+ src_rq, dst_rq);
}
/* if the destination CPU is idle, wake it up */
@@ -2620,13 +2608,51 @@ static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
}
/* switch back to @rq lock */
- if (rq != dst_rq) {
- raw_spin_rq_unlock(dst_rq);
+ if (locked_rq != rq) {
+ raw_spin_rq_unlock(locked_rq);
raw_spin_rq_lock(rq);
}
-#else /* CONFIG_SMP */
- BUG(); /* control can not reach here on UP */
+}
+#else /* CONFIG_SMP */
+static void move_to_remote_dsq(struct rq *rq, struct rq *src_rq, struct rq *dst_rq,
+ struct task_struct *p, u64 enq_flags)
+{
+ BUG(); /* this should never be called on UP */
+}
#endif /* CONFIG_SMP */
+
+/**
+ * dispatch_to_local_dsq - Dispatch a task to a local dsq
+ * @rq: current rq which is locked
+ * @dst_dsq: destination DSQ
+ * @p: task to dispatch
+ * @enq_flags: %SCX_ENQ_*
+ *
+ * We're holding @rq lock and want to dispatch @p to @dst_dsq which is a local
+ * DSQ. This function performs all the synchronization dancing needed because
+ * local DSQs are protected with rq locks.
+ *
+ * The caller must have exclusive ownership of @p (e.g. through
+ * %SCX_OPSS_DISPATCHING).
+ */
+static void dispatch_to_local_dsq(struct rq *rq, struct scx_dispatch_q *dst_dsq,
+ struct task_struct *p, u64 enq_flags)
+{
+ struct rq *src_rq = task_rq(p);
+ struct rq *dst_rq = container_of(dst_dsq, struct rq, scx.local_dsq);
+
+ /*
+ * We're synchronized against dequeue through DISPATCHING. As @p can't
+ * be dequeued, its task_rq and cpus_allowed are stable too.
+ *
+ * If dispatching to @rq that @p is already on, no lock dancing needed.
+ */
+ if (rq == src_rq && rq == dst_rq) {
+ dispatch_enqueue(dst_dsq, p, enq_flags | SCX_ENQ_CLEAR_OPSS);
+ return;
+ }
+
+ move_to_remote_dsq(rq, src_rq, dst_rq, p, enq_flags);
}
/**
--
2.48.1