Message-ID: <20250716011516.3591655-1-quzicheng@huawei.com>
Date: Wed, 16 Jul 2025 01:15:16 +0000
From: Zicheng Qu <quzicheng@...wei.com>
To: <mingo@...hat.com>, <peterz@...radead.org>, <juri.lelli@...hat.com>,
<vincent.guittot@...aro.org>, <dietmar.eggemann@....com>,
<rostedt@...dmis.org>, <bsegall@...gle.com>, <mgorman@...e.de>,
<vschneid@...hat.com>, <raistlin@...ux.it>, <harshit@...anix.com>,
<linux-kernel@...r.kernel.org>
CC: <tanghui20@...wei.com>, <zhangqiao22@...wei.com>,
<judy.chenhui@...wei.com>, <quzicheng@...wei.com>
Subject: [PATCH] sched/dl: Fix race in push_dl_task

push_dl_task() picks the first pushable task and finds an eligible
later_rq, then calls double_lock_balance(rq, later_rq). If
double_lock_balance() has to unlock rq (i.e. it returns 1), we must
check whether the task is still on the original rq. The existing
per-task checks are not enough to guarantee that, even though we hold
rq->lock again afterwards. Repick the first pushable task and compare
it against the task we are about to push, so that we only push a task
that is still the head of the pushable list on this rq.
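
A simplified interleaving of the race (illustrative sketch, not an
actual trace; the function names are the ones used in
kernel/sched/deadline.c):

    CPU0 (push_dl_task)                  CPU1
    ----                                 ----
    next_task = pick_next_pushable_dl_task(rq)
    find_lock_later_rq(next_task, rq)
      double_lock_balance(rq, later_rq)
        /* rq->lock dropped */
                                         next_task runs, migrates, or
                                         otherwise stops being the first
                                         pushable task on rq
        /* rq->lock re-acquired */
      the old per-task checks can
      still pass for next_task
    push continues with a stale task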

This is very similar to the issue reported in CVE-2025-38234; that one
affects the RT class, while this one affects the deadline (DL) class.
The corresponding RT fix is
https://lore.kernel.org/r/20250225180553.167995-1-harshit@nutanix.com

Fixes: 1baca4ce16b8 ("sched/deadline: Add SCHED_DEADLINE SMP-related data structures & logic")
Cc: stable@...r.kernel.org
Signed-off-by: Zicheng Qu <quzicheng@...wei.com>
---
kernel/sched/deadline.c | 53 ++++++++++++++++++++++-------------------
1 file changed, 28 insertions(+), 25 deletions(-)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 89019a140826..3b53e71f2b86 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -2619,6 +2619,25 @@ static int find_later_rq(struct task_struct *task)
 	return -1;
 }
 
+static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
+{
+	struct task_struct *p;
+
+	if (!has_pushable_dl_tasks(rq))
+		return NULL;
+
+	p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
+
+	WARN_ON_ONCE(rq->cpu != task_cpu(p));
+	WARN_ON_ONCE(task_current(rq, p));
+	WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
+
+	WARN_ON_ONCE(!task_on_rq_queued(p));
+	WARN_ON_ONCE(!dl_task(p));
+
+	return p;
+}
+
 /* Locks the rq it finds */
 static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 {
@@ -2646,12 +2665,15 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 
 		/* Retry if something changed. */
 		if (double_lock_balance(rq, later_rq)) {
-			if (unlikely(task_rq(task) != rq ||
-				     !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
-				     task_on_cpu(rq, task) ||
-				     !dl_task(task) ||
-				     is_migration_disabled(task) ||
-				     !task_on_rq_queued(task))) {
+			/*
+			 * We had to unlock the run queue. In
+			 * the mean time, task could have
+			 * migrated already or had its affinity changed.
+			 * Also make sure that it wasn't scheduled on its rq.
+			 */
+			if (unlikely(is_migration_disabled(task) ||
+				     !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
+				     task != pick_next_pushable_dl_task(rq))) {
 				double_unlock_balance(rq, later_rq);
 				later_rq = NULL;
 				break;
@@ -2674,25 +2696,6 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 	return later_rq;
 }
 
-static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
-{
-	struct task_struct *p;
-
-	if (!has_pushable_dl_tasks(rq))
-		return NULL;
-
-	p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
-
-	WARN_ON_ONCE(rq->cpu != task_cpu(p));
-	WARN_ON_ONCE(task_current(rq, p));
-	WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
-
-	WARN_ON_ONCE(!task_on_rq_queued(p));
-	WARN_ON_ONCE(!dl_task(p));
-
-	return p;
-}
-
 /*
  * See if the non running -deadline tasks on this rq
  * can be sent to some other CPU where they can preempt
--
2.34.1