[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <11E57357-EDAE-4C7A-B292-3BD41BCFA9CF@nutanix.com>
Date: Wed, 16 Jul 2025 02:47:54 +0000
From: Harshit Agarwal <harshit@...anix.com>
To: Zicheng Qu <quzicheng@...wei.com>
CC: "mingo@...hat.com" <mingo@...hat.com>,
"peterz@...radead.org"
<peterz@...radead.org>,
"juri.lelli@...hat.com" <juri.lelli@...hat.com>,
"vincent.guittot@...aro.org" <vincent.guittot@...aro.org>,
"dietmar.eggemann@....com" <dietmar.eggemann@....com>,
"rostedt@...dmis.org"
<rostedt@...dmis.org>,
"bsegall@...gle.com" <bsegall@...gle.com>,
"mgorman@...e.de" <mgorman@...e.de>,
"vschneid@...hat.com"
<vschneid@...hat.com>,
"raistlin@...ux.it" <raistlin@...ux.it>,
"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
"tanghui20@...wei.com" <tanghui20@...wei.com>,
"zhangqiao22@...wei.com"
<zhangqiao22@...wei.com>,
"judy.chenhui@...wei.com" <judy.chenhui@...wei.com>
Subject: Re: [PATCH] sched/dl: Fix race in push_dl_task
Hi Zicheng,
Thanks for this change.
I already have the dl version of this patch reviewed by Juri.
Not merged yet. You can find it here:
https://lore.kernel.org/lkml/20250408045021.3283624-1-harshit@nutanix.com/
Regards,
Harshit
> On Jul 15, 2025, at 6:15 PM, Zicheng Qu <quzicheng@...wei.com> wrote:
>
> !-------------------------------------------------------------------|
> CAUTION: External Email
>
> |-------------------------------------------------------------------!
>
> push_dl_task() picks the first pushable task and finds an eligible
> later_rq, then calls double_lock_balance(rq, later_rq). If
> double_lock_balance() had to unlock the rq (i.e. when it returns
> 1), we have to check whether this task is still on the rq. We cannot
> rely on the existing per-task checks to make sure it is still on the
> original rq, even though we hold the rq->lock. This patch re-picks the
> first pushable task to make sure the task is still on the rq.
>
> This is very similar to the issue reported in CVE-2025-38234, one is rt
> and one is dl. The patch for the rt bugfix is
> https://urldefense.proofpoint.com/v2/url?u=https-3A__lore.kernel.org_r_20250225180553.167995-2D1-2Dharshit-40nutanix.com&d=DwIDAg&c=s883GpUCOChKOHiocYtGcg&r=QTPVhNgH716-zU_kPmte39o3vGFVupnGmmfiVBpq9PU&m=l-FlO7f0a0O4xMcW2cS-2txhwNCjsjmXNKh7uUiL3mugOfQ73eYCxijD81W2lPqd&s=yeTV_SZthzrBsGuZCPT9A_Ans4Ze6VVhWnk6IngUtg0&e=
>
> Fixes: 1baca4ce16b8 ("sched/deadline: Add SCHED_DEADLINE SMP-related data structures & logic")
> Cc: stable@...r.kernel.org
> Signed-off-by: Zicheng Qu <quzicheng@...wei.com>
> ---
> kernel/sched/deadline.c | 53 ++++++++++++++++++++++-------------------
> 1 file changed, 28 insertions(+), 25 deletions(-)
>
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index 89019a140826..3b53e71f2b86 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -2619,6 +2619,25 @@ static int find_later_rq(struct task_struct *task)
> return -1;
> }
>
> +static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
> +{
> + struct task_struct *p;
> +
> + if (!has_pushable_dl_tasks(rq))
> + return NULL;
> +
> + p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
> +
> + WARN_ON_ONCE(rq->cpu != task_cpu(p));
> + WARN_ON_ONCE(task_current(rq, p));
> + WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
> +
> + WARN_ON_ONCE(!task_on_rq_queued(p));
> + WARN_ON_ONCE(!dl_task(p));
> +
> + return p;
> +}
> +
> /* Locks the rq it finds */
> static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
> {
> @@ -2646,12 +2665,15 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
>
> /* Retry if something changed. */
> if (double_lock_balance(rq, later_rq)) {
> - if (unlikely(task_rq(task) != rq ||
> - !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
> - task_on_cpu(rq, task) ||
> - !dl_task(task) ||
> - is_migration_disabled(task) ||
> - !task_on_rq_queued(task))) {
> + /*
> + * We had to unlock the run queue. In
> + * the mean time, task could have
> + * migrated already or had its affinity changed.
> + * Also make sure that it wasn't scheduled on its rq.
> + */
> + if (unlikely(is_migration_disabled(task) ||
> + !cpumask_test_cpu(later_rq->cpu, &task->cpus_mask) ||
> + task != pick_next_pushable_dl_task(rq))) {
> double_unlock_balance(rq, later_rq);
> later_rq = NULL;
> break;
> @@ -2674,25 +2696,6 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
> return later_rq;
> }
>
> -static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
> -{
> - struct task_struct *p;
> -
> - if (!has_pushable_dl_tasks(rq))
> - return NULL;
> -
> - p = __node_2_pdl(rb_first_cached(&rq->dl.pushable_dl_tasks_root));
> -
> - WARN_ON_ONCE(rq->cpu != task_cpu(p));
> - WARN_ON_ONCE(task_current(rq, p));
> - WARN_ON_ONCE(p->nr_cpus_allowed <= 1);
> -
> - WARN_ON_ONCE(!task_on_rq_queued(p));
> - WARN_ON_ONCE(!dl_task(p));
> -
> - return p;
> -}
> -
> /*
> * See if the non running -deadline tasks on this rq
> * can be sent to some other CPU where they can preempt
> --
> 2.34.1
>
Powered by blists - more mailing lists