Message-ID: <5b57c6e4-eeeb-4438-8b06-82afe4b4ad65@amd.com>
Date: Tue, 30 Dec 2025 12:16:23 +0530
From: K Prateek Nayak <kprateek.nayak@....com>
To: John Stultz <jstultz@...gle.com>, LKML <linux-kernel@...r.kernel.org>
CC: Joel Fernandes <joelagnelf@...dia.com>, Qais Yousef <qyousef@...alina.io>,
Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>, "Juri
Lelli" <juri.lelli@...hat.com>, Vincent Guittot <vincent.guittot@...aro.org>,
Dietmar Eggemann <dietmar.eggemann@....com>, Valentin Schneider
<vschneid@...hat.com>, Steven Rostedt <rostedt@...dmis.org>, Ben Segall
<bsegall@...gle.com>, Zimuzo Ezeozue <zezeozue@...gle.com>, Mel Gorman
<mgorman@...e.de>, Will Deacon <will@...nel.org>, Waiman Long
<longman@...hat.com>, Boqun Feng <boqun.feng@...il.com>, "Paul E. McKenney"
<paulmck@...nel.org>, Metin Kaya <Metin.Kaya@....com>, Xuewen Yan
<xuewen.yan94@...il.com>, Thomas Gleixner <tglx@...utronix.de>, "Daniel
Lezcano" <daniel.lezcano@...aro.org>, Suleiman Souhlal <suleiman@...gle.com>,
kuyo chang <kuyo.chang@...iatek.com>, hupu <hupu.gm@...il.com>,
<kernel-team@...roid.com>
Subject: Re: [PATCH v24 11/11] sched: Migrate whole chain in
proxy_migrate_task()
Hello John,
On 11/25/2025 4:01 AM, John Stultz wrote:
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 178ed37850470..775cc06f756d0 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1243,6 +1243,9 @@ struct task_struct {
> struct mutex *blocked_on; /* lock we're blocked on */
> struct task_struct *blocked_donor; /* task that is boosting this task */
> raw_spinlock_t blocked_lock;
> +#ifdef CONFIG_SCHED_PROXY_EXEC
> + struct list_head migration_node;
We can just reuse "p->se.group_node" here instead of adding a new
"migration_node" - group_node is only used to move tasks around during
load balancing, and once we deactivate a task it can no longer be
picked for load balancing, so we should be safe (serialized by the
rq_lock).
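As a rough sketch (assuming the loop introduced later in this patch
otherwise stays as-is), the migration step would then become:

	for (; p; p = p->blocked_donor) {
		WARN_ON(p == rq->curr);
		deactivate_task(rq, p, DEQUEUE_NOCLOCK);
		proxy_set_task_cpu(p, target_cpu);
		/*
		 * p->se.group_node is only touched by detach_tasks() /
		 * attach_tasks() during load balancing, and a deactivated
		 * task cannot be picked there, so it is free to borrow
		 * (serialized by the rq_lock).
		 */
		list_add(&p->se.group_node, &migrate_list);
	}

which would also let the new "migration_node" member and its init_task /
copy_process initialization below be dropped entirely.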
> +#endif
>
> #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
> /*
> diff --git a/init/init_task.c b/init/init_task.c
> index 34853a511b4d8..78fb7cb83fa5d 100644
> --- a/init/init_task.c
> +++ b/init/init_task.c
> @@ -178,6 +178,9 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
> &init_task.alloc_lock),
> #endif
> .blocked_donor = NULL,
> +#ifdef CONFIG_SCHED_PROXY_EXEC
> + .migration_node = LIST_HEAD_INIT(init_task.migration_node),
> +#endif
> #ifdef CONFIG_RT_MUTEXES
> .pi_waiters = RB_ROOT_CACHED,
> .pi_top_task = NULL,
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 0a9a17e25b85d..a7561480e879e 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -2137,6 +2137,9 @@ __latent_entropy struct task_struct *copy_process(
>
> p->blocked_on = NULL; /* not blocked yet */
> p->blocked_donor = NULL; /* nobody is boosting p yet */
> +#ifdef CONFIG_SCHED_PROXY_EXEC
> + INIT_LIST_HEAD(&p->migration_node);
> +#endif
>
> #ifdef CONFIG_BCACHE
> p->sequential_io = 0;
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 7f42ec01192dc..0c50d154050a3 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -6723,6 +6723,7 @@ static void proxy_migrate_task(struct rq *rq, struct rq_flags *rf,
> struct task_struct *p, int target_cpu)
> {
> struct rq *target_rq = cpu_rq(target_cpu);
> + LIST_HEAD(migrate_list);
>
> lockdep_assert_rq_held(rq);
>
> @@ -6739,11 +6740,16 @@ static void proxy_migrate_task(struct rq *rq, struct rq_flags *rf,
> */
> proxy_resched_idle(rq);
>
> - WARN_ON(p == rq->curr);
> -
> - deactivate_task(rq, p, DEQUEUE_NOCLOCK);
> - proxy_set_task_cpu(p, target_cpu);
> -
> + for (; p; p = p->blocked_donor) {
> + WARN_ON(p == rq->curr);
> + deactivate_task(rq, p, DEQUEUE_NOCLOCK);
> + proxy_set_task_cpu(p, target_cpu);
> + /*
> + * We can abuse blocked_node to migrate the thing,
> + * because @p was still on the rq.
> + */
> + list_add(&p->migration_node, &migrate_list);
> + }
> /*
> * We have to zap callbacks before unlocking the rq
> * as another CPU may jump in and call sched_balance_rq
> @@ -6755,8 +6761,12 @@ static void proxy_migrate_task(struct rq *rq, struct rq_flags *rf,
> raw_spin_rq_unlock(rq);
>
> raw_spin_rq_lock(target_rq);
> - activate_task(target_rq, p, 0);
> - wakeup_preempt(target_rq, p, 0);
> + while (!list_empty(&migrate_list)) {
> + p = list_first_entry(&migrate_list, struct task_struct, migration_node);
> + list_del_init(&p->migration_node);
> + activate_task(target_rq, p, 0);
> + wakeup_preempt(target_rq, p, 0);
> + }
> raw_spin_rq_unlock(target_rq);
Here, we can extract the core logic of attach_tasks() into a helper and
reuse it as follows (this includes the suggestion from Patch 6 plus the
above suggestion; build + boot tested with test-ww_mutex and
sched-messaging):
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5a6ac4d13112..700fd08b2392 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6786,7 +6786,7 @@ static void proxy_migrate_task(struct rq *rq, struct rq_flags *rf,
* We can abuse blocked_node to migrate the thing,
* because @p was still on the rq.
*/
- list_add(&p->migration_node, &migrate_list);
+ list_add(&p->se.group_node, &migrate_list);
}
/*
* Zap balance callbacks and drop the rq_lock
@@ -6794,14 +6794,8 @@ static void proxy_migrate_task(struct rq *rq, struct rq_flags *rf,
*/
guard(proxy_drop_reacquire_rq_lock)(rq, rf);
- raw_spin_rq_lock(target_rq);
- while (!list_empty(&migrate_list)) {
- p = list_first_entry(&migrate_list, struct task_struct, migration_node);
- list_del_init(&p->migration_node);
- activate_task(target_rq, p, 0);
- wakeup_preempt(target_rq, p, 0);
- }
- raw_spin_rq_unlock(target_rq);
+ /* Move migrate_list to target_rq. */
+ __attach_tasks(target_rq, &migrate_list);
}
static void proxy_force_return(struct rq *rq, struct rq_flags *rf,
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7d2e92a55b16..f24ca09ded8a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9700,27 +9700,28 @@ static void attach_one_task(struct rq *rq, struct task_struct *p)
rq_unlock(rq, &rf);
}
-/*
- * attach_tasks() -- attaches all tasks detached by detach_tasks() to their
- * new rq.
- */
-static void attach_tasks(struct lb_env *env)
+void __attach_tasks(struct rq *rq, struct list_head *tasks)
{
- struct list_head *tasks = &env->tasks;
- struct task_struct *p;
- struct rq_flags rf;
-
- rq_lock(env->dst_rq, &rf);
- update_rq_clock(env->dst_rq);
+ guard(rq_lock)(rq);
+ update_rq_clock(rq);
while (!list_empty(tasks)) {
+ struct task_struct *p;
+
p = list_first_entry(tasks, struct task_struct, se.group_node);
list_del_init(&p->se.group_node);
- attach_task(env->dst_rq, p);
+ attach_task(rq, p);
}
+}
- rq_unlock(env->dst_rq, &rf);
+/*
+ * attach_tasks() -- attaches all tasks detached by detach_tasks() to their
+ * new rq.
+ */
+static void attach_tasks(struct lb_env *env)
+{
+ __attach_tasks(env->dst_rq, &env->tasks);
}
#ifdef CONFIG_NO_HZ_COMMON
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 424c40bd46e2..43b9ee90c67f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1368,6 +1368,8 @@ static inline void rq_set_donor(struct rq *rq, struct task_struct *t)
}
#endif
+void __attach_tasks(struct rq *rq, struct list_head *tasks);
+
#ifdef CONFIG_SCHED_CORE
static inline struct cpumask *sched_group_span(struct sched_group *sg);
---
Thoughts?
>
> raw_spin_rq_lock(rq);
--
Thanks and Regards,
Prateek