linux-kernel - Re: [PATCH v7 21/23] sched: Add find_exec_ctx helper

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4a137164-0a4a-4f7e-806e-ef532fa86ece@arm.com>
Date: Fri, 22 Dec 2023 11:57:24 +0000
From: Metin Kaya <metin.kaya@....com>
To: John Stultz <jstultz@...gle.com>, LKML <linux-kernel@...r.kernel.org>
Cc: Joel Fernandes <joelaf@...gle.com>, Qais Yousef <qyousef@...gle.com>,
 Ingo Molnar <mingo@...hat.com>, Peter Zijlstra <peterz@...radead.org>,
 Juri Lelli <juri.lelli@...hat.com>,
 Vincent Guittot <vincent.guittot@...aro.org>,
 Dietmar Eggemann <dietmar.eggemann@....com>,
 Valentin Schneider <vschneid@...hat.com>,
 Steven Rostedt <rostedt@...dmis.org>, Ben Segall <bsegall@...gle.com>,
 Zimuzo Ezeozue <zezeozue@...gle.com>, Youssef Esmat
 <youssefesmat@...gle.com>, Mel Gorman <mgorman@...e.de>,
 Daniel Bristot de Oliveira <bristot@...hat.com>,
 Will Deacon <will@...nel.org>, Waiman Long <longman@...hat.com>,
 Boqun Feng <boqun.feng@...il.com>, "Paul E. McKenney" <paulmck@...nel.org>,
 Xuewen Yan <xuewen.yan94@...il.com>, K Prateek Nayak
 <kprateek.nayak@....com>, Thomas Gleixner <tglx@...utronix.de>,
 kernel-team@...roid.com
Subject: Re: [PATCH v7 21/23] sched: Add find_exec_ctx helper

On 20/12/2023 12:18 am, John Stultz wrote:
> From: Connor O'Brien <connoro@...gle.com>
> 
> Add a helper to find the runnable owner down a chain of blocked waiters
> 
> This patch was broken out from a larger chain migration
> patch originally by Connor O'Brien.
> 
> Cc: Joel Fernandes <joelaf@...gle.com>
> Cc: Qais Yousef <qyousef@...gle.com>
> Cc: Ingo Molnar <mingo@...hat.com>
> Cc: Peter Zijlstra <peterz@...radead.org>
> Cc: Juri Lelli <juri.lelli@...hat.com>
> Cc: Vincent Guittot <vincent.guittot@...aro.org>
> Cc: Dietmar Eggemann <dietmar.eggemann@....com>
> Cc: Valentin Schneider <vschneid@...hat.com>
> Cc: Steven Rostedt <rostedt@...dmis.org>
> Cc: Ben Segall <bsegall@...gle.com>
> Cc: Zimuzo Ezeozue <zezeozue@...gle.com>
> Cc: Youssef Esmat <youssefesmat@...gle.com>
> Cc: Mel Gorman <mgorman@...e.de>
> Cc: Daniel Bristot de Oliveira <bristot@...hat.com>
> Cc: Will Deacon <will@...nel.org>
> Cc: Waiman Long <longman@...hat.com>
> Cc: Boqun Feng <boqun.feng@...il.com>
> Cc: "Paul E. McKenney" <paulmck@...nel.org>
> Cc: Metin Kaya <Metin.Kaya@....com>
> Cc: Xuewen Yan <xuewen.yan94@...il.com>
> Cc: K Prateek Nayak <kprateek.nayak@....com>
> Cc: Thomas Gleixner <tglx@...utronix.de>
> Cc: kernel-team@...roid.com
> Signed-off-by: Connor O'Brien <connoro@...gle.com>
> [jstultz: split out from larger chain migration patch]
> Signed-off-by: John Stultz <jstultz@...gle.com>
> ---
>   kernel/sched/core.c     | 42 +++++++++++++++++++++++++++++++++++++++++
>   kernel/sched/cpupri.c   | 11 ++++++++---
>   kernel/sched/deadline.c | 15 +++++++++++++--
>   kernel/sched/rt.c       |  9 ++++++++-
>   kernel/sched/sched.h    | 10 ++++++++++
>   5 files changed, 81 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 0c212dcd4b7a..77a79d5f829a 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -3896,6 +3896,48 @@ static void activate_blocked_entities(struct rq *target_rq,
>   	}
>   	raw_spin_unlock_irqrestore(&owner->blocked_lock, flags);
>   }
> +
> +static inline bool task_queued_on_rq(struct rq *rq, struct task_struct *task)
> +{
> +	if (!task_on_rq_queued(task))
> +		return false;
> +	smp_rmb();
> +	if (task_rq(task) != rq)
> +		return false;
> +	smp_rmb();
> +	if (!task_on_rq_queued(task))
> +		return false;

* Super-nit: we may want to have empty lines between `if` blocks and 
before/after `smp_rmb()` calls.

* I did not understand why we call `task_on_rq_queued(task)` twice. 
Should we have an explanatory comment before the function definition?

> +	return true;
> +}
> +
> +/*
> + * Returns the unblocked task at the end of the blocked chain starting with p
> + * if that chain is composed entirely of tasks enqueued on rq, or NULL otherwise.
> + */
> +struct task_struct *find_exec_ctx(struct rq *rq, struct task_struct *p)
> +{
> +	struct task_struct *exec_ctx, *owner;
> +	struct mutex *mutex;
> +
> +	if (!sched_proxy_exec())
> +		return p;
> +
> +	lockdep_assert_rq_held(rq);
> +
> +	for (exec_ctx = p; task_is_blocked(exec_ctx) && !task_on_cpu(rq, exec_ctx);
> +							exec_ctx = owner) {
> +		mutex = exec_ctx->blocked_on;
> +		owner = __mutex_owner(mutex);
> +		if (owner == exec_ctx)
> +			break;
> +
> +		if (!task_queued_on_rq(rq, owner) || task_current_selected(rq, owner)) {
> +			exec_ctx = NULL;
> +			break;
> +		}
> +	}
> +	return exec_ctx;
> +}
>   #else /* !CONFIG_SCHED_PROXY_EXEC */
>   static inline void do_activate_task(struct rq *rq, struct task_struct *p,
>   				    int en_flags)
> diff --git a/kernel/sched/cpupri.c b/kernel/sched/cpupri.c
> index 15e947a3ded7..53be78afdd07 100644
> --- a/kernel/sched/cpupri.c
> +++ b/kernel/sched/cpupri.c
> @@ -96,12 +96,17 @@ static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p,
>   	if (skip)
>   		return 0;
>   
> -	if (cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids)
> +	if ((p && cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids) ||
> +	    (!p && cpumask_any(vec->mask) >= nr_cpu_ids))
>   		return 0;
>   
>   	if (lowest_mask) {
> -		cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
> -		cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
> +		if (p) {
> +			cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
> +			cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
> +		} else {
> +			cpumask_copy(lowest_mask, vec->mask);
> +		}

I think the changes in `cpupri.c` should be part of the previous patch 
(`sched: Push execution and scheduler context split into deadline and rt 
paths`), because they don't seem to be related to find_exec_ctx().

>   
>   		/*
>   		 * We have to ensure that we have at least one bit
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index 999bd17f11c4..21e56ac58e32 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -1866,6 +1866,8 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused
>   
>   static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
>   {
> +	struct task_struct *exec_ctx;
> +
>   	/*
>   	 * Current can't be migrated, useless to reschedule,
>   	 * let's hope p can move out.
> @@ -1874,12 +1876,16 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
>   	    !cpudl_find(&rq->rd->cpudl, rq_selected(rq), rq->curr, NULL))
>   		return;
>   
> +	exec_ctx = find_exec_ctx(rq, p);
> +	if (task_current(rq, exec_ctx))
> +		return;
> +
>   	/*
>   	 * p is migratable, so let's not schedule it and
>   	 * see if it is pushed or pulled somewhere else.
>   	 */
>   	if (p->nr_cpus_allowed != 1 &&
> -	    cpudl_find(&rq->rd->cpudl, p, p, NULL))
> +	    cpudl_find(&rq->rd->cpudl, p, exec_ctx, NULL))
>   		return;
>   
>   	resched_curr(rq);
> @@ -2169,12 +2175,17 @@ static int find_later_rq(struct task_struct *sched_ctx, struct task_struct *exec
>   /* Locks the rq it finds */
>   static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
>   {
> +	struct task_struct *exec_ctx;
>   	struct rq *later_rq = NULL;
>   	int tries;
>   	int cpu;
>   
>   	for (tries = 0; tries < DL_MAX_TRIES; tries++) {
> -		cpu = find_later_rq(task, task);
> +		exec_ctx = find_exec_ctx(rq, task);
> +		if (!exec_ctx)
> +			break;
> +
> +		cpu = find_later_rq(task, exec_ctx);
>   

Super-nit: this empty line should be removed to keep logically connected 
lines closer together.
The same applies to find_lock_lowest_rq().

>   		if ((cpu == -1) || (cpu == rq->cpu))
>   			break;
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index 6371b0fca4ad..f8134d062fa3 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1640,6 +1640,11 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
>   	    !cpupri_find(&rq->rd->cpupri, rq_selected(rq), rq->curr, NULL))
>   		return;
>   
> +	/* No reason to preempt since rq->curr wouldn't change anyway */
> +	exec_ctx = find_exec_ctx(rq, p);
> +	if (task_current(rq, exec_ctx))
> +		return;
> +
>   	/*
>   	 * p is migratable, so let's not schedule it and
>   	 * see if it is pushed or pulled somewhere else.
> @@ -1933,12 +1938,14 @@ static int find_lowest_rq(struct task_struct *sched_ctx, struct task_struct *exe
>   /* Will lock the rq it finds */
>   static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
>   {
> +	struct task_struct *exec_ctx;
>   	struct rq *lowest_rq = NULL;
>   	int tries;
>   	int cpu;
>   
>   	for (tries = 0; tries < RT_MAX_TRIES; tries++) {
> -		cpu = find_lowest_rq(task, task);
> +		exec_ctx = find_exec_ctx(rq, task);
> +		cpu = find_lowest_rq(task, exec_ctx);
>   
>   		if ((cpu == -1) || (cpu == rq->cpu))
>   			break;
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index ef3d327e267c..6cd473224cfe 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -3564,6 +3564,16 @@ int task_is_pushable(struct rq *rq, struct task_struct *p, int cpu)
>   
>   	return 0;
>   }
> +
> +#ifdef CONFIG_SCHED_PROXY_EXEC
> +struct task_struct *find_exec_ctx(struct rq *rq, struct task_struct *p);
> +#else /* !CONFIG_SCHED_PROXY_EXEC */
> +static inline
> +struct task_struct *find_exec_ctx(struct rq *rq, struct task_struct *p)
> +{
> +	return p;
> +}
> +#endif /* CONFIG_SCHED_PROXY_EXEC */
>   #endif

Nit: the `#ifdef CONFIG_SMP` block becomes bigger after this hunk. We should 
append `/* CONFIG_SMP */` to the `#endif` line above, IMHO.

>   
>   #endif /* _KERNEL_SCHED_SCHED_H */