lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20240806081259.GN37996@noisy.programming.kicks-ass.net>
Date: Tue, 6 Aug 2024 10:12:59 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: Tejun Heo <tj@...nel.org>
Cc: void@...ifault.com, linux-kernel@...r.kernel.org, kernel-team@...a.com,
	mingo@...hat.com
Subject: Re: [PATCH 6/6] sched_ext: Make task_can_run_on_remote_rq() use
 common task_allowed_on_cpu()

On Sat, Aug 03, 2024 at 04:40:13PM -1000, Tejun Heo wrote:
> task_can_run_on_remote_rq() is similar to is_cpu_allowed() but there are
> subtle differences. It currently open codes all the tests. This is
> cumbersome to understand and error-prone in case the intersecting tests need
> to be updated.
> 
> Factor out the common part - testing whether the task is allowed on the CPU
> at all regardless of the CPU state - into task_allowed_on_cpu() and make
> both is_cpu_allowed() and SCX's task_can_run_on_remote_rq() use it. As the
> code is now linked between the two and each contains only the extra tests
> that differ between them, it's less error-prone when the conditions need to
> be updated. Also, improve the comment to explain why they are different.
> 
> Signed-off-by: Tejun Heo <tj@...nel.org>
> Suggested-by: Peter Zijlstra <peterz@...radead.org>
> ---
>  kernel/sched/core.c  |  4 ++--
>  kernel/sched/ext.c   | 21 ++++++++++++++++-----
>  kernel/sched/sched.h | 18 ++++++++++++++++++
>  3 files changed, 36 insertions(+), 7 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index d2ccc2c4b4d3..3c22d0c8eed1 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -2311,7 +2311,7 @@ static inline bool rq_has_pinned_tasks(struct rq *rq)
>  static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
>  {
>  	/* When not in the task's cpumask, no point in looking further. */
> -	if (!cpumask_test_cpu(cpu, p->cpus_ptr))
> +	if (!task_allowed_on_cpu(p, cpu))
>  		return false;
>  
>  	/* migrate_disabled() must be allowed to finish. */
> @@ -2320,7 +2320,7 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
>  
>  	/* Non kernel threads are not allowed during either online or offline. */
>  	if (!(p->flags & PF_KTHREAD))
> -		return cpu_active(cpu) && task_cpu_possible(cpu, p);
> +		return cpu_active(cpu);
>  
>  	/* KTHREAD_IS_PER_CPU is always allowed. */
>  	if (kthread_is_per_cpu(p))
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 7837a551022c..60a7eb7d8a9e 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -2224,19 +2224,30 @@ static void consume_local_task(struct rq *rq, struct scx_dispatch_q *dsq,
>  
>  #ifdef CONFIG_SMP
>  /*
> - * Similar to kernel/sched/core.c::is_cpu_allowed() but we're testing whether @p
> - * can be pulled to @rq.
> + * Similar to kernel/sched/core.c::is_cpu_allowed(). However, there are two
> + * differences:
> + *
> + * - is_cpu_allowed() asks "Can this task run on this CPU?" while
> + *   task_can_run_on_remote_rq() asks "Can the BPF scheduler migrate the task to
> + *   this CPU?".
> + *
> + *   While migration is disabled, is_cpu_allowed() has to say "yes" as the task
> + *   must be allowed to finish on the CPU that it's currently on regardless of
> + *   the CPU state. However, task_can_run_on_remote_rq() must say "no" as the
> + *   BPF scheduler shouldn't attempt to migrate a task which has migration
> + *   disabled.
> + *
> + * - The BPF scheduler is bypassed while the rq is offline and we can always say
> + *   no to the BPF scheduler initiated migrations while offline.
>   */
>  static bool task_can_run_on_remote_rq(struct task_struct *p, struct rq *rq)
>  {
>  	int cpu = cpu_of(rq);
>  
> -	if (!cpumask_test_cpu(cpu, p->cpus_ptr))
> +	if (!task_allowed_on_cpu(p, cpu))
>  		return false;
>  	if (unlikely(is_migration_disabled(p)))
>  		return false;
> -	if (!(p->flags & PF_KTHREAD) && unlikely(!task_cpu_possible(cpu, p)))
> -		return false;
>  	if (!scx_rq_online(rq))
>  		return false;
>  	return true;
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 9b88a46d3fce..2b369d8a36b1 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2530,6 +2530,19 @@ extern void sched_balance_trigger(struct rq *rq);
>  extern int __set_cpus_allowed_ptr(struct task_struct *p, struct affinity_context *ctx);
>  extern void set_cpus_allowed_common(struct task_struct *p, struct affinity_context *ctx);
>  
> +extern inline bool task_allowed_on_cpu(struct task_struct *p, int cpu)

This wants to be "static inline", no? I think we try to avoid "extern
inline".

> +{
> +	/* When not in the task's cpumask, no point in looking further. */
> +	if (!cpumask_test_cpu(cpu, p->cpus_ptr))
> +		return false;
> +
> +	/* Can @cpu run a user thread? */
> +	if (!(p->flags & PF_KTHREAD) && !task_cpu_possible(cpu, p))
> +		return false;
> +
> +	return true;
> +}
> +
>  static inline cpumask_t *alloc_user_cpus_ptr(int node)
>  {
>  	/*
> @@ -2563,6 +2576,11 @@ extern int push_cpu_stop(void *arg);
>  
>  #else /* !CONFIG_SMP: */
>  
> +static inline bool task_allowed_on_cpu(struct task_struct *p, int cpu)
> +{
> +	return true;
> +}
> +
>  static inline int __set_cpus_allowed_ptr(struct task_struct *p,
>  					 struct affinity_context *ctx)
>  {
> -- 
> 2.46.0
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ