Message-ID: <20208c07-1ea8-43a0-a499-51c9fd63b037@arm.com>
Date: Tue, 2 Jul 2024 21:41:30 +0100
From: Hongyan Xia <hongyan.xia2@....com>
To: Tejun Heo <tj@...nel.org>
Cc: rafael@...nel.org, viresh.kumar@...aro.org, linux-pm@...r.kernel.org,
 void@...ifault.com, linux-kernel@...r.kernel.org, kernel-team@...a.com,
 mingo@...hat.com, peterz@...radead.org, David Vernet <dvernet@...a.com>,
 "Rafael J . Wysocki" <rafael.j.wysocki@...el.com>
Subject: Re: [PATCH 2/2] sched_ext: Add cpuperf support

On 02/07/2024 18:56, Tejun Heo wrote:
> Hello,
> 
> So, maybe something like this. It's not the prettiest but avoids adding
> indirect calls to fair and rt while allowing sched_ext to report what the
> BPF scheduler wants. Only compile tested. Would something like this work for
> the use cases you have in mind?
> 
> Thanks.
> 
> Index: work/kernel/sched/core.c
> ===================================================================
> --- work.orig/kernel/sched/core.c
> +++ work/kernel/sched/core.c
> @@ -1671,6 +1671,20 @@ static inline void uclamp_rq_dec_id(stru
>   	}
>   }
>   
> +bool sched_uclamp_enabled(void)
> +{
> +	return true;
> +}
> +
> +static bool class_supports_uclamp(const struct sched_class *class)
> +{
> +	if (likely(class->uclamp_enabled == sched_uclamp_enabled))
> +		return true;
> +	if (!class->uclamp_enabled)
> +		return false;
> +	return class->uclamp_enabled();
> +}
> +
>   static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
>   {
>   	enum uclamp_id clamp_id;
> @@ -1684,7 +1698,7 @@ static inline void uclamp_rq_inc(struct
>   	if (!static_branch_unlikely(&sched_uclamp_used))
>   		return;
>   
> -	if (unlikely(!p->sched_class->uclamp_enabled))
> +	if (!class_supports_uclamp(p->sched_class))
>   		return;
>   
>   	for_each_clamp_id(clamp_id)
> @@ -1708,7 +1722,7 @@ static inline void uclamp_rq_dec(struct
>   	if (!static_branch_unlikely(&sched_uclamp_used))
>   		return;
>   
> -	if (unlikely(!p->sched_class->uclamp_enabled))
> +	if (!class_supports_uclamp(p->sched_class))
>   		return;
>   
>   	for_each_clamp_id(clamp_id)
> Index: work/kernel/sched/ext.c
> ===================================================================
> --- work.orig/kernel/sched/ext.c
> +++ work/kernel/sched/ext.c
> @@ -116,10 +116,17 @@ enum scx_ops_flags {
>   	 */
>   	SCX_OPS_SWITCH_PARTIAL	= 1LLU << 3,
>   
> +	/*
> +	 * Disable built-in uclamp support. Can be useful when the BPF scheduler
> +	 * wants to implement custom uclamp support.
> +	 */
> +	SCX_OPS_DISABLE_UCLAMP	= 1LLU << 4,
> +
>   	SCX_OPS_ALL_FLAGS	= SCX_OPS_KEEP_BUILTIN_IDLE |
>   				  SCX_OPS_ENQ_LAST |
>   				  SCX_OPS_ENQ_EXITING |
> -				  SCX_OPS_SWITCH_PARTIAL,
> +				  SCX_OPS_SWITCH_PARTIAL |
> +				  SCX_OPS_DISABLE_UCLAMP,
>   };
>   
>   /* argument container for ops.init_task() */
> @@ -3437,6 +3444,13 @@ static void switched_from_scx(struct rq
>   	static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p, int wake_flags) {}
>   static void switched_to_scx(struct rq *rq, struct task_struct *p) {}
>   
> +#ifdef CONFIG_UCLAMP_TASK
> +static bool uclamp_enabled_scx(void)
> +{
> +	return !(scx_ops.flags & SCX_OPS_DISABLE_UCLAMP);
> +}
> +#endif
> +
>   int scx_check_setscheduler(struct task_struct *p, int policy)
>   {
>   	lockdep_assert_rq_held(task_rq(p));
> @@ -3522,7 +3536,7 @@ DEFINE_SCHED_CLASS(ext) = {
>   	.update_curr		= update_curr_scx,
>   
>   #ifdef CONFIG_UCLAMP_TASK
> -	.uclamp_enabled		= 1,
> +	.uclamp_enabled		= uclamp_enabled_scx,
>   #endif
>   };
>   
> Index: work/kernel/sched/fair.c
> ===================================================================
> --- work.orig/kernel/sched/fair.c
> +++ work/kernel/sched/fair.c
> @@ -13228,9 +13228,7 @@ DEFINE_SCHED_CLASS(fair) = {
>   	.task_is_throttled	= task_is_throttled_fair,
>   #endif
>   
> -#ifdef CONFIG_UCLAMP_TASK
> -	.uclamp_enabled		= 1,
> -#endif
> +	SCHED_CLASS_UCLAMP_ENABLED
>   };
>   
>   #ifdef CONFIG_SCHED_DEBUG
> Index: work/kernel/sched/rt.c
> ===================================================================
> --- work.orig/kernel/sched/rt.c
> +++ work/kernel/sched/rt.c
> @@ -2681,9 +2681,7 @@ DEFINE_SCHED_CLASS(rt) = {
>   	.task_is_throttled	= task_is_throttled_rt,
>   #endif
>   
> -#ifdef CONFIG_UCLAMP_TASK
> -	.uclamp_enabled		= 1,
> -#endif
> +	SCHED_CLASS_UCLAMP_ENABLED
>   };
>   
>   #ifdef CONFIG_RT_GROUP_SCHED
> Index: work/kernel/sched/sched.h
> ===================================================================
> --- work.orig/kernel/sched/sched.h
> +++ work/kernel/sched/sched.h
> @@ -2339,11 +2339,6 @@ struct affinity_context {
>   extern s64 update_curr_common(struct rq *rq);
>   
>   struct sched_class {
> -
> -#ifdef CONFIG_UCLAMP_TASK
> -	int uclamp_enabled;
> -#endif
> -
>   	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
>   	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
>   	void (*yield_task)   (struct rq *rq);
> @@ -2405,8 +2400,21 @@ struct sched_class {
>   #ifdef CONFIG_SCHED_CORE
>   	int (*task_is_throttled)(struct task_struct *p, int cpu);
>   #endif
> +
> +#ifdef CONFIG_UCLAMP_TASK
> +	bool (*uclamp_enabled)(void);
> +#endif
>   };
>   
> +#ifdef CONFIG_UCLAMP_TASK
> +bool sched_uclamp_enabled(void);
> +
> +#define SCHED_CLASS_UCLAMP_ENABLED	\
> +	.uclamp_enabled = sched_uclamp_enabled,
> +#else
> +#define SCHED_CLASS_UCLAMP_ENABLED
> +#endif
> +
>   static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
>   {
>   	WARN_ON_ONCE(rq->curr != prev);

Looks good to me!
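
(For reference, I imagine a BPF scheduler opting out of the built-in 
behaviour would just set the new flag in its ops, something like the 
sketch below; my_sched_ops and the other fields are made up here, only 
SCX_OPS_DISABLE_UCLAMP comes from the patch.)

	SEC(".struct_ops.link")
	struct sched_ext_ops my_sched_ops = {
		/* Skip built-in uclamp; this scheduler does its own. */
		.flags	= SCX_OPS_DISABLE_UCLAMP,
		/* ... the usual callbacks ... */
		.name	= "my_sched",
	};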

Actually, if we are okay with changing the sched_class struct and 
touching the code of the other classes, I wonder if a cleaner solution 
is to remove sched_class->uclamp_enabled entirely and let each class 
decide what to do in enqueue and dequeue. So instead of

	uclamp_inc/dec();
	p->sched_class->enqueue/dequeue_task();

we would have

	p->sched_class->enqueue/dequeue_task();
		do_uclamp_inside_each_class();

and export the uclamp_inc/dec() functions from core.c to RT, fair and 
ext. For RT and fair, this is simply

	enqueue/dequeue_task_fair/rt();
		uclamp_inc/dec();
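
i.e. something like this rough sketch (assuming uclamp_rq_inc() and 
uclamp_rq_dec() from core.c are made non-static and declared in 
sched.h; not even compile tested):

	static void
	enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
	{
		/* ... existing enqueue work ... */

		/* uclamp bookkeeping now lives inside the class itself. */
		uclamp_rq_inc(rq, p);
	}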

For ext, maybe expose bpf_uclamp_inc/dec() to the custom scheduler. If 
a scheduler wants to reuse the current uclamp implementation, it can 
just call these; if not, it can skip them and implement its own.
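
Purely as a sketch of the ext side (bpf_uclamp_inc/dec are made-up 
names, the kfunc registration boilerplate is omitted, the arguments 
would need proper annotations for the verifier, and it reuses the same 
exported uclamp_rq_inc/dec as above):

	__bpf_kfunc void bpf_uclamp_inc(struct rq *rq, struct task_struct *p)
	{
		uclamp_rq_inc(rq, p);
	}

	__bpf_kfunc void bpf_uclamp_dec(struct rq *rq, struct task_struct *p)
	{
		uclamp_rq_dec(rq, p);
	}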
