Message-ID: <20208c07-1ea8-43a0-a499-51c9fd63b037@arm.com>
Date: Tue, 2 Jul 2024 21:41:30 +0100
From: Hongyan Xia <hongyan.xia2@....com>
To: Tejun Heo <tj@...nel.org>
Cc: rafael@...nel.org, viresh.kumar@...aro.org, linux-pm@...r.kernel.org,
void@...ifault.com, linux-kernel@...r.kernel.org, kernel-team@...a.com,
mingo@...hat.com, peterz@...radead.org, David Vernet <dvernet@...a.com>,
"Rafael J . Wysocki" <rafael.j.wysocki@...el.com>
Subject: Re: [PATCH 2/2] sched_ext: Add cpuperf support
On 02/07/2024 18:56, Tejun Heo wrote:
> Hello,
>
> So, maybe something like this. It's not the prettiest but avoids adding
> indirect calls to fair and rt while allowing sched_ext to report what the
> BPF scheduler wants. Only compile tested. Would something like this work for
> the use cases you have in mind?
>
> Thanks.
>
> Index: work/kernel/sched/core.c
> ===================================================================
> --- work.orig/kernel/sched/core.c
> +++ work/kernel/sched/core.c
> @@ -1671,6 +1671,20 @@ static inline void uclamp_rq_dec_id(stru
> }
> }
>
> +bool sched_uclamp_enabled(void)
> +{
> + return true;
> +}
> +
> +static bool class_supports_uclamp(const struct sched_class *class)
> +{
> + if (likely(class->uclamp_enabled == sched_uclamp_enabled))
> + return true;
> + if (!class->uclamp_enabled)
> + return false;
> + return class->uclamp_enabled();
> +}
> +
> static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> {
> enum uclamp_id clamp_id;
> @@ -1684,7 +1698,7 @@ static inline void uclamp_rq_inc(struct
> if (!static_branch_unlikely(&sched_uclamp_used))
> return;
>
> - if (unlikely(!p->sched_class->uclamp_enabled))
> + if (!class_supports_uclamp(p->sched_class))
> return;
>
> for_each_clamp_id(clamp_id)
> @@ -1708,7 +1722,7 @@ static inline void uclamp_rq_dec(struct
> if (!static_branch_unlikely(&sched_uclamp_used))
> return;
>
> - if (unlikely(!p->sched_class->uclamp_enabled))
> + if (!class_supports_uclamp(p->sched_class))
> return;
>
> for_each_clamp_id(clamp_id)
> Index: work/kernel/sched/ext.c
> ===================================================================
> --- work.orig/kernel/sched/ext.c
> +++ work/kernel/sched/ext.c
> @@ -116,10 +116,17 @@ enum scx_ops_flags {
> */
> SCX_OPS_SWITCH_PARTIAL = 1LLU << 3,
>
> + /*
> + * Disable built-in uclamp support. Can be useful when the BPF scheduler
> + * wants to implement custom uclamp support.
> + */
> + SCX_OPS_DISABLE_UCLAMP = 1LLU << 4,
> +
> SCX_OPS_ALL_FLAGS = SCX_OPS_KEEP_BUILTIN_IDLE |
> SCX_OPS_ENQ_LAST |
> SCX_OPS_ENQ_EXITING |
> - SCX_OPS_SWITCH_PARTIAL,
> + SCX_OPS_SWITCH_PARTIAL |
> + SCX_OPS_DISABLE_UCLAMP,
> };
>
> /* argument container for ops.init_task() */
> @@ -3437,6 +3444,13 @@ static void switched_from_scx(struct rq
> static void wakeup_preempt_scx(struct rq *rq, struct task_struct *p, int wake_flags) {}
> static void switched_to_scx(struct rq *rq, struct task_struct *p) {}
>
> +#ifdef CONFIG_UCLAMP_TASK
> +static bool uclamp_enabled_scx(void)
> +{
> + return !(scx_ops.flags & SCX_OPS_DISABLE_UCLAMP);
> +}
> +#endif
> +
> int scx_check_setscheduler(struct task_struct *p, int policy)
> {
> lockdep_assert_rq_held(task_rq(p));
> @@ -3522,7 +3536,7 @@ DEFINE_SCHED_CLASS(ext) = {
> .update_curr = update_curr_scx,
>
> #ifdef CONFIG_UCLAMP_TASK
> - .uclamp_enabled = 1,
> + .uclamp_enabled = uclamp_enabled_scx,
> #endif
> };
>
> Index: work/kernel/sched/fair.c
> ===================================================================
> --- work.orig/kernel/sched/fair.c
> +++ work/kernel/sched/fair.c
> @@ -13228,9 +13228,7 @@ DEFINE_SCHED_CLASS(fair) = {
> .task_is_throttled = task_is_throttled_fair,
> #endif
>
> -#ifdef CONFIG_UCLAMP_TASK
> - .uclamp_enabled = 1,
> -#endif
> + SCHED_CLASS_UCLAMP_ENABLED
> };
>
> #ifdef CONFIG_SCHED_DEBUG
> Index: work/kernel/sched/rt.c
> ===================================================================
> --- work.orig/kernel/sched/rt.c
> +++ work/kernel/sched/rt.c
> @@ -2681,9 +2681,7 @@ DEFINE_SCHED_CLASS(rt) = {
> .task_is_throttled = task_is_throttled_rt,
> #endif
>
> -#ifdef CONFIG_UCLAMP_TASK
> - .uclamp_enabled = 1,
> -#endif
> + SCHED_CLASS_UCLAMP_ENABLED
> };
>
> #ifdef CONFIG_RT_GROUP_SCHED
> Index: work/kernel/sched/sched.h
> ===================================================================
> --- work.orig/kernel/sched/sched.h
> +++ work/kernel/sched/sched.h
> @@ -2339,11 +2339,6 @@ struct affinity_context {
> extern s64 update_curr_common(struct rq *rq);
>
> struct sched_class {
> -
> -#ifdef CONFIG_UCLAMP_TASK
> - int uclamp_enabled;
> -#endif
> -
> void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
> void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
> void (*yield_task) (struct rq *rq);
> @@ -2405,8 +2400,21 @@ struct sched_class {
> #ifdef CONFIG_SCHED_CORE
> int (*task_is_throttled)(struct task_struct *p, int cpu);
> #endif
> +
> +#ifdef CONFIG_UCLAMP_TASK
> + bool (*uclamp_enabled)(void);
> +#endif
> };
>
> +#ifdef CONFIG_UCLAMP_TASK
> +bool sched_uclamp_enabled(void);
> +
> +#define SCHED_CLASS_UCLAMP_ENABLED \
> + .uclamp_enabled = sched_uclamp_enabled,
> +#else
> +#define SCHED_CLASS_UCLAMP_ENABLED
> +#endif
> +
> static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
> {
> WARN_ON_ONCE(rq->curr != prev);
Looks good to me!
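
Just to double-check the usage on the BPF side: with this, a scheduler
that wants to do its own uclamp handling would simply set the new flag
in its ops, something like (hypothetical names, untested):

	SEC(".struct_ops.link")
	struct sched_ext_ops my_ops = {
		.enqueue	= (void *)my_enqueue,
		.dispatch	= (void *)my_dispatch,
		.flags		= SCX_OPS_DISABLE_UCLAMP,
		.name		= "my_sched",
	};

and the core would then skip its uclamp_rq_inc/dec() accounting for
tasks in the ext class.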

Actually, if we are okay with changing the sched_class struct and
touching the code of the other classes, I wonder if a cleaner solution
is to remove sched_class->uclamp_enabled entirely and let each class
decide what to do in its own enqueue and dequeue paths. So instead of

	uclamp_inc/dec();
	p->sched_class->enqueue/dequeue_task();

we could just do

	p->sched_class->enqueue/dequeue_task();
	do_uclamp_inside_each_class();

and export the uclamp_inc/dec() helpers from core.c to RT, fair and
ext. For RT and fair this would simply be

	enqueue/dequeue_task_fair/rt();
	uclamp_inc/dec();

(rough sketch below). For ext, maybe expose bpf_uclamp_inc/dec() to the
custom scheduler. If a scheduler wants to reuse the current uclamp
implementation, it just calls these; if not, it skips them and
implements its own.
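
To make that concrete, a very rough sketch of the fair side (untested,
and assuming uclamp_rq_inc()/uclamp_rq_dec() are made non-static in
core.c and declared in sched.h):

	/*
	 * kernel/sched/core.c: enqueue_task()/dequeue_task() would no
	 * longer call uclamp_rq_inc()/uclamp_rq_dec() themselves.
	 */

	/* kernel/sched/fair.c */
	static void
	enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
	{
		/* existing CFS enqueue work ... */

		/* uclamp accounting moves from core.c into the class */
		uclamp_rq_inc(rq, p);
	}

	static void
	dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
	{
		/* existing CFS dequeue work ... */

		/*
		 * Exact placement relative to the rest of the class's
		 * accounting would need checking.
		 */
		uclamp_rq_dec(rq, p);
	}

RT would do the same, and ext would either call the exported helpers
from its enqueue/dequeue paths or leave it to the BPF scheduler via
bpf_uclamp_inc/dec() kfuncs.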