[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAB8ipk9+4p29iE8HSiRrcc8DanCcO2U3+HRVY5LXLJRWXFMpOw@mail.gmail.com>
Date: Wed, 17 Jul 2024 16:26:16 +0800
From: Xuewen Yan <xuewen.yan94@...il.com>
To: Qais Yousef <qyousef@...alina.io>
Cc: Ingo Molnar <mingo@...nel.org>, Peter Zijlstra <peterz@...radead.org>,
Vincent Guittot <vincent.guittot@...aro.org>, Dietmar Eggemann <dietmar.eggemann@....com>,
linux-kernel@...r.kernel.org, Lukasz Luba <lukasz.luba@....com>,
Wei Wang <wvw@...gle.com>, Rick Yiu <rickyiu@...gle.com>, Chung-Kai Mei <chungkai@...gle.com>,
Xuewen Yan <xuewen.yan@...soc.com>
Subject: Re: [PATCH 2/3] sched/fair: Generalize misfit lb by adding a misfit reason
Hi Qais,
On Sat, Dec 9, 2023 at 9:19 AM Qais Yousef <qyousef@...alina.io> wrote:
>
> MISFIT_PERF is what is currently implemented. It indicates that the task
> requires moving to a more performant CPU.
>
> Guard misfit handling in find_busiest_queue and update_sd_pick_busiest
> with MISFIT_PERF. They explicitly assume this type of misfit
>
> This generalizes misfit lb to allow for more types of misfits to be
> added. Like MISFIT_POWER to help uclamp_max being more effective, and
> MISFIT_LATENCY to help latency sensitive tasks to be spread in
> oversubscribe conditions.
>
> Signed-off-by: Qais Yousef (Google) <qyousef@...alina.io>
> ---
> kernel/sched/fair.c | 28 +++++++++++++++++++++++-----
> kernel/sched/sched.h | 8 ++++++++
> 2 files changed, 31 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index eb9e891182cc..dd49b89a6e3e 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5063,7 +5063,8 @@ static inline int task_fits_cpu(struct task_struct *p, int cpu)
> return (util_fits_cpu(util, uclamp_min, uclamp_max, cpu) > 0);
> }
>
> -static inline int is_misfit_task(struct task_struct *p, struct rq *rq)
> +static inline int is_misfit_task(struct task_struct *p, struct rq *rq,
> + misfit_reason_t *reason)
> {
> if (!p || p->nr_cpus_allowed == 1)
> return 0;
> @@ -5071,16 +5072,21 @@ static inline int is_misfit_task(struct task_struct *p, struct rq *rq)
> if (task_fits_cpu(p, cpu_of(rq)))
> return 0;
>
> + if (reason)
> + *reason = MISFIT_PERF;
> return 1;
> }
>
> static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
> {
> + misfit_reason_t reason;
> +
> if (!sched_asym_cpucap_active())
> return;
>
> - if (!is_misfit_task(p, rq)) {
> + if (!is_misfit_task(p, rq, &reason)) {
> rq->misfit_task_load = 0;
> + rq->misfit_reason = -1;
> return;
> }
>
> @@ -5089,6 +5095,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
> * task_h_load() returns 0.
> */
> rq->misfit_task_load = max_t(unsigned long, task_h_load(p), 1);
> + rq->misfit_reason = reason;
> }
>
> #else /* CONFIG_SMP */
> @@ -9111,7 +9118,7 @@ static int detach_tasks(struct lb_env *env)
>
> case migrate_misfit:
> /* This is not a misfit task */
> - if (!is_misfit_task(p, cpu_rq(env->src_cpu)))
> + if (!is_misfit_task(p, cpu_rq(env->src_cpu), NULL))
> goto next;
>
> env->imbalance = 0;
> @@ -9426,6 +9433,7 @@ struct sg_lb_stats {
> unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */
> unsigned int group_smt_balance; /* Task on busy SMT be moved */
> unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */
> + misfit_reason_t group_misfit_reason;
> #ifdef CONFIG_NUMA_BALANCING
> unsigned int nr_numa_running;
> unsigned int nr_preferred_running;
> @@ -9904,6 +9912,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
> /* Check for a misfit task on the cpu */
> if (sgs->group_misfit_task_load < rq->misfit_task_load) {
> sgs->group_misfit_task_load = rq->misfit_task_load;
> + sgs->group_misfit_reason = rq->misfit_reason;
> *sg_status |= SG_OVERLOAD;
> }
> } else if ((env->idle != CPU_NOT_IDLE) &&
> @@ -9969,6 +9978,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
> */
> if ((env->sd->flags & SD_ASYM_CPUCAPACITY) &&
> (sgs->group_type == group_misfit_task) &&
> + (sgs->group_misfit_reason == MISFIT_PERF) &&
> (!capacity_greater(capacity_of(env->dst_cpu), sg->sgc->max_capacity) ||
> sds->local_stat.group_type != group_has_spare))
> return false;
> @@ -10193,6 +10203,7 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
>
> for_each_cpu(i, sched_group_span(group)) {
> struct rq *rq = cpu_rq(i);
> + misfit_reason_t reason;
> unsigned int local;
>
> sgs->group_load += cpu_load_without(rq, p);
> @@ -10212,9 +10223,15 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
>
> /* Check if task fits in the CPU */
> if (sd->flags & SD_ASYM_CPUCAPACITY &&
> - sgs->group_misfit_task_load &&
> - !is_misfit_task(p, rq))
> + sgs->group_misfit_task_load) {
> + if (!is_misfit_task(p, rq, &reason)) {
> sgs->group_misfit_task_load = 0;
> + sgs->group_misfit_reason = -1;
> + } else {
> + sgs->group_misfit_task_load = max_t(unsigned long, task_h_load(p), 1);
> + sgs->group_misfit_reason = reason;
> + }
> + }
>
> }
>
> @@ -11008,6 +11025,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
> * average load.
> */
> if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
> + rq->misfit_reason == MISFIT_PERF &&
In Android, I found that this check can cause a task to bounce back and
forth between CPUs in a loop. Maybe this condition should be removed,
because for CPUs of the same capacity we should still skip this CPU when
nr_running == 1.
> !capacity_greater(capacity_of(env->dst_cpu), capacity) &&
> nr_running == 1)
> continue;
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index e58a54bda77d..399b6526afab 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -962,6 +962,10 @@ struct balance_callback {
> void (*func)(struct rq *rq);
> };
>
> +typedef enum misfit_reason {
> + MISFIT_PERF, /* Requires moving to a more performant CPU */
> +} misfit_reason_t;
> +
> /*
> * This is the main, per-CPU runqueue data structure.
> *
> @@ -1168,6 +1172,10 @@ struct rq {
> call_single_data_t cfsb_csd;
> struct list_head cfsb_csd_list;
> #endif
> +
> +#ifdef CONFIG_SMP
> + misfit_reason_t misfit_reason;
> +#endif
> };
>
> #ifdef CONFIG_FAIR_GROUP_SCHED
> --
> 2.34.1
>
Powered by blists - more mailing lists