[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <331552975e858911db66bc78c2c8e720@codeaurora.org>
Date: Mon, 30 Jul 2018 12:35:27 -0700
From: skannan@...eaurora.org
To: Quentin Perret <quentin.perret@....com>
Cc: peterz@...radead.org, rjw@...ysocki.net,
linux-kernel@...r.kernel.org, linux-pm@...r.kernel.org,
gregkh@...uxfoundation.org, mingo@...hat.com,
dietmar.eggemann@....com, morten.rasmussen@....com,
chris.redpath@....com, patrick.bellasi@....com,
valentin.schneider@....com, vincent.guittot@...aro.org,
thara.gopinath@...aro.org, viresh.kumar@...aro.org,
tkjos@...gle.com, joel@...lfernandes.org, smuckle@...gle.com,
adharmap@...cinc.com, skannan@...cinc.com, pkondeti@...eaurora.org,
juri.lelli@...hat.com, edubezval@...il.com,
srinivas.pandruvada@...ux.intel.com, currojerez@...eup.net,
javi.merino@...nel.org, linux-pm-owner@...r.kernel.org
Subject: Re: [PATCH v5 10/14] sched/cpufreq: Refactor the utilization
aggregation method
On 2018-07-24 05:25, Quentin Perret wrote:
> Schedutil aggregates the PELT signals of CFS, RT, DL and IRQ in order
> to decide which frequency to request. Energy Aware Scheduling (EAS)
> needs to be able to predict those requests to assess the energy impact
> of scheduling decisions. However, the PELT signals aggregation is only
> done in schedutil for now, hence making it hard to synchronize it with
> EAS.
>
> To address this issue, introduce schedutil_freq_util() to perform the
> aforementioned aggregation and make it available to other parts of the
> scheduler. Since frequency selection and energy estimation still need
> to deal with RT and DL signals slightly differently,
> schedutil_freq_util()
> is called with a different 'type' parameter in those two contexts, and
> returns an aggregated utilization signal accordingly.
>
> Cc: Ingo Molnar <mingo@...hat.com>
> Cc: Peter Zijlstra <peterz@...radead.org>
> Suggested-by: Peter Zijlstra <peterz@...radead.org>
> Signed-off-by: Quentin Perret <quentin.perret@....com>
> ---
> kernel/sched/cpufreq_schedutil.c | 86 +++++++++++++++++++++-----------
> kernel/sched/sched.h | 14 ++++++
> 2 files changed, 72 insertions(+), 28 deletions(-)
>
> diff --git a/kernel/sched/cpufreq_schedutil.c
> b/kernel/sched/cpufreq_schedutil.c
> index 810193c8e4cd..af86050edcf5 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -198,15 +198,15 @@ static unsigned int get_next_freq(struct
> sugov_policy *sg_policy,
> * based on the task model parameters and gives the minimal
> utilization
> * required to meet deadlines.
> */
> -static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
> +unsigned long schedutil_freq_util(int cpu, unsigned long util_cfs,
> + enum schedutil_type type)
> {
> - struct rq *rq = cpu_rq(sg_cpu->cpu);
> + struct rq *rq = cpu_rq(cpu);
> unsigned long util, irq, max;
>
> - sg_cpu->max = max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
> - sg_cpu->bw_dl = cpu_bw_dl(rq);
> + max = arch_scale_cpu_capacity(NULL, cpu);
>
> - if (rt_rq_is_runnable(&rq->rt))
> + if (type == frequency_util && rt_rq_is_runnable(&rq->rt))
> return max;
>
> /*
> @@ -224,20 +224,33 @@ static unsigned long sugov_get_util(struct
> sugov_cpu *sg_cpu)
> * utilization (PELT windows are synchronized) we can directly add
> them
> * to obtain the CPU's actual utilization.
> */
> - util = cpu_util_cfs(rq);
> + util = util_cfs;
> util += cpu_util_rt(rq);
>
> - /*
> - * We do not make cpu_util_dl() a permanent part of this sum because
> we
> - * want to use cpu_bw_dl() later on, but we need to check if the
> - * CFS+RT+DL sum is saturated (ie. no idle time) such that we select
> - * f_max when there is no idle time.
> - *
> - * NOTE: numerical errors or stop class might cause us to not quite
> hit
> - * saturation when we should -- something for later.
> - */
> - if ((util + cpu_util_dl(rq)) >= max)
> - return max;
> + if (type == frequency_util) {
> + /*
> + * For frequency selection we do not make cpu_util_dl() a
> + * permanent part of this sum because we want to use
> + * cpu_bw_dl() later on, but we need to check if the
> + * CFS+RT+DL sum is saturated (ie. no idle time) such
> + * that we select f_max when there is no idle time.
> + *
> + * NOTE: numerical errors or stop class might cause us
> + * to not quite hit saturation when we should --
> + * something for later.
> + */
> +
> + if ((util + cpu_util_dl(rq)) >= max)
> + return max;
> + } else {
> + /*
> + * OTOH, for energy computation we need the estimated
> + * running time, so include util_dl and ignore dl_bw.
> + */
> + util += cpu_util_dl(rq);
> + if (util >= max)
> + return max;
> + }
If this is going to be a different aggregation from what's done for
frequency guidance, I don't see the point of having it inside
schedutil. Why not keep it in the scheduler files? Also, it seems
weird for the scheduler to depend on a governor's code when that
governor might not actually be in use. What if someone is using
ondemand, conservative, performance, etc.?
>
> /*
> * There is still idle time; further improve the number by using the
> @@ -252,17 +265,34 @@ static unsigned long sugov_get_util(struct
> sugov_cpu *sg_cpu)
> util /= max;
> util += irq;
>
> - /*
> - * Bandwidth required by DEADLINE must always be granted while, for
> - * FAIR and RT, we use blocked utilization of IDLE CPUs as a mechanism
> - * to gracefully reduce the frequency when no tasks show up for longer
> - * periods of time.
> - *
> - * Ideally we would like to set bw_dl as min/guaranteed freq and util +
> - * bw_dl as requested freq. However, cpufreq is not yet ready for such
> - * an interface. So, we only do the latter for now.
> - */
> - return min(max, util + sg_cpu->bw_dl);
> + if (type == frequency_util) {
> + /*
> + * Bandwidth required by DEADLINE must always be granted
> + * while, for FAIR and RT, we use blocked utilization of
> + * IDLE CPUs as a mechanism to gracefully reduce the
> + * frequency when no tasks show up for longer periods of
> + * time.
> + *
> + * Ideally we would like to set bw_dl as min/guaranteed
> + * freq and util + bw_dl as requested freq. However,
> + * cpufreq is not yet ready for such an interface. So,
> + * we only do the latter for now.
> + */
> + util += cpu_bw_dl(rq);
> + }
Instead of adding all this indentation, can't you just return early
when type is not frequency_util, and leave the existing code
un-nested after that check?
> +
> + return min(max, util);
> +}
> +
> +static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu)
> +{
> + struct rq *rq = cpu_rq(sg_cpu->cpu);
> + unsigned long util = cpu_util_cfs(rq);
> +
> + sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
> + sg_cpu->bw_dl = cpu_bw_dl(rq);
> +
> + return schedutil_freq_util(sg_cpu->cpu, util, frequency_util);
> }
>
> /**
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 6d08ccd1e7a4..51e7f113ee23 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2185,7 +2185,15 @@ static inline void cpufreq_update_util(struct
> rq *rq, unsigned int flags) {}
> # define arch_scale_freq_invariant() false
> #endif
>
> +enum schedutil_type {
> + frequency_util,
> + energy_util,
> +};
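Please don't use lower case for enum constants (e.g. use FREQUENCY_UTIL
and ENERGY_UTIL instead). It's extremely confusing, since they look
like ordinary variables.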
Thanks,
Saravana
Powered by blists - more mailing lists