Message-ID: <20210122090239.ajpqvim3xuipawqn@vireshk-i7>
Date: Fri, 22 Jan 2021 14:32:39 +0530
From: Viresh Kumar <viresh.kumar@...aro.org>
To: Ionela Voinescu <ionela.voinescu@....com>
Cc: Rafael Wysocki <rjw@...ysocki.net>, linux-pm@...r.kernel.org,
Vincent Guittot <vincent.guittot@...aro.org>,
Peter Puhov <peter.puhov@...aro.org>, Jeremy.Linton@....com,
linux-kernel@...r.kernel.org
Subject: Re: [RFC V2 2/2] cpufreq: cppc: Add support for frequency invariance
On 19-01-21, 19:17, Ionela Voinescu wrote:
> Hi,
>
> Do you know of a current platform that would benefit from this, that we
> could run some tests on?
ThunderX2 is one.
> On Tuesday 15 Dec 2020 at 16:46:36 (+0530), Viresh Kumar wrote:
> > @@ -243,7 +256,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
> >  	struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu];
> >  	struct cppc_perf_caps *caps = &cpu_data->perf_caps;
> >  	unsigned int cpu = policy->cpu;
> > -	int ret = 0;
> > +	int ret = 0, i;
> >
> >  	cpu_data->cpu = cpu;
> >  	ret = cppc_get_perf_caps(cpu, caps);
> > @@ -300,6 +313,9 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
> >
> >  	cpu_data->cur_policy = policy;
> >
> > +	for_each_cpu(i, policy->cpus)
> > +		per_cpu(cppc_f_i, i).max_freq = policy->cpuinfo.max_freq;
> > +
>
> Is policy->cpuinfo populated at this point?
The base has changed since I posted the patch, but yes, this routine
itself updates the min/max frequencies in cpuinfo at an earlier point.
> > +static void cppc_scale_freq_tick_workfn(struct kthread_work *work)
> > +{
> > +	struct cppc_freq_invariance *cppc_fi;
> > +	struct cppc_perf_fb_ctrs fb_ctrs = {0};
> > +	int cpu = raw_smp_processor_id();
> > +	struct cppc_cpudata *cpudata = all_cpu_data[cpu];
> > +	u64 rate;
> > +
> > +	cppc_fi = container_of(work, struct cppc_freq_invariance, work);
> > +
> > +	if (cppc_get_perf_ctrs(cpu, &fb_ctrs)) {
> > +		pr_info("%s: cppc_get_perf_ctrs() failed\n", __func__);
> > +		return;
> > +	}
> > +
> > +	rate = cppc_get_rate_from_fbctrs(cpudata, cppc_fi->prev_perf_fb_ctrs, fb_ctrs);
> > +	cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
> > +
> > +	rate <<= SCHED_CAPACITY_SHIFT;
> > +	per_cpu(freq_scale, cpu) = div64_u64(rate, cppc_fi->max_freq);
>
> You would save some computation by skipping the intermediate frequency
> scale conversion. For this computation you're obtaining the current
> performance from counters, on the CPPC abstract performance scale,
> then you're converting it to a current frequency, which then gets
> translated again to a scale factor on the [0, 1024] scale.
>
> You probably want to keep the sanitization done in
> cppc_get_rate_from_fbctrs() on the counter values, but you could skip
> the call to cppc_cpufreq_perf_to_khz() and use the obtained performance
> together with caps->highest_perf or caps->nominal_perf, instead of
> cppc_fi->max_freq, in this function.
Something like this?
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 0d7a950f3c9f..d4d7fb0dc918 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -389,9 +389,9 @@ static inline u64 get_delta(u64 t1, u64 t0)
 	return (u32)t1 - (u32)t0;
 }
 
-static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
-				     struct cppc_perf_fb_ctrs fb_ctrs_t0,
-				     struct cppc_perf_fb_ctrs fb_ctrs_t1)
+static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
+				 struct cppc_perf_fb_ctrs fb_ctrs_t0,
+				 struct cppc_perf_fb_ctrs fb_ctrs_t1)
 {
 	u64 delta_reference, delta_delivered;
 	u64 reference_perf, delivered_perf;
@@ -404,11 +404,20 @@ static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
 				    fb_ctrs_t0.delivered);
 
 	/* Check to avoid divide-by zero */
-	if (delta_reference || delta_delivered)
-		delivered_perf = (reference_perf * delta_delivered) /
-					delta_reference;
-	else
-		delivered_perf = cpu_data->perf_ctrls.desired_perf;
+	if (!delta_reference && !delta_delivered)
+		return cpu_data->perf_ctrls.desired_perf;
+
+	return (reference_perf * delta_delivered) / delta_reference;
+}
+
+static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
+				     struct cppc_perf_fb_ctrs fb_ctrs_t0,
+				     struct cppc_perf_fb_ctrs fb_ctrs_t1)
+{
+	u64 delivered_perf;
+
+	delivered_perf = cppc_perf_from_fbctrs(cpu_data, fb_ctrs_t0,
+					       fb_ctrs_t1);
 
 	return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
 }
@@ -539,21 +548,23 @@ static void cppc_scale_freq_workfn(struct kthread_work *work)
 	struct cppc_freq_invariance *cppc_fi;
 	struct cppc_perf_fb_ctrs fb_ctrs = {0};
 	int cpu = raw_smp_processor_id();
-	u64 rate;
+	struct cppc_cpudata *cpu_data;
+	u64 perf;
 
 	cppc_fi = container_of(work, struct cppc_freq_invariance, work);
+	cpu_data = cppc_fi->cpu_data;
 
 	if (cppc_get_perf_ctrs(cpu, &fb_ctrs)) {
 		pr_info("%s: cppc_get_perf_ctrs() failed\n", __func__);
 		return;
 	}
 
-	rate = cppc_get_rate_from_fbctrs(cppc_fi->cpu_data,
-					 cppc_fi->prev_perf_fb_ctrs, fb_ctrs);
+	perf = cppc_perf_from_fbctrs(cpu_data, cppc_fi->prev_perf_fb_ctrs,
+				     fb_ctrs);
 	cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
 
-	rate <<= SCHED_CAPACITY_SHIFT;
-	per_cpu(freq_scale, cpu) = div64_u64(rate, cppc_fi->max_freq);
+	perf <<= SCHED_CAPACITY_SHIFT;
+	per_cpu(freq_scale, cpu) = div64_u64(perf, cpu_data->perf_caps.highest_perf);
 }
 
 static void cppc_irq_work(struct irq_work *irq_work)
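That way the scale factor never leaves the abstract performance scale:
for example, a delivered performance of 200 out of highest_perf 300
gives 200 * 1024 / 300 ≈ 682 on the [0, 1024] scale.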
> Also, to optimise it further, you can compute a reference scale (from
> reference performance and highest/nominal performance, as done in
> freq_inv_set_max_ratio() in arch/arm64/kernel/topology.c) and use that
> instead in further freq scale computations.
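Maybe something like this? Untested sketch; the ref_scale field and
both helper names are made up here, the math just follows
freq_inv_set_max_ratio():

/*
 * Once at init: pre-compute reference_perf / highest_perf. The extra
 * SCHED_CAPACITY_SHIFT keeps resolution when reference_perf is much
 * smaller than highest_perf.
 */
static void cppc_init_ref_scale(struct cppc_freq_invariance *cppc_fi,
				struct cppc_perf_caps *caps)
{
	u64 ratio = (u64)caps->reference_perf << (2 * SCHED_CAPACITY_SHIFT);

	cppc_fi->ref_scale = div64_u64(ratio, caps->highest_perf);
}

/*
 * At tick time only the counter deltas are needed, no conversion to
 * an intermediate frequency at all.
 */
static void cppc_update_freq_scale(struct cppc_freq_invariance *cppc_fi,
				   int cpu, u64 delta_delivered,
				   u64 delta_reference)
{
	u64 scale = div64_u64(delta_delivered * cppc_fi->ref_scale,
			      delta_reference);

	per_cpu(freq_scale, cpu) = scale >> SCHED_CAPACITY_SHIFT;
}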
--
viresh