Message-ID: <20210122090239.ajpqvim3xuipawqn@vireshk-i7>
Date:   Fri, 22 Jan 2021 14:32:39 +0530
From:   Viresh Kumar <viresh.kumar@...aro.org>
To:     Ionela Voinescu <ionela.voinescu@....com>
Cc:     Rafael Wysocki <rjw@...ysocki.net>, linux-pm@...r.kernel.org,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Peter Puhov <peter.puhov@...aro.org>, Jeremy.Linton@....com,
        linux-kernel@...r.kernel.org
Subject: Re: [RFC V2 2/2] cpufreq: cppc: Add support for frequency invariance

On 19-01-21, 19:17, Ionela Voinescu wrote:
> Hi,
> 
> Do you know of a current platform that would benefit from this, that we
> could run some tests on?

Thunderx2 is one.

> On Tuesday 15 Dec 2020 at 16:46:36 (+0530), Viresh Kumar wrote:
> > @@ -243,7 +256,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
> >  	struct cppc_cpudata *cpu_data = all_cpu_data[policy->cpu];
> >  	struct cppc_perf_caps *caps = &cpu_data->perf_caps;
> >  	unsigned int cpu = policy->cpu;
> > -	int ret = 0;
> > +	int ret = 0, i;
> >  
> >  	cpu_data->cpu = cpu;
> >  	ret = cppc_get_perf_caps(cpu, caps);
> > @@ -300,6 +313,9 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
> >  
> >  	cpu_data->cur_policy = policy;
> >  
> > +	for_each_cpu(i, policy->cpus)
> > +		per_cpu(cppc_f_i, i).max_freq = policy->cpuinfo.max_freq;
> > +
> 
> Is policy->cpuinfo populated at this point?

The base has changed since I posted the patch, but yes, this routine
itself updates the min/max frequencies in cpuinfo at an earlier point.
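
For reference, this is roughly what runs earlier in
cppc_cpufreq_cpu_init(), before the for_each_cpu() loop (paraphrasing
the driver as it stands now, the exact lines may differ on your base):

	policy->cpuinfo.min_freq = cppc_cpufreq_perf_to_khz(cpu_data,
							    caps->lowest_perf);
	policy->cpuinfo.max_freq = cppc_cpufreq_perf_to_khz(cpu_data,
							    caps->nominal_perf);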

> > +static void cppc_scale_freq_tick_workfn(struct kthread_work *work)
> > +{
> > +	struct cppc_freq_invariance *cppc_fi;
> > +	struct cppc_perf_fb_ctrs fb_ctrs = {0};
> > +	int cpu = raw_smp_processor_id();
> > +	struct cppc_cpudata *cpudata = all_cpu_data[cpu];
> > +	u64 rate;
> > +
> > +	cppc_fi = container_of(work, struct cppc_freq_invariance, work);
> > +
> > +	if (cppc_get_perf_ctrs(cpu, &fb_ctrs)) {
> > +		pr_info("%s: cppc_get_perf_ctrs() failed\n", __func__);
> > +		return;
> > +	}
> > +
> > +	rate = cppc_get_rate_from_fbctrs(cpudata, cppc_fi->prev_perf_fb_ctrs, fb_ctrs);
> > +	cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
> > +
> > +	rate <<= SCHED_CAPACITY_SHIFT;
> > +	per_cpu(freq_scale, cpu) = div64_u64(rate, cppc_fi->max_freq);
> 
> It would save you some computation to skip the intermediate frequency
> scale translation. For this computation you're obtaining the current
> performance from counters, on the CPPC abstract performance scale,
> then you're converting it to a current frequency, which then gets
> translated again to a scale factor on the [0, 1024] scale.
> 
> You probably want to keep the sanitization done in
> cppc_get_rate_from_fbctrs() on the counter values, but you could skip
> the call to cppc_cpufreq_perf_to_khz() and use the obtained
> performance together with caps->highest_perf or caps->nominal_perf,
> instead of cppc_fi->max_freq, in this function.

Something like this?

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 0d7a950f3c9f..d4d7fb0dc918 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -389,9 +389,9 @@ static inline u64 get_delta(u64 t1, u64 t0)
        return (u32)t1 - (u32)t0;
 }
 
-static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
-                                    struct cppc_perf_fb_ctrs fb_ctrs_t0,
-                                    struct cppc_perf_fb_ctrs fb_ctrs_t1)
+static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
+                                struct cppc_perf_fb_ctrs fb_ctrs_t0,
+                                struct cppc_perf_fb_ctrs fb_ctrs_t1)
 {
        u64 delta_reference, delta_delivered;
        u64 reference_perf, delivered_perf;
@@ -404,11 +404,20 @@ static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
                                    fb_ctrs_t0.delivered);
 
        /* Check to avoid divide-by zero */
-       if (delta_reference || delta_delivered)
-               delivered_perf = (reference_perf * delta_delivered) /
-                                       delta_reference;
-       else
-               delivered_perf = cpu_data->perf_ctrls.desired_perf;
+       if (!delta_reference || !delta_delivered)
+               return cpu_data->perf_ctrls.desired_perf;
+
+       return (reference_perf * delta_delivered) / delta_reference;
+}
+
+static int cppc_get_rate_from_fbctrs(struct cppc_cpudata *cpu_data,
+                                    struct cppc_perf_fb_ctrs fb_ctrs_t0,
+                                    struct cppc_perf_fb_ctrs fb_ctrs_t1)
+{
+       u64 delivered_perf;
+
+       delivered_perf = cppc_perf_from_fbctrs(cpu_data, fb_ctrs_t0,
+                                              fb_ctrs_t1);
 
        return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
 }
@@ -539,21 +548,23 @@ static void cppc_scale_freq_workfn(struct kthread_work *work)
        struct cppc_freq_invariance *cppc_fi;
        struct cppc_perf_fb_ctrs fb_ctrs = {0};
        int cpu = raw_smp_processor_id();
-       u64 rate;
+       struct cppc_cpudata *cpu_data;
+       u64 perf;
 
        cppc_fi = container_of(work, struct cppc_freq_invariance, work);
+       cpu_data = cppc_fi->cpu_data;
 
        if (cppc_get_perf_ctrs(cpu, &fb_ctrs)) {
                pr_info("%s: cppc_get_perf_ctrs() failed\n", __func__);
                return;
        }
 
-       rate = cppc_get_rate_from_fbctrs(cppc_fi->cpu_data,
-                                        cppc_fi->prev_perf_fb_ctrs, fb_ctrs);
+       perf = cppc_perf_from_fbctrs(cpu_data, cppc_fi->prev_perf_fb_ctrs,
+                                    fb_ctrs);
        cppc_fi->prev_perf_fb_ctrs = fb_ctrs;
 
-       rate <<= SCHED_CAPACITY_SHIFT;
-       per_cpu(freq_scale, cpu) = div64_u64(rate, cppc_fi->max_freq);
+       perf <<= SCHED_CAPACITY_SHIFT;
+       per_cpu(freq_scale, cpu) = div64_u64(perf, cpu_data->perf_caps.highest_perf);
 }
 
 static void cppc_irq_work(struct irq_work *irq_work)

> Also, to optimise it further, you can compute a reference scale (from
> the reference performance and highest/nominal performance, as done in
> freq_inv_set_max_ratio() in arch/arm64/kernel/topology.c) and use that
> in further freq scale computations.
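
Ah, so precompute the ratio once at init. Something like this maybe
(untested sketch; "ref_scale" is a made-up field here, with
reference_perf taken from an initial cppc_get_perf_ctrs() read):

	/*
	 * Once at init:
	 *
	 *   ref_scale = (reference_perf << 2 * SCHED_CAPACITY_SHIFT)
	 *		 / highest_perf
	 *
	 * The extra SCHED_CAPACITY_SHIFT keeps resolution for low
	 * reference performance values, as in freq_inv_set_max_ratio().
	 */
	cppc_fi->ref_scale = div64_u64((u64)fb_ctrs.reference_perf <<
				       (2 * SCHED_CAPACITY_SHIFT),
				       caps->highest_perf);

	/* Then the tick path only needs the counter deltas: */
	per_cpu(freq_scale, cpu) =
		div64_u64(delta_delivered * cppc_fi->ref_scale,
			  delta_reference) >> SCHED_CAPACITY_SHIFT;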

-- 
viresh
