[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230418113459.12860-7-sumitg@nvidia.com>
Date: Tue, 18 Apr 2023 17:04:59 +0530
From: Sumit Gupta <sumitg@...dia.com>
To: <viresh.kumar@...aro.org>, <rafael@...nel.org>,
<ionela.voinescu@....com>, <mark.rutland@....com>,
<sudeep.holla@....com>, <lpieralisi@...nel.org>,
<catalin.marinas@....com>, <will@...nel.org>
CC: <linux-pm@...r.kernel.org>, <linux-arm-kernel@...ts.infradead.org>,
<linux-kernel@...r.kernel.org>, <linux-tegra@...r.kernel.org>,
<treding@...dia.com>, <jonathanh@...dia.com>, <vsethi@...dia.com>,
<sdonthineni@...dia.com>, <sanjayc@...dia.com>,
<ksitaraman@...dia.com>, <bbasu@...dia.com>, <sumitg@...dia.com>
Subject: [Patch 6/6] cpufreq: CPPC: use wq to read amu counters on target cpu
ARM cores which implement the Activity Monitor Unit (AMU)
use Functional Fixed Hardware (FFH) to map AMU counters to
Delivered_Counter and Reference_Counter registers. Each
sysreg is read separately with a smp_call_function_single
call. So, a total of four IPIs are used, one per register.
Due to this, the AMU's core counter and constant counter
can be sampled at inconsistent time intervals if an IPI is
handled late. This sometimes results in an unstable frequency
value from the "cpuinfo_cur_freq" node. To fix this, queue
work on the target CPU to read all counters synchronously in
sequence. This helps to remove the inter-IPI latency and
makes sure that both counters are sampled within a close
time interval.
Without this change we observed that the re-generated value
of CPU Frequency from AMU counters sometimes deviates by
~25% as the counters are read at non-deterministic times.
Currently, the change is kept specific to Tegra241. It can be
applied to other SoCs having AMU if the same issue is observed.
Signed-off-by: Sumit Gupta <sumitg@...dia.com>
---
drivers/cpufreq/cppc_cpufreq.c | 53 +++++++++++++++++++++++++++-------
1 file changed, 43 insertions(+), 10 deletions(-)
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 5e6a132a525e..52b93ac6225e 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -46,6 +46,8 @@ static bool boost_supported;
/* default 2usec delay between sampling */
static unsigned int sampling_delay_us = 2;
+static bool get_rate_use_wq;
+
static void cppc_check_hisi_workaround(void);
static void cppc_nvidia_workaround(void);
@@ -99,6 +101,12 @@ struct cppc_freq_invariance {
static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv);
static struct kthread_worker *kworker_fie;
+struct feedback_ctrs {
+ u32 cpu;
+ struct cppc_perf_fb_ctrs fb_ctrs_t0;
+ struct cppc_perf_fb_ctrs fb_ctrs_t1;
+};
+
static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu);
static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
struct cppc_perf_fb_ctrs *fb_ctrs_t0,
@@ -851,28 +859,44 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
return (reference_perf * delta_delivered) / delta_reference;
}
+static int cppc_get_perf_ctrs_sync(void *fb_ctrs)
+{
+ struct feedback_ctrs *ctrs = fb_ctrs;
+ int ret;
+
+ ret = cppc_get_perf_ctrs(ctrs->cpu, &(ctrs->fb_ctrs_t0));
+ if (ret)
+ return ret;
+
+ udelay(sampling_delay_us);
+
+ ret = cppc_get_perf_ctrs(ctrs->cpu, &(ctrs->fb_ctrs_t1));
+ if (ret)
+ return ret;
+
+ return ret;
+}
+
static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
{
- struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
struct cppc_cpudata *cpu_data = policy->driver_data;
+ struct feedback_ctrs fb_ctrs = {0};
u64 delivered_perf;
int ret;
cpufreq_cpu_put(policy);
+ fb_ctrs.cpu = cpu;
- ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
- if (ret)
- return ret;
-
- udelay(sampling_delay_us);
-
- ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
+ if (get_rate_use_wq)
+ ret = smp_call_on_cpu(cpu, cppc_get_perf_ctrs_sync, &fb_ctrs, false);
+ else
+ ret = cppc_get_perf_ctrs_sync(&fb_ctrs);
if (ret)
return ret;
- delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
- &fb_ctrs_t1);
+ delivered_perf = cppc_perf_from_fbctrs(cpu_data, &(fb_ctrs.fb_ctrs_t0),
+ &(fb_ctrs.fb_ctrs_t1));
return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
}
@@ -953,7 +977,16 @@ static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu)
static void cppc_nvidia_workaround(void)
{
+ int cpu;
+
sampling_delay_us = 25;
+
+#ifdef CONFIG_ARM64_AMU_EXTN
+ cpu = get_cpu_with_amu_feat();
+
+ if (cpu < nr_cpu_ids)
+ get_rate_use_wq = true;
+#endif
}
static void cppc_check_hisi_workaround(void)
--
2.17.1
Powered by blists - more mailing lists