Message-ID: <0dc000e3-9b85-0841-03c0-3589e2e5d34b@nvidia.com>
Date: Thu, 18 May 2023 20:10:36 +0530
From: Sumit Gupta <sumitg@...dia.com>
To: Yang Shi <yang@...amperecomputing.com>,
Pierre Gondois <pierre.gondois@....com>,
Zeng Heng <zengheng4@...wei.com>,
Ionela Voinescu <Ionela.Voinescu@....com>
CC: <linux-kernel@...r.kernel.org>, <linux-pm@...r.kernel.org>,
<wangxiongfeng2@...wei.com>, <xiexiuqi@...wei.com>,
<liwei391@...wei.com>, <linux-acpi@...r.kernel.org>,
<weiyongjun1@...wei.com>, <lenb@...nel.org>,
<viresh.kumar@...aro.org>, <rafael@...nel.org>
Subject: Re: [PATCH v2 1/2] cpufreq: CPPC: keep target core awake when reading
its cpufreq rate
>>>
>>> +Ionela, Sumit, Yang,
>>>
>>> Hello Zeng,
>>>
>>> I think solutions to related issues were suggested at:
>>>
>>> [1]
>>> https://lore.kernel.org/all/20230418113459.12860-7-sumitg@nvidia.com/
>>> [2]
>>> https://lore.kernel.org/all/20230328193846.8757-1-yang@os.amperecomputing.com/
>>> [3] https://lore.kernel.org/all/ZEl1Fms%2FJmdEZsVn@arm.com/
>>>
>>> About this patch: it seems to assume that the CPPC counters of CPUx are
>>> always accessed from CPUx, even when they are not AMU counters. For
>>> instance, CPPC counters could be memory mapped and accessible from any
>>> CPU. cpu_has_amu_feat() should allow probing whether a CPU uses AMUs,
>>> and [2] had an implementation using it.
>>>
>>> Another comment about PATCH 2/2: if the counters are accessed through
>>> FFH, the arm64 version of cpc_read_ffh() calls counters_read_on_cpu(),
>>> and a comment in counters_read_on_cpu() seems to specify that the
>>> function must be called with interrupts enabled.
>>>
>>> I think the best solution so far is the one at [3], suggested by Ionela,
>>> but it doesn't seem to solve your issue: it does not check whether the
>>> counters are AMU counters that must be read remotely (to keep the CPU
>>> awake).
>>>
>>> Regards,
>>> Pierre
>>>
>>
>> I think the solution in [1] is simple and solves all three cases.
>> It also provides better accuracy between the frequency that is set and
>> the one read back, compared to [3].
>
> I don't think the [1] patches work for our case. We use MMIO instead of
> AMU. Increasing the delay could help mitigate it somewhat, but 25us is
> not good enough for our case. IIRC the fix proposed by Ionela works for
> both your case and mine.
>
I have added the CPC_IN_SYSTEM_MEMORY check from [2] to [1].
Could you please test whether the change below works for you?
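
(A side note for readers of the diff below: CPC_IN_SYSTEM_MEMORY() is the
cppc_acpi.c helper that tests whether a CPC register is declared in ACPI
SystemMemory space. Sketching it from memory, on the assumption that it
mirrors the CPC_IN_SYSTEM_IO() macro visible in the context of the second
diff further down:)

/* Check if a CPC register is in SystemMemory */
#define CPC_IN_SYSTEM_MEMORY(cpc) ((cpc)->type == ACPI_TYPE_BUFFER &&      \
                                   (cpc)->cpc_entry.reg.space_id ==        \
                                   ACPI_ADR_SPACE_SYSTEM_MEMORY)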
-----------------------------------------
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 7ff269a78c20..67aa09b5f15c 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1315,6 +1315,7 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
         struct cppc_pcc_data *pcc_ss_data = NULL;
         u64 delivered, reference, ref_perf, ctr_wrap_time;
         int ret = 0, regs_in_pcc = 0;
+        unsigned long flags;
 
         if (!cpc_desc) {
                 pr_debug("No CPC descriptor for CPU:%d\n", cpunum);
@@ -1350,8 +1351,17 @@ int cppc_get_perf_ctrs(int cpunum, struct cppc_perf_fb_ctrs *perf_fb_ctrs)
                 }
         }
 
+        if (CPC_IN_SYSTEM_MEMORY(delivered_reg) &&
+            CPC_IN_SYSTEM_MEMORY(reference_reg))
+                local_irq_save(flags);
+
         cpc_read(cpunum, delivered_reg, &delivered);
         cpc_read(cpunum, reference_reg, &reference);
+
+        if (CPC_IN_SYSTEM_MEMORY(delivered_reg) &&
+            CPC_IN_SYSTEM_MEMORY(reference_reg))
+                local_irq_restore(flags);
+
         cpc_read(cpunum, ref_perf_reg, &ref_perf);
 
         /*
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 5e6a132a525e..23e690854459 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -46,6 +46,8 @@ static bool boost_supported;
 /* default 2usec delay between sampling */
 static unsigned int sampling_delay_us = 2;
 
+static bool get_rate_use_wq;
+
 static void cppc_check_hisi_workaround(void);
 static void cppc_nvidia_workaround(void);
 
@@ -99,6 +101,12 @@ struct cppc_freq_invariance {
 static DEFINE_PER_CPU(struct cppc_freq_invariance, cppc_freq_inv);
 static struct kthread_worker *kworker_fie;
 
+struct feedback_ctrs {
+        u32 cpu;
+        struct cppc_perf_fb_ctrs fb_ctrs_t0;
+        struct cppc_perf_fb_ctrs fb_ctrs_t1;
+};
+
 static unsigned int hisi_cppc_cpufreq_get_rate(unsigned int cpu);
 static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
                                  struct cppc_perf_fb_ctrs *fb_ctrs_t0,
@@ -851,28 +859,44 @@ static int cppc_perf_from_fbctrs(struct cppc_cpudata *cpu_data,
         return (reference_perf * delta_delivered) / delta_reference;
 }
 
+static int cppc_get_perf_ctrs_sync(void *fb_ctrs)
+{
+        struct feedback_ctrs *ctrs = fb_ctrs;
+        int ret;
+
+        ret = cppc_get_perf_ctrs(ctrs->cpu, &(ctrs->fb_ctrs_t0));
+        if (ret)
+                return ret;
+
+        udelay(sampling_delay_us);
+
+        ret = cppc_get_perf_ctrs(ctrs->cpu, &(ctrs->fb_ctrs_t1));
+        if (ret)
+                return ret;
+
+        return ret;
+}
+
 static unsigned int cppc_cpufreq_get_rate(unsigned int cpu)
 {
-        struct cppc_perf_fb_ctrs fb_ctrs_t0 = {0}, fb_ctrs_t1 = {0};
         struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
         struct cppc_cpudata *cpu_data = policy->driver_data;
+        struct feedback_ctrs fb_ctrs = {0};
         u64 delivered_perf;
         int ret;
 
         cpufreq_cpu_put(policy);
 
+        fb_ctrs.cpu = cpu;
+
-        ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t0);
-        if (ret)
-                return ret;
-
-        udelay(sampling_delay_us);
-
-        ret = cppc_get_perf_ctrs(cpu, &fb_ctrs_t1);
+        if (get_rate_use_wq)
+                ret = smp_call_on_cpu(cpu, cppc_get_perf_ctrs_sync, &fb_ctrs, false);
+        else
+                ret = cppc_get_perf_ctrs_sync(&fb_ctrs);
         if (ret)
                 return ret;
 
-        delivered_perf = cppc_perf_from_fbctrs(cpu_data, &fb_ctrs_t0,
-                                               &fb_ctrs_t1);
+        delivered_perf = cppc_perf_from_fbctrs(cpu_data, &(fb_ctrs.fb_ctrs_t0),
+                                               &(fb_ctrs.fb_ctrs_t1));
 
         return cppc_cpufreq_perf_to_khz(cpu_data, delivered_perf);
 }
@@ -1002,10 +1026,18 @@ static void cppc_apply_workarounds(void)
 static int __init cppc_cpufreq_init(void)
 {
         int ret;
+        int cpu;
 
         if (!acpi_cpc_valid())
                 return -ENODEV;
 
+#ifdef CONFIG_ARM64_AMU_EXTN
+        cpu = get_cpu_with_amu_feat();
+
+        if (cpu < nr_cpu_ids)
+                get_rate_use_wq = true;
+#endif
--------------------------------------------
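For context (not part of the change above, and quoted from memory, so treat
the exact wording as a sketch): smp_call_on_cpu() queues the callback as a
work item on the target CPU and waits for it to complete, so both
cppc_get_perf_ctrs() samples run on that CPU while it is kept out of idle
for the duration. The relevant declarations are roughly:

/*
 * include/linux/smp.h: run func(par) on @cpu from workqueue context and
 * wait for completion; returns the callback's return value, or -ENXIO if
 * @cpu is not online.
 */
int smp_call_on_cpu(unsigned int cpu, int (*func)(void *), void *par, bool phys);

/*
 * arch/arm64/include/asm/cpufeature.h: first CPU advertising the AMU
 * extension, or nr_cpu_ids if no CPU has AMU counters.
 */
int get_cpu_with_amu_feat(void);
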
If we want to reduce the scope of calling smp_call_on_cpu(), we can add an
additional check so that it is only used when the counters are in FFH:
diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index 67aa09b5f15c..3d8348911403 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -110,6 +110,11 @@ static DEFINE_PER_CPU(struct cpc_desc *, cpc_desc_ptr);
                                 (cpc)->cpc_entry.reg.space_id ==        \
                                 ACPI_ADR_SPACE_SYSTEM_IO)
 
+/* Check if a CPC register is in FFH */
+#define CPC_IN_FFH(cpc) ((cpc)->type == ACPI_TYPE_BUFFER &&             \
+                                (cpc)->cpc_entry.reg.space_id ==        \
+                                ACPI_ADR_SPACE_FIXED_HARDWARE)
+
 /* Evaluates to True if reg is a NULL register descriptor */
 #define IS_NULL_REG(reg) ((reg)->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY && \
                                 (reg)->address == 0 &&                  \
@@ -437,6 +442,29 @@ bool acpi_cpc_valid(void)
 }
 EXPORT_SYMBOL_GPL(acpi_cpc_valid);
 
+bool acpi_cpc_in_ffh(void)
+{
+        struct cpc_register_resource *delivered_reg, *reference_reg;
+        struct cpc_desc *cpc_ptr;
+        int cpu;
+
+        if (acpi_disabled)
+                return false;
+
+        for_each_possible_cpu(cpu) {
+                cpc_ptr = per_cpu(cpc_desc_ptr, cpu);
+                delivered_reg = &cpc_ptr->cpc_regs[DELIVERED_CTR];
+                reference_reg = &cpc_ptr->cpc_regs[REFERENCE_CTR];
+
+                if (!CPC_IN_FFH(delivered_reg) ||
+                    !CPC_IN_FFH(reference_reg))
+                        return false;
+        }
+
+        return true;
+}
+EXPORT_SYMBOL_GPL(acpi_cpc_in_ffh);
+
 bool cppc_allow_fast_switch(void)
 {
         struct cpc_register_resource *desired_reg;
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 23e690854459..4109e00b957e 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -1034,7 +1034,7 @@ static int __init cppc_cpufreq_init(void)
 #ifdef CONFIG_ARM64_AMU_EXTN
         cpu = get_cpu_with_amu_feat();
 
-        if (cpu < nr_cpu_ids)
+        if ((cpu < nr_cpu_ids) && acpi_cpc_in_ffh())
                 get_rate_use_wq = true;
 #endif
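
One piece not shown above: acpi_cpc_in_ffh() would also need a declaration
in include/acpi/cppc_acpi.h, plus a stub for !CONFIG_ACPI_CPPC_LIB. A
minimal sketch, following the existing acpi_cpc_valid() pattern:

#ifdef CONFIG_ACPI_CPPC_LIB
extern bool acpi_cpc_in_ffh(void);
#else /* !CONFIG_ACPI_CPPC_LIB */
static inline bool acpi_cpc_in_ffh(void)
{
        return false;
}
#endif /* !CONFIG_ACPI_CPPC_LIB */

With that in place, callers outside cppc_acpi.c can use the helper, matching
how acpi_cpc_valid() is exposed today.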
>>
>> This can be merged now and still be improved upstream later.
>>
>> If that is OK, I can send a new version that changes the patch to apply
>> to all ARM SoCs with AMU rather than being specific to Tegra.
>>
>> Thank you,
>> Sumit Gupta