Message-ID: <q4pc63l43mtp56qim6s5riwu25cgvdqfchg7jbqhhcixkyqs7i@mlqqgd5hxnec>
Date: Tue, 12 Mar 2024 19:12:37 -0700
From: Vanshidhar Konda <vanshikonda@...amperecomputing.com>
To: Beata Michalska <beata.michalska@....com>
Cc: linux-kernel@...r.kernel.org, linux-arm-kernel@...ts.infradead.org,
ionela.voinescu@....com, sudeep.holla@....com, will@...nel.org, catalin.marinas@....com,
vincent.guittot@...aro.org, sumitg@...dia.com, yang@...amperecomputing.com,
lihuisong@...wei.com
Subject: Re: [PATCH v3 2/3] arm64: Provide an AMU-based version of
arch_freq_get_on_cpu
On Tue, Mar 12, 2024 at 08:34:30AM +0000, Beata Michalska wrote:
>With the Frequency Invariance Engine (FIE) being already wired up with
>sched tick and making use of relevant (core counter and constant
>counter) AMU counters, getting the current frequency for a given CPU
>on supported platforms can be achieved by utilizing the frequency scale
>factor which reflects an average CPU frequency for the last tick period
>length.
>
>The solution is partially based on APERF/MPERF implementation of
>arch_freq_get_on_cpu.
>
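(Not a review comment as such, just checking that I follow the math:
per tick, the scale factor ends up being computed roughly as below --
simplified from amu_scale_freq_tick(), not verbatim:

    scale  = core_cnt - prev_core_cnt;              /* delta core cycles  */
    scale *= this_cpu_read(arch_max_freq_scale);
    scale  = div64_u64(scale >> SCHED_CAPACITY_SHIFT,
                       const_cnt - prev_const_cnt); /* delta const cycles */
    scale  = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);

so arch_freq_get_on_cpu() further down can recover an average frequency
by multiplying that scale back with the reference frequency.)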
>Suggested-by: Ionela Voinescu <ionela.voinescu@....com>
>Signed-off-by: Beata Michalska <beata.michalska@....com>
>---
> arch/arm64/kernel/topology.c | 103 +++++++++++++++++++++++++++++++----
> 1 file changed, 92 insertions(+), 11 deletions(-)
>
>diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
>index 1a2c72f3e7f8..42cb19c31719 100644
>--- a/arch/arm64/kernel/topology.c
>+++ b/arch/arm64/kernel/topology.c
>@@ -17,6 +17,8 @@
> #include <linux/cpufreq.h>
> #include <linux/init.h>
> #include <linux/percpu.h>
>+#include <linux/sched/isolation.h>
>+#include <linux/seqlock_types.h>
>
> #include <asm/cpu.h>
> #include <asm/cputype.h>
>@@ -88,18 +90,31 @@ int __init parse_acpi_topology(void)
> * initialized.
> */
> static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, arch_max_freq_scale) = 1UL << (2 * SCHED_CAPACITY_SHIFT);
>-static DEFINE_PER_CPU(u64, arch_const_cycles_prev);
>-static DEFINE_PER_CPU(u64, arch_core_cycles_prev);
> static cpumask_var_t amu_fie_cpus;
>
>+struct amu_cntr_sample {
>+ u64 arch_const_cycles_prev;
>+ u64 arch_core_cycles_prev;
>+ unsigned long last_update;
>+ seqcount_t seq;
>+};
>+
>+static DEFINE_PER_CPU_SHARED_ALIGNED(struct amu_cntr_sample, cpu_amu_samples) = {
>+ .seq = SEQCNT_ZERO(cpu_amu_samples.seq)
>+};
>+
> void update_freq_counters_refs(void)
> {
>- this_cpu_write(arch_core_cycles_prev, read_corecnt());
>- this_cpu_write(arch_const_cycles_prev, read_constcnt());
>+ struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples);
>+
>+ amu_sample->arch_core_cycles_prev = read_corecnt();
>+ amu_sample->arch_const_cycles_prev = read_constcnt();
> }
>
> static inline bool freq_counters_valid(int cpu)
> {
>+ struct amu_cntr_sample *amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu);
>+
> if ((cpu >= nr_cpu_ids) || !cpumask_test_cpu(cpu, cpu_present_mask))
> return false;
>
>@@ -108,8 +123,8 @@ static inline bool freq_counters_valid(int cpu)
> return false;
> }
>
>- if (unlikely(!per_cpu(arch_const_cycles_prev, cpu) ||
>- !per_cpu(arch_core_cycles_prev, cpu))) {
>+ if (unlikely(!amu_sample->arch_const_cycles_prev ||
>+ !amu_sample->arch_core_cycles_prev)) {
> pr_debug("CPU%d: cycle counters are not enabled.\n", cpu);
> return false;
> }
>@@ -152,20 +167,27 @@ void freq_inv_set_max_ratio(int cpu, u64 max_rate)
>
> static void amu_scale_freq_tick(void)
> {
>+ struct amu_cntr_sample *amu_sample = this_cpu_ptr(&cpu_amu_samples);
> u64 prev_core_cnt, prev_const_cnt;
> u64 core_cnt, const_cnt, scale;
>
>- prev_const_cnt = this_cpu_read(arch_const_cycles_prev);
>- prev_core_cnt = this_cpu_read(arch_core_cycles_prev);
>+ prev_const_cnt = amu_sample->arch_const_cycles_prev;
>+ prev_core_cnt = amu_sample->arch_core_cycles_prev;
>+
>+ write_seqcount_begin(&amu_sample->seq);
>
> update_freq_counters_refs();
>
>- const_cnt = this_cpu_read(arch_const_cycles_prev);
>- core_cnt = this_cpu_read(arch_core_cycles_prev);
>+ const_cnt = amu_sample->arch_const_cycles_prev;
>+ core_cnt = amu_sample->arch_core_cycles_prev;
>
>+ /*
>+ * This should not happen unless the AMUs have been reset and the
>+ * counter values have not been resroted - unlikely
/resroted/restored
>+ */
> if (unlikely(core_cnt <= prev_core_cnt ||
> const_cnt <= prev_const_cnt))
>- return;
>+ goto leave;
>
> /*
> * /\core arch_max_freq_scale
>@@ -182,6 +204,10 @@ static void amu_scale_freq_tick(void)
>
> scale = min_t(unsigned long, scale, SCHED_CAPACITY_SCALE);
> this_cpu_write(arch_freq_scale, (unsigned long)scale);
>+
>+ amu_sample->last_update = jiffies;
>+leave:
>+ write_seqcount_end(&amu_sample->seq);
> }
>
> static struct scale_freq_data amu_sfd = {
>@@ -189,6 +215,61 @@ static struct scale_freq_data amu_sfd = {
> .set_freq_scale = amu_scale_freq_tick,
> };
>
>+#define AMU_SAMPLE_EXP_MS 20
>+
>+unsigned int arch_freq_get_on_cpu(int cpu)
>+{
>+ struct amu_cntr_sample *amu_sample;
>+ unsigned long last_update;
>+ unsigned int seq;
>+ unsigned int freq;
>+ u64 scale;
>+
>+ if (!cpumask_test_cpu(cpu, amu_fie_cpus) || !arch_scale_freq_ref(cpu))
>+ return 0;
>+
>+retry:
>+ amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu);
>+
>+ do {
>+ seq = raw_read_seqcount_begin(&amu_sample->seq);
>+ last_update = amu_sample->last_update;
>+ } while (read_seqcount_retry(&amu_sample->seq, seq));
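(Side note for readers of the archive: as I understand it, this
read-side loop pairs with the write side added to amu_scale_freq_tick()
above, i.e. roughly:

    /* writer (amu_scale_freq_tick) */
    write_seqcount_begin(&amu_sample->seq);
    /* ... update counters and last_update ... */
    write_seqcount_end(&amu_sample->seq);

so a tick racing with this read just causes a retry here rather than a
torn last_update.)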
>+
>+ /*
>+ * For those CPUs that are in full dynticks mode,
>+ * and those that have not seen tick for a while
>+ * try an alternative source for the counters (and thus freq scale),
>+ * if available for given policy
>+ */
>+ if (time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) {
>+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
>+ int ref_cpu = nr_cpu_ids;
>+
>+ if (cpumask_intersects(housekeeping_cpumask(HK_TYPE_TICK),
>+ policy->cpus))
>+ ref_cpu = cpumask_nth_and(cpu, policy->cpus,
>+ housekeeping_cpumask(HK_TYPE_TICK));
>+
Could you help me understand why getting the frequency from another
housekeeping CPU would be better than returning 0? Wouldn't different
CPUs in the HK_TYPE_TICK domain be running at independent frequencies?
Maybe adding this explanation to the patch commit message would help
people who look at this in the future?
Thanks,
Vanshi
>+ cpufreq_cpu_put(policy);
>+ if (ref_cpu >= nr_cpu_ids || ref_cpu == cpu)
>+ /* No alternative to pull info from */
>+ return 0;
>+ cpu = ref_cpu;
>+ goto retry;
>+ }
>+ /*
>+ * Reversed computation to the one used to determine
>+ * the arch_freq_scale value
>+ * (see amu_scale_freq_tick for details)
>+ */
>+ scale = arch_scale_freq_capacity(cpu);
>+ freq = scale * arch_scale_freq_ref(cpu);
>+ freq >>= SCHED_CAPACITY_SHIFT;
>+
>+ return freq;
>+}
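(And to double-check my reading of the reversed computation: with, say,
a 2000000 kHz reference frequency from arch_scale_freq_ref() and the CPU
having averaged about half of that over the last tick, the scale factor
would be around 512, giving:

    freq = scale * arch_scale_freq_ref(cpu) >> SCHED_CAPACITY_SHIFT
         = 512 * 2000000 >> 10
         = 1000000 kHz

i.e. the return value ends up in the same kHz units as the cpufreq
reference frequency - please correct me if I've misread that.)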
>+
> static void amu_fie_setup(const struct cpumask *cpus)
> {
> int cpu;
>--
>2.25.1
>