Message-ID: <CAJZ5v0h_Zed_0ESv2A3XJ+F3e5qAdqu6gR9xiiBnCF59cN4KCQ@mail.gmail.com>
Date: Tue, 19 Apr 2022 17:40:31 +0200
From: "Rafael J. Wysocki" <rafael@...nel.org>
To: Thomas Gleixner <tglx@...utronix.de>
Cc: LKML <linux-kernel@...r.kernel.org>,
"the arch/x86 maintainers" <x86@...nel.org>,
"Rafael J. Wysocki" <rafael@...nel.org>,
Linux PM <linux-pm@...r.kernel.org>,
Eric Dumazet <edumazet@...gle.com>,
"Paul E. McKenney" <paulmck@...nel.org>
Subject: Re: [patch 02/10] x86/smp: Move APERF/MPERF code where it belongs

On Fri, Apr 15, 2022 at 9:19 PM Thomas Gleixner <tglx@...utronix.de> wrote:
>
> as this can share code with the preexisting APERF/MPERF code.
>
> No functional change.
>
> Signed-off-by: Thomas Gleixner <tglx@...utronix.de>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
> ---
> arch/x86/kernel/cpu/aperfmperf.c | 366 ++++++++++++++++++++++++++++++++++++++-
> arch/x86/kernel/smpboot.c | 355 -------------------------------------
> 2 files changed, 362 insertions(+), 359 deletions(-)
>
> --- a/arch/x86/kernel/cpu/aperfmperf.c
> +++ b/arch/x86/kernel/cpu/aperfmperf.c
> @@ -6,15 +6,19 @@
> * Copyright (C) 2017 Intel Corp.
> * Author: Len Brown <len.brown@...el.com>
> */
> -
> +#include <linux/cpufreq.h>
> #include <linux/delay.h>
> #include <linux/ktime.h>
> #include <linux/math64.h>
> #include <linux/percpu.h>
> -#include <linux/cpufreq.h>
> -#include <linux/smp.h>
> -#include <linux/sched/isolation.h>
> #include <linux/rcupdate.h>
> +#include <linux/sched/isolation.h>
> +#include <linux/sched/topology.h>
> +#include <linux/smp.h>
> +#include <linux/syscore_ops.h>
> +
> +#include <asm/cpu_device_id.h>
> +#include <asm/intel-family.h>
>
> #include "cpu.h"
>
> @@ -152,3 +156,357 @@ unsigned int arch_freq_get_on_cpu(int cp
>
> return per_cpu(samples.khz, cpu);
> }
> +
> +#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
> +/*
> + * APERF/MPERF frequency ratio computation.
> + *
> + * The scheduler wants to do frequency invariant accounting and needs a <1
> + * ratio to account for the 'current' frequency, corresponding to
> + * freq_curr / freq_max.
> + *
> + * Since the frequency freq_curr on x86 is controlled by micro-controller and
> + * our P-state setting is little more than a request/hint, we need to observe
> + * the effective frequency 'BusyMHz', i.e. the average frequency over a time
> + * interval after discarding idle time. This is given by:
> + *
> + * BusyMHz = delta_APERF / delta_MPERF * freq_base
> + *
> + * where freq_base is the max non-turbo P-state.
> + *
> + * The freq_max term has to be set to a somewhat arbitrary value, because we
> + * can't know which turbo states will be available at a given point in time:
> + * it all depends on the thermal headroom of the entire package. We set it to
> + * the turbo level with 4 cores active.
> + *
> + * Benchmarks show that's a good compromise between the 1C turbo ratio
> + * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
> + * which would ignore the entire turbo range (a conspicuous part, making
> + * freq_curr/freq_max always maxed out).
> + *
> + * An exception to the heuristic above is the Atom uarch, where we choose the
> + * highest turbo level for freq_max since Atom's are generally oriented towards
> + * power efficiency.
> + *
> + * Setting freq_max to anything less than the 1C turbo ratio makes the ratio
> + * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
> + */
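
For illustration only (not part of the patch): a minimal user-space sketch of
the BusyMHz and freq_curr/freq_max arithmetic described in the comment above,
with made-up counter deltas and frequencies.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Made-up counter deltas over one sampling window */
        uint64_t delta_aperf = 1200000;
        uint64_t delta_mperf = 1000000;
        uint64_t freq_base = 2000;      /* max non-turbo P-state, MHz */
        uint64_t freq_max = 3000;       /* 4C turbo level, MHz */

        /* BusyMHz = delta_APERF / delta_MPERF * freq_base */
        uint64_t busy_mhz = delta_aperf * freq_base / delta_mperf;

        /* freq_curr / freq_max, clipped to 1 as the comment describes */
        double ratio = (double)busy_mhz / (double)freq_max;
        if (ratio > 1.0)
                ratio = 1.0;

        printf("BusyMHz = %llu MHz, freq_curr/freq_max = %.3f\n",
               (unsigned long long)busy_mhz, ratio);
        return 0;
}
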
> +
> +DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
> +
> +static DEFINE_PER_CPU(u64, arch_prev_aperf);
> +static DEFINE_PER_CPU(u64, arch_prev_mperf);
> +static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
> +static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
> +
> +void arch_set_max_freq_ratio(bool turbo_disabled)
> +{
> + arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
> + arch_turbo_freq_ratio;
> +}
> +EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
> +
> +static bool turbo_disabled(void)
> +{
> + u64 misc_en;
> + int err;
> +
> + err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
> + if (err)
> + return false;
> +
> + return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
> +}
> +
> +static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
> +{
> + int err;
> +
> + err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
> + if (err)
> + return false;
> +
> + err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
> + if (err)
> + return false;
> +
> + *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */
> + *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */
> +
> + return true;
> +}
> +
> +#define X86_MATCH(model) \
> + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \
> + INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
> +
> +static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
> + X86_MATCH(XEON_PHI_KNL),
> + X86_MATCH(XEON_PHI_KNM),
> + {}
> +};
> +
> +static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
> + X86_MATCH(SKYLAKE_X),
> + {}
> +};
> +
> +static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
> + X86_MATCH(ATOM_GOLDMONT),
> + X86_MATCH(ATOM_GOLDMONT_D),
> + X86_MATCH(ATOM_GOLDMONT_PLUS),
> + {}
> +};
> +
> +static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
> + int num_delta_fratio)
> +{
> + int fratio, delta_fratio, found;
> + int err, i;
> + u64 msr;
> +
> + err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
> + if (err)
> + return false;
> +
> + *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
> +
> + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
> + if (err)
> + return false;
> +
> + fratio = (msr >> 8) & 0xFF;
> + i = 16;
> + found = 0;
> + do {
> + if (found >= num_delta_fratio) {
> + *turbo_freq = fratio;
> + return true;
> + }
> +
> + delta_fratio = (msr >> (i + 5)) & 0x7;
> +
> + if (delta_fratio) {
> + found += 1;
> + fratio -= delta_fratio;
> + }
> +
> + i += 8;
> + } while (i < 64);
> +
> + return true;
> +}
> +
> +static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
> +{
> + u64 ratios, counts;
> + u32 group_size;
> + int err, i;
> +
> + err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
> + if (err)
> + return false;
> +
> + *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
> +
> + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
> + if (err)
> + return false;
> +
> + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
> + if (err)
> + return false;
> +
> + for (i = 0; i < 64; i += 8) {
> + group_size = (counts >> i) & 0xFF;
> + if (group_size >= size) {
> + *turbo_freq = (ratios >> i) & 0xFF;
> + return true;
> + }
> + }
> +
> + return false;
> +}
> +
> +static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
> +{
> + u64 msr;
> + int err;
> +
> + err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
> + if (err)
> + return false;
> +
> + err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
> + if (err)
> + return false;
> +
> + *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
> + *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */
> +
> + /* The CPU may have less than 4 cores */
> + if (!*turbo_freq)
> + *turbo_freq = msr & 0xFF; /* 1C turbo */
> +
> + return true;
> +}
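
Purely illustrative (made-up register images, not real hardware values): the
same bit-field extraction core_set_max_freq_ratio() performs, including the
1C-turbo fallback for parts with fewer than four cores.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t platform_info = 0x1800;        /* bits 15:8 = 0x18 -> base ratio 24 */
        uint64_t turbo_limit = 0x23242627;      /* 4C..1C turbo ratios, one per byte */

        uint64_t base_freq  = (platform_info >> 8) & 0xFF;     /* max P state */
        uint64_t turbo_freq = (turbo_limit >> 24) & 0xFF;      /* 4C turbo */

        if (!turbo_freq)                        /* fewer than 4 cores */
                turbo_freq = turbo_limit & 0xFF;        /* 1C turbo */

        printf("base ratio %llu, turbo ratio %llu\n",
               (unsigned long long)base_freq, (unsigned long long)turbo_freq);
        return 0;
}
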
> +
> +static bool intel_set_max_freq_ratio(void)
> +{
> + u64 base_freq, turbo_freq;
> + u64 turbo_ratio;
> +
> + if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
> + goto out;
> +
> + if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
> + skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
> + goto out;
> +
> + if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
> + knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
> + goto out;
> +
> + if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
> + skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
> + goto out;
> +
> + if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
> + goto out;
> +
> + return false;
> +
> +out:
> + /*
> + * Some hypervisors advertise X86_FEATURE_APERFMPERF
> + * but then fill all MSR's with zeroes.
> + * Some CPUs have turbo boost but don't declare any turbo ratio
> + * in MSR_TURBO_RATIO_LIMIT.
> + */
> + if (!base_freq || !turbo_freq) {
> + pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
> + return false;
> + }
> +
> + turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
> + if (!turbo_ratio) {
> + pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
> + return false;
> + }
> +
> + arch_turbo_freq_ratio = turbo_ratio;
> + arch_set_max_freq_ratio(turbo_disabled());
> +
> + return true;
> +}
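
Also illustrative only: the final step above scales the turbo/base quotient by
SCHED_CAPACITY_SCALE (1024) to form arch_turbo_freq_ratio; with the made-up
ratios base 24 and 4C turbo 35:

#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SCALE 1024ULL

int main(void)
{
        uint64_t base_freq = 24, turbo_freq = 35;       /* made-up ratios */
        uint64_t turbo_ratio = turbo_freq * SCHED_CAPACITY_SCALE / base_freq;

        /*
         * 35 * 1024 / 24 = 1493; with turbo disabled the ratio falls back
         * to SCHED_CAPACITY_SCALE (1024), as arch_set_max_freq_ratio() does.
         */
        printf("arch_turbo_freq_ratio = %llu\n", (unsigned long long)turbo_ratio);
        return 0;
}
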
> +
> +static void init_counter_refs(void)
> +{
> + u64 aperf, mperf;
> +
> + rdmsrl(MSR_IA32_APERF, aperf);
> + rdmsrl(MSR_IA32_MPERF, mperf);
> +
> + this_cpu_write(arch_prev_aperf, aperf);
> + this_cpu_write(arch_prev_mperf, mperf);
> +}
> +
> +#ifdef CONFIG_PM_SLEEP
> +static struct syscore_ops freq_invariance_syscore_ops = {
> + .resume = init_counter_refs,
> +};
> +
> +static void register_freq_invariance_syscore_ops(void)
> +{
> + /* Bail out if registered already. */
> + if (freq_invariance_syscore_ops.node.prev)
> + return;
> +
> + register_syscore_ops(&freq_invariance_syscore_ops);
> +}
> +#else
> +static inline void register_freq_invariance_syscore_ops(void) {}
> +#endif
> +
> +void init_freq_invariance(bool secondary, bool cppc_ready)
> +{
> + bool ret = false;
> +
> + if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
> + return;
> +
> + if (secondary) {
> + if (static_branch_likely(&arch_scale_freq_key)) {
> + init_counter_refs();
> + }
> + return;
> + }
> +
> + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
> + ret = intel_set_max_freq_ratio();
> + else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
> + if (!cppc_ready) {
> + return;
> + }
> + ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio);
> + }
> +
> + if (ret) {
> + init_counter_refs();
> + static_branch_enable(&arch_scale_freq_key);
> + register_freq_invariance_syscore_ops();
> + pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
> + } else {
> + pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
> + }
> +}
> +
> +static void disable_freq_invariance_workfn(struct work_struct *work)
> +{
> + static_branch_disable(&arch_scale_freq_key);
> +}
> +
> +static DECLARE_WORK(disable_freq_invariance_work,
> + disable_freq_invariance_workfn);
> +
> +DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
> +
> +void arch_scale_freq_tick(void)
> +{
> + u64 freq_scale;
> + u64 aperf, mperf;
> + u64 acnt, mcnt;
> +
> + if (!arch_scale_freq_invariant())
> + return;
> +
> + rdmsrl(MSR_IA32_APERF, aperf);
> + rdmsrl(MSR_IA32_MPERF, mperf);
> +
> + acnt = aperf - this_cpu_read(arch_prev_aperf);
> + mcnt = mperf - this_cpu_read(arch_prev_mperf);
> +
> + this_cpu_write(arch_prev_aperf, aperf);
> + this_cpu_write(arch_prev_mperf, mperf);
> +
> + if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
> + goto error;
> +
> + if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
> + goto error;
> +
> + freq_scale = div64_u64(acnt, mcnt);
> + if (!freq_scale)
> + goto error;
> +
> + if (freq_scale > SCHED_CAPACITY_SCALE)
> + freq_scale = SCHED_CAPACITY_SCALE;
> +
> + this_cpu_write(arch_freq_scale, freq_scale);
> + return;
> +
> +error:
> + pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
> + schedule_work(&disable_freq_invariance_work);
> +}
> +#endif /* CONFIG_X86_64 && CONFIG_SMP */
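
Illustrative only (made-up deltas, the kernel's overflow checks omitted): the
per-tick scale computation of arch_scale_freq_tick() above in user-space form,
i.e. acnt shifted up by 2 * SCHED_CAPACITY_SHIFT, mcnt multiplied by
arch_max_freq_ratio, and the quotient clipped to SCHED_CAPACITY_SCALE.

#include <stdint.h>
#include <stdio.h>

#define SCHED_CAPACITY_SHIFT    10
#define SCHED_CAPACITY_SCALE    (1ULL << SCHED_CAPACITY_SHIFT)

int main(void)
{
        /* Made-up per-tick counter deltas and max-frequency ratio */
        uint64_t acnt = 1200000;                /* APERF delta */
        uint64_t mcnt = 1000000;                /* MPERF delta */
        uint64_t arch_max_freq_ratio = 1493;    /* turbo/base * 1024 */

        uint64_t freq_scale = (acnt << (2 * SCHED_CAPACITY_SHIFT)) /
                              (mcnt * arch_max_freq_ratio);

        if (freq_scale > SCHED_CAPACITY_SCALE)
                freq_scale = SCHED_CAPACITY_SCALE;

        /* 1200000 * 1024 * 1024 / (1000000 * 1493) ~= 842 out of 1024 */
        printf("arch_freq_scale = %llu\n", (unsigned long long)freq_scale);
        return 0;
}
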
> --- a/arch/x86/kernel/smpboot.c
> +++ b/arch/x86/kernel/smpboot.c
> @@ -56,7 +56,6 @@
> #include <linux/numa.h>
> #include <linux/pgtable.h>
> #include <linux/overflow.h>
> -#include <linux/syscore_ops.h>
>
> #include <asm/acpi.h>
> #include <asm/desc.h>
> @@ -1847,357 +1846,3 @@ void native_play_dead(void)
> }
>
> #endif
> -
> -#ifdef CONFIG_X86_64
> -/*
> - * APERF/MPERF frequency ratio computation.
> - *
> - * The scheduler wants to do frequency invariant accounting and needs a <1
> - * ratio to account for the 'current' frequency, corresponding to
> - * freq_curr / freq_max.
> - *
> - * Since the frequency freq_curr on x86 is controlled by micro-controller and
> - * our P-state setting is little more than a request/hint, we need to observe
> - * the effective frequency 'BusyMHz', i.e. the average frequency over a time
> - * interval after discarding idle time. This is given by:
> - *
> - * BusyMHz = delta_APERF / delta_MPERF * freq_base
> - *
> - * where freq_base is the max non-turbo P-state.
> - *
> - * The freq_max term has to be set to a somewhat arbitrary value, because we
> - * can't know which turbo states will be available at a given point in time:
> - * it all depends on the thermal headroom of the entire package. We set it to
> - * the turbo level with 4 cores active.
> - *
> - * Benchmarks show that's a good compromise between the 1C turbo ratio
> - * (freq_curr/freq_max would rarely reach 1) and something close to freq_base,
> - * which would ignore the entire turbo range (a conspicuous part, making
> - * freq_curr/freq_max always maxed out).
> - *
> - * An exception to the heuristic above is the Atom uarch, where we choose the
> - * highest turbo level for freq_max since Atom's are generally oriented towards
> - * power efficiency.
> - *
> - * Setting freq_max to anything less than the 1C turbo ratio makes the ratio
> - * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1.
> - */
> -
> -DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key);
> -
> -static DEFINE_PER_CPU(u64, arch_prev_aperf);
> -static DEFINE_PER_CPU(u64, arch_prev_mperf);
> -static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE;
> -static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE;
> -
> -void arch_set_max_freq_ratio(bool turbo_disabled)
> -{
> - arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE :
> - arch_turbo_freq_ratio;
> -}
> -EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio);
> -
> -static bool turbo_disabled(void)
> -{
> - u64 misc_en;
> - int err;
> -
> - err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en);
> - if (err)
> - return false;
> -
> - return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE);
> -}
> -
> -static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
> -{
> - int err;
> -
> - err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq);
> - if (err)
> - return false;
> -
> - err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq);
> - if (err)
> - return false;
> -
> - *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */
> - *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */
> -
> - return true;
> -}
> -
> -#define X86_MATCH(model) \
> - X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \
> - INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL)
> -
> -static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = {
> - X86_MATCH(XEON_PHI_KNL),
> - X86_MATCH(XEON_PHI_KNM),
> - {}
> -};
> -
> -static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = {
> - X86_MATCH(SKYLAKE_X),
> - {}
> -};
> -
> -static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = {
> - X86_MATCH(ATOM_GOLDMONT),
> - X86_MATCH(ATOM_GOLDMONT_D),
> - X86_MATCH(ATOM_GOLDMONT_PLUS),
> - {}
> -};
> -
> -static bool knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq,
> - int num_delta_fratio)
> -{
> - int fratio, delta_fratio, found;
> - int err, i;
> - u64 msr;
> -
> - err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
> - if (err)
> - return false;
> -
> - *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
> -
> - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
> - if (err)
> - return false;
> -
> - fratio = (msr >> 8) & 0xFF;
> - i = 16;
> - found = 0;
> - do {
> - if (found >= num_delta_fratio) {
> - *turbo_freq = fratio;
> - return true;
> - }
> -
> - delta_fratio = (msr >> (i + 5)) & 0x7;
> -
> - if (delta_fratio) {
> - found += 1;
> - fratio -= delta_fratio;
> - }
> -
> - i += 8;
> - } while (i < 64);
> -
> - return true;
> -}
> -
> -static bool skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size)
> -{
> - u64 ratios, counts;
> - u32 group_size;
> - int err, i;
> -
> - err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
> - if (err)
> - return false;
> -
> - *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
> -
> - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios);
> - if (err)
> - return false;
> -
> - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts);
> - if (err)
> - return false;
> -
> - for (i = 0; i < 64; i += 8) {
> - group_size = (counts >> i) & 0xFF;
> - if (group_size >= size) {
> - *turbo_freq = (ratios >> i) & 0xFF;
> - return true;
> - }
> - }
> -
> - return false;
> -}
> -
> -static bool core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq)
> -{
> - u64 msr;
> - int err;
> -
> - err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq);
> - if (err)
> - return false;
> -
> - err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr);
> - if (err)
> - return false;
> -
> - *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */
> - *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */
> -
> - /* The CPU may have less than 4 cores */
> - if (!*turbo_freq)
> - *turbo_freq = msr & 0xFF; /* 1C turbo */
> -
> - return true;
> -}
> -
> -static bool intel_set_max_freq_ratio(void)
> -{
> - u64 base_freq, turbo_freq;
> - u64 turbo_ratio;
> -
> - if (slv_set_max_freq_ratio(&base_freq, &turbo_freq))
> - goto out;
> -
> - if (x86_match_cpu(has_glm_turbo_ratio_limits) &&
> - skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
> - goto out;
> -
> - if (x86_match_cpu(has_knl_turbo_ratio_limits) &&
> - knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1))
> - goto out;
> -
> - if (x86_match_cpu(has_skx_turbo_ratio_limits) &&
> - skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4))
> - goto out;
> -
> - if (core_set_max_freq_ratio(&base_freq, &turbo_freq))
> - goto out;
> -
> - return false;
> -
> -out:
> - /*
> - * Some hypervisors advertise X86_FEATURE_APERFMPERF
> - * but then fill all MSR's with zeroes.
> - * Some CPUs have turbo boost but don't declare any turbo ratio
> - * in MSR_TURBO_RATIO_LIMIT.
> - */
> - if (!base_freq || !turbo_freq) {
> - pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n");
> - return false;
> - }
> -
> - turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq);
> - if (!turbo_ratio) {
> - pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n");
> - return false;
> - }
> -
> - arch_turbo_freq_ratio = turbo_ratio;
> - arch_set_max_freq_ratio(turbo_disabled());
> -
> - return true;
> -}
> -
> -static void init_counter_refs(void)
> -{
> - u64 aperf, mperf;
> -
> - rdmsrl(MSR_IA32_APERF, aperf);
> - rdmsrl(MSR_IA32_MPERF, mperf);
> -
> - this_cpu_write(arch_prev_aperf, aperf);
> - this_cpu_write(arch_prev_mperf, mperf);
> -}
> -
> -#ifdef CONFIG_PM_SLEEP
> -static struct syscore_ops freq_invariance_syscore_ops = {
> - .resume = init_counter_refs,
> -};
> -
> -static void register_freq_invariance_syscore_ops(void)
> -{
> - /* Bail out if registered already. */
> - if (freq_invariance_syscore_ops.node.prev)
> - return;
> -
> - register_syscore_ops(&freq_invariance_syscore_ops);
> -}
> -#else
> -static inline void register_freq_invariance_syscore_ops(void) {}
> -#endif
> -
> -void init_freq_invariance(bool secondary, bool cppc_ready)
> -{
> - bool ret = false;
> -
> - if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
> - return;
> -
> - if (secondary) {
> - if (static_branch_likely(&arch_scale_freq_key)) {
> - init_counter_refs();
> - }
> - return;
> - }
> -
> - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
> - ret = intel_set_max_freq_ratio();
> - else if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
> - if (!cppc_ready) {
> - return;
> - }
> - ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio);
> - }
> -
> - if (ret) {
> - init_counter_refs();
> - static_branch_enable(&arch_scale_freq_key);
> - register_freq_invariance_syscore_ops();
> - pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio);
> - } else {
> - pr_debug("Couldn't determine max cpu frequency, necessary for scale-invariant accounting.\n");
> - }
> -}
> -
> -static void disable_freq_invariance_workfn(struct work_struct *work)
> -{
> - static_branch_disable(&arch_scale_freq_key);
> -}
> -
> -static DECLARE_WORK(disable_freq_invariance_work,
> - disable_freq_invariance_workfn);
> -
> -DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE;
> -
> -void arch_scale_freq_tick(void)
> -{
> - u64 freq_scale;
> - u64 aperf, mperf;
> - u64 acnt, mcnt;
> -
> - if (!arch_scale_freq_invariant())
> - return;
> -
> - rdmsrl(MSR_IA32_APERF, aperf);
> - rdmsrl(MSR_IA32_MPERF, mperf);
> -
> - acnt = aperf - this_cpu_read(arch_prev_aperf);
> - mcnt = mperf - this_cpu_read(arch_prev_mperf);
> -
> - this_cpu_write(arch_prev_aperf, aperf);
> - this_cpu_write(arch_prev_mperf, mperf);
> -
> - if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt))
> - goto error;
> -
> - if (check_mul_overflow(mcnt, arch_max_freq_ratio, &mcnt) || !mcnt)
> - goto error;
> -
> - freq_scale = div64_u64(acnt, mcnt);
> - if (!freq_scale)
> - goto error;
> -
> - if (freq_scale > SCHED_CAPACITY_SCALE)
> - freq_scale = SCHED_CAPACITY_SCALE;
> -
> - this_cpu_write(arch_freq_scale, freq_scale);
> - return;
> -
> -error:
> - pr_warn("Scheduler frequency invariance went wobbly, disabling!\n");
> - schedule_work(&disable_freq_invariance_work);
> -}
> -#endif /* CONFIG_X86_64 */
>