[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <e983a5ad-dcdd-7646-44e5-14a0e2aa5b43@amd.com>
Date: Tue, 29 Nov 2022 18:18:57 +0530
From: Wyes Karny <wyes.karny@....com>
To: Perry Yuan <perry.yuan@....com>, rafael.j.wysocki@...el.com,
Mario.Limonciello@....com, ray.huang@....com,
viresh.kumar@...aro.org
Cc: Deepak.Sharma@....com, Nathan.Fontenot@....com,
Alexander.Deucher@....com, Shimmer.Huang@....com,
Xiaojian.Du@....com, Li.Meng@....com, linux-pm@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH v5 3/9] cpufreq: amd_pstate: implement Pstate EPP support
for the AMD processors
Hi Perry,
On 11/28/2022 10:33 PM, Perry Yuan wrote:
> From: Perry Yuan <Perry.Yuan@....com>
>
> Add EPP driver support for AMD SoCs which support a dedicated MSR for
> CPPC. EPP is used by the DPM controller to configure the frequency that
> a core operates at during short periods of activity.
>
> The SoC EPP targets are configured on a scale from 0 to 255 where 0
> represents maximum performance and 255 represents maximum efficiency.
>
> The amd-pstate driver exports profile string names to userspace that are
> tied to specific EPP values.
>
> The balance_performance string (0x80) provides the best balance for
> efficiency versus power on most systems, but users can choose other
> strings to meet their needs as well.
>
> $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences
> default performance balance_performance balance_power power
>
> $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference
> balance_performance
>
> Signed-off-by: Perry Yuan <Perry.Yuan@....com>
> ---
> drivers/cpufreq/amd-pstate.c | 635 ++++++++++++++++++++++++++++++++++-
> include/linux/amd-pstate.h | 81 +++++
> 2 files changed, 710 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 204e39006dda..5a19b832afdf 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -59,8 +59,132 @@
> * we disable it by default to go acpi-cpufreq on these processors and add a
> * module parameter to be able to enable it manually for debugging.
> */
> -static struct cpufreq_driver amd_pstate_driver;
> +static bool shared_mem __read_mostly;
`shared_mem` is initialized to 0 here but it is not being overwite anywhere.
> +static int cppc_active __read_mostly;
> static int cppc_load __initdata;
> +static int epp_off __initdata;
Not sure why we need `cppc_active` and `epp_off` two veriables to control `active mode`.
Is is because `epp_off` is marked as __initdata, therefore if you read `epp_off` in non-init functions
it shows zero?
In that case, can we remove __initdata from `epp_off` and use this same variable elsewhere?
> +
> +static struct cpufreq_driver *default_pstate_driver;
> +static struct amd_cpudata **all_cpu_data;
> +
> +static struct amd_pstate_params global_params;
> +
> +static DEFINE_MUTEX(amd_pstate_limits_lock);
> +static DEFINE_MUTEX(amd_pstate_driver_lock);
> +
> +static bool cppc_boost __read_mostly;
> +struct kobject *amd_pstate_kobj;
> +
> +#ifdef CONFIG_ACPI_CPPC_LIB
> +static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
> +{
> + s16 epp;
> + struct cppc_perf_caps perf_caps;
> + int ret;
> +
> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> + if (!cppc_req_cached) {
> + epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> + &cppc_req_cached);
> + if (epp)
> + return epp;
> + }
> + epp = (cppc_req_cached >> 24) & 0xFF;
> + } else {
> + ret = cppc_get_epp_caps(cpudata->cpu, &perf_caps);
> + if (ret < 0) {
> + pr_debug("Could not retrieve energy perf value (%d)\n", ret);
> + return -EIO;
> + }
> + epp = (s16) perf_caps.energy_perf;
> + }
> +
> + return epp;
> +}
> +#endif
> +
> +static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata)
> +{
> + s16 epp;
> + int index = -EINVAL;
> +
> + epp = amd_pstate_get_epp(cpudata, 0);
> + if (epp < 0)
> + return epp;
> +
> + switch (epp) {
> + case AMD_CPPC_EPP_PERFORMANCE:
> + index = EPP_INDEX_PERFORMANCE;
> + break;
> + case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
> + index = EPP_INDEX_BALANCE_PERFORMANCE;
> + break;
> + case AMD_CPPC_EPP_BALANCE_POWERSAVE:
> + index = EPP_INDEX_BALANCE_POWERSAVE;
> + break;
> + case AMD_CPPC_EPP_POWERSAVE:
> + index = EPP_INDEX_POWERSAVE;
> + break;
> + default:
> + break;
> + }
> +
> + return index;
> +}
> +
> +#ifdef CONFIG_ACPI_CPPC_LIB
> +static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
> +{
> + int ret;
> + struct cppc_perf_ctrls perf_ctrls;
> +
> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> + u64 value = READ_ONCE(cpudata->cppc_req_cached);
> +
> + value &= ~GENMASK_ULL(31, 24);
> + value |= (u64)epp << 24;
small nit, AMD_CPPC_ENERGY_PERF_PREF macro can be used here, instead of hardcoding values.
> + WRITE_ONCE(cpudata->cppc_req_cached, value);
> +
> + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
> + if (!ret)
> + cpudata->epp_cached = epp;
> + } else {
> + perf_ctrls.energy_perf = epp;
> + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
> + if (ret) {
> + pr_debug("failed to set energy perf value (%d)\n", ret);
> + return ret;
> + }
> + cpudata->epp_cached = epp;
> + }
> +
> + return ret;
> +}
> +
> +static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
> + int pref_index)
> +{
> + int epp = -EINVAL;
> + int ret;
> +
> + if (!pref_index) {
> + pr_debug("EPP pref_index is invalid\n");
> + return -EINVAL;
> + }
> +
> + if (epp == -EINVAL)
> + epp = epp_values[pref_index];
> +
> + if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> + pr_debug("EPP cannot be set under performance policy\n");
> + return -EBUSY;
> + }
> +
> + ret = amd_pstate_set_epp(cpudata, epp);
> +
> + return ret;
> +}
> +#endif
>
> static inline int pstate_enable(bool enable)
> {
> @@ -70,11 +194,21 @@ static inline int pstate_enable(bool enable)
> static int cppc_enable(bool enable)
> {
> int cpu, ret = 0;
> + struct cppc_perf_ctrls perf_ctrls;
>
> for_each_present_cpu(cpu) {
> ret = cppc_set_enable(cpu, enable);
> if (ret)
> return ret;
> +
> + /* Enable autonomous mode for EPP */
> + if (!cppc_active) {
It should be: if (cppc_active) { right?
> + /* Set desired perf as zero to allow EPP firmware control */
> + perf_ctrls.desired_perf = 0;
> + ret = cppc_set_perf(cpu, &perf_ctrls);
Don't we have to do same thing (setting des_perf to 0) in `pstate_enable` function?
Because for active + MSR supported processors will invoke `pstate_enable` function.
> + if (ret)
> + return ret;
> + }
> }
>
> return ret;
> @@ -417,7 +551,7 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
> return;
>
> cpudata->boost_supported = true;
> - amd_pstate_driver.boost_enabled = true;
> + default_pstate_driver->boost_enabled = true;
> }
>
> static void amd_perf_ctl_reset(unsigned int cpu)
> @@ -591,10 +725,61 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
> return sprintf(&buf[0], "%u\n", perf);
> }
>
> +static ssize_t show_energy_performance_available_preferences(
> + struct cpufreq_policy *policy, char *buf)
> +{
> + int i = 0;
> + int ret = 0;
> +
> + while (energy_perf_strings[i] != NULL)
> + ret += sprintf(&buf[ret], "%s ", energy_perf_strings[i++]);
> +
> + ret += sysfs_emit(&buf[ret], "\n");
> +
> + return ret;
> +}
> +
> +static ssize_t store_energy_performance_preference(
> + struct cpufreq_policy *policy, const char *buf, size_t count)
> +{
> + struct amd_cpudata *cpudata = policy->driver_data;
> + char str_preference[21];
> + ssize_t ret;
> +
> + ret = sscanf(buf, "%20s", str_preference);
> + if (ret != 1)
> + return -EINVAL;
> +
> + ret = match_string(energy_perf_strings, -1, str_preference);
> + if (ret < 0)
> + return -EINVAL;
> +
> + mutex_lock(&amd_pstate_limits_lock);
> + ret = amd_pstate_set_energy_pref_index(cpudata, ret);
> + mutex_unlock(&amd_pstate_limits_lock);
> +
> + return ret ?: count;
> +}
> +
> +static ssize_t show_energy_performance_preference(
> + struct cpufreq_policy *policy, char *buf)
> +{
> + struct amd_cpudata *cpudata = policy->driver_data;
> + int preference;
> +
> + preference = amd_pstate_get_energy_pref_index(cpudata);
> + if (preference < 0)
> + return preference;
> +
> + return sysfs_emit(buf, "%s\n", energy_perf_strings[preference]);
> +}
> +
> cpufreq_freq_attr_ro(amd_pstate_max_freq);
> cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
>
> cpufreq_freq_attr_ro(amd_pstate_highest_perf);
> +cpufreq_freq_attr_rw(energy_performance_preference);
> +cpufreq_freq_attr_ro(energy_performance_available_preferences);
>
> static struct freq_attr *amd_pstate_attr[] = {
> &amd_pstate_max_freq,
> @@ -603,6 +788,415 @@ static struct freq_attr *amd_pstate_attr[] = {
> NULL,
> };
>
> +static struct freq_attr *amd_pstate_epp_attr[] = {
> + &amd_pstate_max_freq,
> + &amd_pstate_lowest_nonlinear_freq,
> + &amd_pstate_highest_perf,
> + &energy_performance_preference,
> + &energy_performance_available_preferences,
> + NULL,
> +};
> +
> +static inline void update_boost_state(void)
> +{
> + u64 misc_en;
> + struct amd_cpudata *cpudata;
> +
> + cpudata = all_cpu_data[0];
> + rdmsrl(MSR_K7_HWCR, misc_en);
> + global_params.cppc_boost_disabled = misc_en & BIT_ULL(25);
> +}
> +
> +static int amd_pstate_init_cpu(unsigned int cpunum)
> +{
> + struct amd_cpudata *cpudata;
> +
> + cpudata = all_cpu_data[cpunum];
> + if (!cpudata) {
> + cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
> + if (!cpudata)
> + return -ENOMEM;
> + WRITE_ONCE(all_cpu_data[cpunum], cpudata);
> +
> + cpudata->cpu = cpunum;
> + }
> + cpudata->epp_powersave = -EINVAL;
> + cpudata->epp_policy = 0;
> + pr_debug("controlling: cpu %d\n", cpunum);
> + return 0;
> +}
> +
> +static int __amd_pstate_cpu_init(struct cpufreq_policy *policy)
> +{
> + int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> + struct amd_cpudata *cpudata;
> + struct device *dev;
> + int rc;
> + u64 value;
> +
> + rc = amd_pstate_init_cpu(policy->cpu);
> + if (rc)
> + return rc;
> +
> + cpudata = all_cpu_data[policy->cpu];
> +
> + dev = get_cpu_device(policy->cpu);
> + if (!dev)
> + goto free_cpudata1;
> +
> + rc = amd_pstate_init_perf(cpudata);
> + if (rc)
> + goto free_cpudata1;
> +
> + min_freq = amd_get_min_freq(cpudata);
> + max_freq = amd_get_max_freq(cpudata);
> + nominal_freq = amd_get_nominal_freq(cpudata);
> + lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
> + if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
> + dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
> + min_freq, max_freq);
> + ret = -EINVAL;
> + goto free_cpudata1;
> + }
> +
> + policy->min = min_freq;
> + policy->max = max_freq;
> +
> + policy->cpuinfo.min_freq = min_freq;
> + policy->cpuinfo.max_freq = max_freq;
> + /* It will be updated by governor */
> + policy->cur = policy->cpuinfo.min_freq;
> +
> + /* Initial processor data capability frequencies */
> + cpudata->max_freq = max_freq;
> + cpudata->min_freq = min_freq;
> + cpudata->nominal_freq = nominal_freq;
> + cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
> +
> + policy->driver_data = cpudata;
> +
> + update_boost_state();
> + cpudata->epp_cached = amd_pstate_get_epp(cpudata, value);
> +
> + policy->min = policy->cpuinfo.min_freq;
> + policy->max = policy->cpuinfo.max_freq;
> +
> + if (boot_cpu_has(X86_FEATURE_CPPC))
> + policy->fast_switch_possible = true;
> +
> + if (!shared_mem && boot_cpu_has(X86_FEATURE_CPPC)) {
Here `shared_mem` is used but it is always 0, as it is not overwritten anywhere.
Do we need `shared_mem` variable at all, can we use only X86_FEATURE_CPPC here?
> + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
> + if (ret)
> + return ret;
> + WRITE_ONCE(cpudata->cppc_req_cached, value);
> +
> + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
> + if (ret)
> + return ret;
> + WRITE_ONCE(cpudata->cppc_cap1_cached, value);
> + }
> + amd_pstate_boost_init(cpudata);
> +
> + return 0;
> +
> +free_cpudata1:
> + kfree(cpudata);
> + return ret;
> +}
> +
> +static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
> +{
> + int ret;
> +
> + ret = __amd_pstate_cpu_init(policy);
> + if (ret)
> + return ret;
> + /*
> + * Set the policy to powersave to provide a valid fallback value in case
> + * the default cpufreq governor is neither powersave nor performance.
> + */
> + policy->policy = CPUFREQ_POLICY_POWERSAVE;
> +
> + return 0;
> +}
> +
> +static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
> +{
> + pr_debug("CPU %d exiting\n", policy->cpu);
> + policy->fast_switch_possible = false;
> + return 0;
> +}
> +
> +static void amd_pstate_update_max_freq(unsigned int cpu)
> +{
> + struct cpufreq_policy *policy = policy = cpufreq_cpu_get(cpu);
> +
> + if (!policy)
> + return;
> +
> + refresh_frequency_limits(policy);
> + cpufreq_cpu_put(policy);
> +}
> +
> +static void amd_pstate_epp_update_limits(unsigned int cpu)
> +{
> + mutex_lock(&amd_pstate_driver_lock);
> + update_boost_state();
> + if (global_params.cppc_boost_disabled) {
> + for_each_possible_cpu(cpu)
> + amd_pstate_update_max_freq(cpu);
> + } else {
> + cpufreq_update_policy(cpu);
> + }
> + mutex_unlock(&amd_pstate_driver_lock);
> +}
> +
> +static int cppc_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
> +
> +static inline void amd_pstate_boost_up(struct amd_cpudata *cpudata)
> +{
> + u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
> + u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
> + u32 max_limit = (hwp_req & 0xff);
> + u32 min_limit = (hwp_req & 0xff00) >> 8;
> + u32 boost_level1;
> +
> + /* If max and min are equal or already at max, nothing to boost */
> + if (max_limit == min_limit)
> + return;
> +
> + /* Set boost max and min to initial value */
> + if (!cpudata->cppc_boost_min)
> + cpudata->cppc_boost_min = min_limit;
> +
> + boost_level1 = ((AMD_CPPC_NOMINAL_PERF(hwp_cap) + min_limit) >> 1);
> +
> + if (cpudata->cppc_boost_min < boost_level1)
> + cpudata->cppc_boost_min = boost_level1;
> + else if (cpudata->cppc_boost_min < AMD_CPPC_NOMINAL_PERF(hwp_cap))
> + cpudata->cppc_boost_min = AMD_CPPC_NOMINAL_PERF(hwp_cap);
> + else if (cpudata->cppc_boost_min == AMD_CPPC_NOMINAL_PERF(hwp_cap))
> + cpudata->cppc_boost_min = max_limit;
> + else
> + return;
> +
> + hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
> + hwp_req |= AMD_CPPC_MIN_PERF(cpudata->cppc_boost_min);
> + wrmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, hwp_req);
> + cpudata->last_update = cpudata->sample.time;
> +}
> +
> +static inline void amd_pstate_boost_down(struct amd_cpudata *cpudata)
> +{
> + bool expired;
> +
> + if (cpudata->cppc_boost_min) {
> + expired = time_after64(cpudata->sample.time, cpudata->last_update +
> + cppc_boost_hold_time_ns);
> +
> + if (expired) {
> + wrmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
> + cpudata->cppc_req_cached);
> + cpudata->cppc_boost_min = 0;
> + }
> + }
> +
> + cpudata->last_update = cpudata->sample.time;
> +}
> +
> +static inline void amd_pstate_boost_update_util(struct amd_cpudata *cpudata,
> + u64 time)
> +{
> + cpudata->sample.time = time;
> + if (smp_processor_id() != cpudata->cpu)
> + return;
> +
> + if (cpudata->sched_flags & SCHED_CPUFREQ_IOWAIT) {
> + bool do_io = false;
> +
> + cpudata->sched_flags = 0;
> + /*
> + * Set iowait_boost flag and update time. Since IO WAIT flag
> + * is set all the time, we can't just conclude that there is
> + * some IO bound activity is scheduled on this CPU with just
> + * one occurrence. If we receive at least two in two
> + * consecutive ticks, then we treat as boost candidate.
> + * This is leveraged from Intel Pstate driver.
> + */
> + if (time_before64(time, cpudata->last_io_update + 2 * TICK_NSEC))
> + do_io = true;
> +
> + cpudata->last_io_update = time;
> +
> + if (do_io)
> + amd_pstate_boost_up(cpudata);
> +
> + } else {
> + amd_pstate_boost_down(cpudata);
> + }
> +}
> +
> +static inline void amd_pstate_cppc_update_hook(struct update_util_data *data,
> + u64 time, unsigned int flags)
> +{
> + struct amd_cpudata *cpudata = container_of(data,
> + struct amd_cpudata, update_util);
> +
> + cpudata->sched_flags |= flags;
> +
> + if (smp_processor_id() == cpudata->cpu)
> + amd_pstate_boost_update_util(cpudata, time);
> +}
> +
> +static void amd_pstate_clear_update_util_hook(unsigned int cpu)
> +{
> + struct amd_cpudata *cpudata = all_cpu_data[cpu];
> +
> + if (!cpudata->update_util_set)
> + return;
> +
> + cpufreq_remove_update_util_hook(cpu);
> + cpudata->update_util_set = false;
> + synchronize_rcu();
> +}
> +
> +static void amd_pstate_set_update_util_hook(unsigned int cpu_num)
> +{
> + struct amd_cpudata *cpudata = all_cpu_data[cpu_num];
> +
> + if (!cppc_boost) {
> + if (cpudata->update_util_set)
> + amd_pstate_clear_update_util_hook(cpudata->cpu);
> + return;
> + }
> +
> + if (cpudata->update_util_set)
> + return;
> +
> + cpudata->sample.time = 0;
> + cpufreq_add_update_util_hook(cpu_num, &cpudata->update_util,
> + amd_pstate_cppc_update_hook);
> + cpudata->update_util_set = true;
> +}
> +
> +static void amd_pstate_epp_init(unsigned int cpu)
> +{
> + struct amd_cpudata *cpudata = all_cpu_data[cpu];
> + u32 max_perf, min_perf;
> + u64 value;
> + s16 epp;
> + int ret;
> +
> + max_perf = READ_ONCE(cpudata->highest_perf);
> + min_perf = READ_ONCE(cpudata->lowest_perf);
> +
> + value = READ_ONCE(cpudata->cppc_req_cached);
> +
> + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
> + min_perf = max_perf;
> +
> + /* Initial min/max values for CPPC Performance Controls Register */
> + value &= ~AMD_CPPC_MIN_PERF(~0L);
> + value |= AMD_CPPC_MIN_PERF(min_perf);
> +
> + value &= ~AMD_CPPC_MAX_PERF(~0L);
> + value |= AMD_CPPC_MAX_PERF(max_perf);
> +
> + /* CPPC EPP feature require to set zero to the desire perf bit */
> + value &= ~AMD_CPPC_DES_PERF(~0L);
> + value |= AMD_CPPC_DES_PERF(0);
> +
> + if (cpudata->epp_policy == cpudata->policy)
> + goto skip_epp;
> +
> + cpudata->epp_policy = cpudata->policy;
> +
> + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> + epp = amd_pstate_get_epp(cpudata, value);
> + cpudata->epp_powersave = epp;
> + if (epp < 0)
> + goto skip_epp;
> + /* force the epp value to be zero for performance policy */
> + epp = 0;
> + } else {
> + if (cpudata->epp_powersave < 0)
> + goto skip_epp;
> + /* Get BIOS pre-defined epp value */
> + epp = amd_pstate_get_epp(cpudata, value);
> + if (epp)
> + goto skip_epp;
> + epp = cpudata->epp_powersave;
> + }
> + /* Set initial EPP value */
> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> + value &= ~GENMASK_ULL(31, 24);
> + value |= (u64)epp << 24;
> + }
> +
> +skip_epp:
> + WRITE_ONCE(cpudata->cppc_req_cached, value);
> + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
> + if (!ret)
> + cpudata->epp_cached = epp;
> +}
> +
> +static void amd_pstate_set_max_limits(struct amd_cpudata *cpudata)
> +{
> + u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
> + u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
> + u32 max_limit = (hwp_cap >> 24) & 0xff;
> +
> + hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
> + hwp_req |= AMD_CPPC_MIN_PERF(max_limit);
> + wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, hwp_req);
> +}
> +
> +static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
> +{
> + struct amd_cpudata *cpudata;
> +
> + if (!policy->cpuinfo.max_freq)
> + return -ENODEV;
> +
> + pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
> + policy->cpuinfo.max_freq, policy->max);
> +
> + cpudata = all_cpu_data[policy->cpu];
> + cpudata->policy = policy->policy;
> +
> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> + mutex_lock(&amd_pstate_limits_lock);
> +
> + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
> + amd_pstate_clear_update_util_hook(policy->cpu);
> + amd_pstate_set_max_limits(cpudata);
> + } else {
> + amd_pstate_set_update_util_hook(policy->cpu);
> + }
> +
> + if (boot_cpu_has(X86_FEATURE_CPPC))
> + amd_pstate_epp_init(policy->cpu);
> +
> + mutex_unlock(&amd_pstate_limits_lock);
> + }
> +
> + return 0;
> +}
> +
> +static void amd_pstate_verify_cpu_policy(struct amd_cpudata *cpudata,
> + struct cpufreq_policy_data *policy)
> +{
> + update_boost_state();
> + cpufreq_verify_within_cpu_limits(policy);
> +}
> +
> +static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
> +{
> + amd_pstate_verify_cpu_policy(all_cpu_data[policy->cpu], policy);
> + pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
> + return 0;
> +}
> +
> static struct cpufreq_driver amd_pstate_driver = {
> .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
> .verify = amd_pstate_verify,
> @@ -616,8 +1210,20 @@ static struct cpufreq_driver amd_pstate_driver = {
> .attr = amd_pstate_attr,
> };
>
> +static struct cpufreq_driver amd_pstate_epp_driver = {
> + .flags = CPUFREQ_CONST_LOOPS,
> + .verify = amd_pstate_epp_verify_policy,
> + .setpolicy = amd_pstate_epp_set_policy,
> + .init = amd_pstate_epp_cpu_init,
> + .exit = amd_pstate_epp_cpu_exit,
> + .update_limits = amd_pstate_epp_update_limits,
> + .name = "amd_pstate_epp",
> + .attr = amd_pstate_epp_attr,
> +};
> +
> static int __init amd_pstate_init(void)
> {
> + static struct amd_cpudata **cpudata;
> int ret;
>
> if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
> @@ -641,10 +1247,17 @@ static int __init amd_pstate_init(void)
> if (cpufreq_get_current_driver())
> return -EEXIST;
>
> + if (!epp_off) {
> + WRITE_ONCE(cppc_active, 1);
> + if (!default_pstate_driver)
> + default_pstate_driver = &amd_pstate_epp_driver;
> + }
> +
> /* capability check */
> if (boot_cpu_has(X86_FEATURE_CPPC)) {
> pr_debug("AMD CPPC MSR based functionality is supported\n");
> - amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
> + if (!cppc_active)
> + default_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
> } else {
> pr_debug("AMD CPPC shared memory based functionality is supported\n");
> static_call_update(amd_pstate_enable, cppc_enable);
> @@ -652,6 +1265,10 @@ static int __init amd_pstate_init(void)
> static_call_update(amd_pstate_update_perf, cppc_update_perf);
> }
>
> + cpudata = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
> + if (!cpudata)
> + return -ENOMEM;
> + WRITE_ONCE(all_cpu_data, cpudata);
> /* enable amd pstate feature */
> ret = amd_pstate_enable(true);
> if (ret) {
> @@ -659,9 +1276,9 @@ static int __init amd_pstate_init(void)
> return ret;
> }
>
> - ret = cpufreq_register_driver(&amd_pstate_driver);
> + ret = cpufreq_register_driver(default_pstate_driver);
> if (ret)
> - pr_err("failed to register amd_pstate_driver with return %d\n",
> + pr_err("failed to register amd pstate driver with return %d\n",
> ret);
>
> return ret;
> @@ -676,8 +1293,14 @@ static int __init amd_pstate_param(char *str)
> if (!strcmp(str, "disable")) {
> cppc_load = 0;
> pr_info("driver is explicitly disabled\n");
> - } else if (!strcmp(str, "passive"))
> + } else if (!strcmp(str, "passive")) {
> + epp_off = 1;
> cppc_load = 1;
> + default_pstate_driver = &amd_pstate_driver;
> + } else if (!strcmp(str, "active")) {
> + cppc_load = 1;
> + default_pstate_driver = &amd_pstate_epp_driver;
> + }
>
> return 0;
> }
> diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> index 1c4b8659f171..7e6e8cab97b3 100644
> --- a/include/linux/amd-pstate.h
> +++ b/include/linux/amd-pstate.h
> @@ -25,6 +25,7 @@ struct amd_aperf_mperf {
> u64 aperf;
> u64 mperf;
> u64 tsc;
> + u64 time;
> };
>
> /**
> @@ -47,6 +48,18 @@ struct amd_aperf_mperf {
> * @prev: Last Aperf/Mperf/tsc count value read from register
> * @freq: current cpu frequency value
> * @boost_supported: check whether the Processor or SBIOS supports boost mode
> + * @epp_powersave: Last saved CPPC energy performance preference
> + when policy switched to performance
> + * @epp_policy: Last saved policy used to set energy-performance preference
> + * @epp_cached: Cached CPPC energy-performance preference value
> + * @policy: Cpufreq policy value
> + * @sched_flags: Store scheduler flags for possible cross CPU update
> + * @update_util_set: CPUFreq utility callback is set
> + * @last_update: Time stamp of the last performance state update
> + * @cppc_boost_min: Last CPPC boosted min performance state
> + * @cppc_cap1_cached: Cached value of the last CPPC Capabilities MSR
> + * @update_util: Cpufreq utility callback information
> + * @sample: the stored performance sample
> *
> * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
> * represents all the attributes and goals that AMD P-State requests at runtime.
> @@ -72,6 +85,74 @@ struct amd_cpudata {
>
> u64 freq;
> bool boost_supported;
> +
> + /* EPP feature related attributes*/
> + s16 epp_powersave;
> + s16 epp_policy;
> + s16 epp_cached;
> + u32 policy;
> + u32 sched_flags;
> + bool update_util_set;
> + u64 last_update;
> + u64 last_io_update;
> + u32 cppc_boost_min;
> + u64 cppc_cap1_cached;
> + struct update_util_data update_util;
> + struct amd_aperf_mperf sample;
> +};
> +
> +/**
> + * struct amd_pstate_params - global parameters for the performance control
> + * @ cppc_boost_disabled wheher the core performance boost disabled
> + */
> +struct amd_pstate_params {
> + bool cppc_boost_disabled;
> +};
> +
> +#define AMD_CPPC_EPP_PERFORMANCE 0x00
> +#define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80
> +#define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF
> +#define AMD_CPPC_EPP_POWERSAVE 0xFF
> +
> +/*
> + * AMD Energy Preference Performance (EPP)
> + * The EPP is used in the CCLK DPM controller to drive
> + * the frequency that a core is going to operate during
> + * short periods of activity. EPP values will be utilized for
> + * different OS profiles (balanced, performance, power savings)
> + * display strings corresponding to EPP index in the
> + * energy_perf_strings[]
> + * index String
> + *-------------------------------------
> + * 0 default
> + * 1 performance
> + * 2 balance_performance
> + * 3 balance_power
> + * 4 power
> + */
> +enum energy_perf_value_index {
> + EPP_INDEX_DEFAULT = 0,
> + EPP_INDEX_PERFORMANCE,
> + EPP_INDEX_BALANCE_PERFORMANCE,
> + EPP_INDEX_BALANCE_POWERSAVE,
> + EPP_INDEX_POWERSAVE,
> +};
> +
> +static const char * const energy_perf_strings[] = {
> + [EPP_INDEX_DEFAULT] = "default",
> + [EPP_INDEX_PERFORMANCE] = "performance",
> + [EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
> + [EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
> + [EPP_INDEX_POWERSAVE] = "power",
> + NULL
> +};
> +
> +static unsigned int epp_values[] = {
> + [EPP_INDEX_DEFAULT] = 0,
> + [EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
> + [EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
> + [EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
> + [EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
> };
>
> #endif /* _LINUX_AMD_PSTATE_H */
Thanks & Regards,
Wyes
--
Thanks & Regards,
Wyes
Powered by blists - more mailing lists