Message-ID: <74888878-4d10-5822-158e-aaeb5bc4c4f5@amd.com>
Date: Thu, 10 Nov 2022 09:59:39 -0600
From: Nathan Fontenot <nathan.fontenot@....com>
To: "Limonciello, Mario" <mario.limonciello@....com>,
Perry Yuan <Perry.Yuan@....com>, rafael.j.wysocki@...el.com,
ray.huang@....com, viresh.kumar@...aro.org
Cc: Deepak.Sharma@....com, Alexander.Deucher@....com,
Shimmer.Huang@....com, Xiaojian.Du@....com, Li.Meng@....com,
linux-pm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v3 4/8] cpufreq: amd_pstate: add AMD Pstate EPP support
for the MSR based processors
On 11/7/22 14:32, Limonciello, Mario wrote:
> On 11/7/2022 11:57, Perry Yuan wrote:
>> Add EPP driver support for those AMD CPUs which has full MSR feature
>> enabled, The EPP is used in the DPM controller to drive the frequency
>> that a core is going to operate during short periods of activity.
>
> To avoid the run-on sentence, here is a different wording proposal.
>
> Add EPP driver support for AMD SoCs which support a dedicated MSR for CPPC. EPP is used by the DPM controller to configure the frequency that a core operates at during short periods of activity.
>
>>
>> EPP values will be utilized for different OS profiles (balanced, performance,
>> power savings). cppc performance can be controlled by the user space interface
>> sys attributes for min and max frequency limits, when pstate driver is
>> working under power save policy.
>>
>> EPP scale is 0 - 255, 0 is the max performance and 255 is min level.
>> balance_performance (0x80) can provide best balance performance and watt for
>> most of system, meanwhile user can choose performance policy on needs.
>
> As a user reading this message, it is confusing that there are values and then there are strings, but you don't know the linkage between the two. My proposal for rewording this:
>
> The SoC EPP targets are configured on a scale from 0 to 255 where 0 represents maximum performance and 255 represents maximum efficiency.
>
> The amd-pstate driver exports profile string names to userspace that are tied to specific EPP values.
>
> The balance_performance string (0x80) provides the best balance for efficiency versus power on most systems, but users can choose other strings to meet their needs as well.
>
>>
>> $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences
>> default performance balance_performance balance_power power
>>
>> $ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference
>> balance_performance
>>
>> Signed-off-by: Perry Yuan <Perry.Yuan@....com>
>> ---
>> drivers/cpufreq/amd-pstate.c | 658 ++++++++++++++++++++++++++++++++++-
>> include/linux/amd-pstate.h | 81 +++++
>> 2 files changed, 734 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>> index 14906431dc15..eb82bc6a7f66 100644
>> --- a/drivers/cpufreq/amd-pstate.c
>> +++ b/drivers/cpufreq/amd-pstate.c
>> @@ -60,8 +60,136 @@
>> * module parameter to be able to enable it manually for debugging.
>> */
>> static bool shared_mem __read_mostly;
>> +static int cppc_active __read_mostly;
>> +static int disable_pstate_load __initdata;
>> +static int epp_off __initdata;
>> -static struct cpufreq_driver amd_pstate_driver;
>> +static struct cpufreq_driver *default_pstate_driver;
>> +static struct amd_cpudata **all_cpu_data;
>> +
>> +static struct amd_pstate_params global_params;
>> +
>> +static DEFINE_MUTEX(amd_pstate_limits_lock);
>> +static DEFINE_MUTEX(amd_pstate_driver_lock);
>> +
>> +static bool cppc_boost __read_mostly;
>> +struct kobject *amd_pstate_kobj;
>> +
>> +#ifdef CONFIG_ACPI_CPPC_LIB
>> +static s16 amd_pstate_get_epp(struct amd_cpudata *cpudata, u64 cppc_req_cached)
>> +{
>> + s16 epp;
>> + struct cppc_perf_caps perf_caps;
>> + int ret;
>> +
>> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
>> + if (!cppc_req_cached) {
>> + epp = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
>> + &cppc_req_cached);
>> + if (epp)
>> + return epp;
>> + }
>> + epp = (cppc_req_cached >> 24) & 0xFF;
>> + } else {
>> + ret = cppc_get_epp_caps(cpudata->cpu, &perf_caps);
>> + if (ret < 0) {
>> + pr_debug("Could not retrieve energy perf value (%d)\n", ret);
>> + return -EIO;
>> + }
>> + epp = (s16) perf_caps.energy_perf;
>> + }
>> +
>> + return epp;
>> +}
>> +#endif
>> +
>> +static int amd_pstate_get_energy_pref_index(struct amd_cpudata *cpudata, int *raw_epp)
>> +{
>> + s16 epp;
>> + int index = -EINVAL;
>> +
>> + *raw_epp = 0;
>> + epp = amd_pstate_get_epp(cpudata, 0);
>> + if (epp < 0)
>> + return epp;
>> +
>> + switch (epp) {
>> + case AMD_CPPC_EPP_PERFORMANCE:
>> + index = EPP_INDEX_PERFORMANCE;
>> + break;
>> + case AMD_CPPC_EPP_BALANCE_PERFORMANCE:
>> + index = EPP_INDEX_BALANCE_PERFORMANCE;
>> + break;
>> + case AMD_CPPC_EPP_BALANCE_POWERSAVE:
>> + index = EPP_INDEX_BALANCE_POWERSAVE;
>> + break;
>> + case AMD_CPPC_EPP_POWERSAVE:
>> + index = EPP_INDEX_POWERSAVE;
>> + break;
>> + default:
>> + *raw_epp = epp;
>> + index = 0;
>> + }
>> +
>> + return index;
>> +}
>> +
>> +#ifdef CONFIG_ACPI_CPPC_LIB
>> +static int amd_pstate_set_epp(struct amd_cpudata *cpudata, u32 epp)
>> +{
>> + int ret;
>> + struct cppc_perf_ctrls perf_ctrls;
>> +
>> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
>> + u64 value = READ_ONCE(cpudata->cppc_req_cached);
>> +
>> + value &= ~GENMASK_ULL(31, 24);
>> + value |= (u64)epp << 24;
>> + WRITE_ONCE(cpudata->cppc_req_cached, value);
>> +
>> + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
>> + if (!ret)
>> + cpudata->epp_cached = epp;
>> + } else {
>> + perf_ctrls.energy_perf = epp;
>> + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls);
>> + if (ret) {
>> + pr_debug("failed to set energy perf value (%d)\n", ret);
>> + return ret;
>> + }
>> + cpudata->epp_cached = epp;
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static int amd_pstate_set_energy_pref_index(struct amd_cpudata *cpudata,
>> + int pref_index, bool use_raw,
>> + u32 raw_epp)
>> +{
>> + int epp = -EINVAL;
>> + int ret;
>> +
>> + if (!pref_index) {
>> + pr_debug("EPP pref_index is invalid\n");
>> + return -EINVAL;
>> + }
>> +
>> + if (use_raw)
>> + epp = raw_epp;
>> + else if (epp == -EINVAL)
>> + epp = epp_values[pref_index];
>> +
>> + if (epp > 0 && cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
>> + pr_debug("EPP cannot be set under performance policy\n");
>> + return -EBUSY;
>> + }
>> +
>> + ret = amd_pstate_set_epp(cpudata, epp);
>> +
>> + return ret;
>> +}
>> +#endif
>> static inline int pstate_enable(bool enable)
>> {
>> @@ -71,11 +199,25 @@ static inline int pstate_enable(bool enable)
>> static int cppc_enable(bool enable)
>> {
>> int cpu, ret = 0;
>> + struct cppc_perf_ctrls perf_ctrls;
>> for_each_present_cpu(cpu) {
>> ret = cppc_set_enable(cpu, enable);
>> if (ret)
>> return ret;
>> +
>> + /* Enable autonomous mode for EPP */
>> + if (!cppc_active) {
>> + ret = cppc_set_auto_epp(cpu, enable);
>> + if (ret)
>> + return ret;
>> +
>> + /* Set desired perf as zero to allow EPP firmware control */
>> + perf_ctrls.desired_perf = 0;
>> + ret = cppc_set_perf(cpu, &perf_ctrls);
>> + if (ret)
>> + return ret;
>> + }
>> }
>> return ret;
>> @@ -418,7 +560,7 @@ static void amd_pstate_boost_init(struct amd_cpudata *cpudata)
>> return;
>> cpudata->boost_supported = true;
>> - amd_pstate_driver.boost_enabled = true;
>> + default_pstate_driver->boost_enabled = true;
>> }
>> static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>> @@ -582,10 +724,74 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
>> return sprintf(&buf[0], "%u\n", perf);
>> }
>> +static ssize_t show_energy_performance_available_preferences(
>> + struct cpufreq_policy *policy, char *buf)
>> +{
>> + int i = 0;
>> + int ret = 0;
>> +
>> + while (energy_perf_strings[i] != NULL)
>> + ret += sprintf(&buf[ret], "%s ", energy_perf_strings[i++]);
>> +
>> + ret += sprintf(&buf[ret], "\n");
>> +
>> + return ret;
>> +}
>> +
>> +static ssize_t store_energy_performance_preference(
>> + struct cpufreq_policy *policy, const char *buf, size_t count)
>> +{
>> + struct amd_cpudata *cpudata = policy->driver_data;
>> + char str_preference[21];
>> + bool raw = false;
>> + ssize_t ret;
>> + u32 epp = 0;
>> +
>> + ret = sscanf(buf, "%20s", str_preference);
>> + if (ret != 1)
>> + return -EINVAL;
>> +
>> + ret = match_string(energy_perf_strings, -1, str_preference);
>> + if (ret < 0) {
>> + ret = kstrtouint(buf, 10, &epp);
>> + if (ret)
>> + return ret;
>> +
>> + if ((epp > 255) || (epp < 0))
>> + return -EINVAL;
>> +
>> + raw = true;
>> + }
>
> What's the reason for supporting putting the raw number in here for stuff "in between"? I think this is going to be pretty confusing to userspace in that you can use either string values or integer values. It also means that if userspace writes an integer that has a mapping to a string and tries to read it back, they'll get the string rather than the integer!
>
> I can understand using the raw values for internal characterization and development to possibly introduce a new mapping string, but I don't think that makes sense in the kernel.
>
This is really doing what Intel does for handling EPP settings. Yes, writing a value and getting back a string
could be a bit confusing, but it is already done on the Intel side. I think keeping EPP value setting common
would be a good thing if we can do it.

I don't think we should remove the ability to set raw values; we're allowed a range of 0 - 255 for the EPP
setting. Why would we then limit ourselves to only four or so values?
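
As a quick illustration of the round-trip behavior under discussion (untested, and assuming the sysfs
paths from the commit message):

$ echo 128 > /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference
$ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference
balance_performance

$ echo 200 > /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference
$ cat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_preference
200

A raw write that matches one of the predefined EPP values (0x80 here) reads back as its string name,
while an unmapped raw value reads back as the number itself.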
-Nathan
>> +
>> + mutex_lock(&amd_pstate_limits_lock);
>> + ret = amd_pstate_set_energy_pref_index(cpudata, ret, raw, epp);
>> + mutex_unlock(&amd_pstate_limits_lock);
>> +
>> + return ret ?: count;
>> +}
>> +
>> +static ssize_t show_energy_performance_preference(
>> + struct cpufreq_policy *policy, char *buf)
>> +{
>> + struct amd_cpudata *cpudata = policy->driver_data;
>> + int preference, raw_epp;
>> +
>> + preference = amd_pstate_get_energy_pref_index(cpudata, &raw_epp);
>> + if (preference < 0)
>> + return preference;
>> +
>> + if (raw_epp)
>> + return sprintf(buf, "%d\n", raw_epp);
>> + else
>> + return sprintf(buf, "%s\n", energy_perf_strings[preference]);
>> +}
>> +
>> cpufreq_freq_attr_ro(amd_pstate_max_freq);
>> cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
>> cpufreq_freq_attr_ro(amd_pstate_highest_perf);
>> +cpufreq_freq_attr_rw(energy_performance_preference);
>> +cpufreq_freq_attr_ro(energy_performance_available_preferences);
>> static struct freq_attr *amd_pstate_attr[] = {
>> &amd_pstate_max_freq,
>> @@ -594,6 +800,415 @@ static struct freq_attr *amd_pstate_attr[] = {
>> NULL,
>> };
>> +static struct freq_attr *amd_pstate_epp_attr[] = {
>> + &amd_pstate_max_freq,
>> + &amd_pstate_lowest_nonlinear_freq,
>> + &amd_pstate_highest_perf,
>> + &energy_performance_preference,
>> + &energy_performance_available_preferences,
>> + NULL,
>> +};
>> +
>> +static inline void update_boost_state(void)
>> +{
>> + u64 misc_en;
>> + struct amd_cpudata *cpudata;
>> +
>> + cpudata = all_cpu_data[0];
>> + rdmsrl(MSR_K7_HWCR, misc_en);
>> + global_params.cppc_boost_disabled = misc_en & BIT_ULL(25);
>> +}
>> +
>> +static int amd_pstate_init_cpu(unsigned int cpunum)
>> +{
>> + struct amd_cpudata *cpudata;
>> +
>> + cpudata = all_cpu_data[cpunum];
>> + if (!cpudata) {
>> + cpudata = kzalloc(sizeof(*cpudata), GFP_KERNEL);
>> + if (!cpudata)
>> + return -ENOMEM;
>> + WRITE_ONCE(all_cpu_data[cpunum], cpudata);
>> +
>> + cpudata->cpu = cpunum;
>> + }
>> + cpudata->epp_powersave = -EINVAL;
>> + cpudata->epp_policy = 0;
>> + pr_debug("controlling: cpu %d\n", cpunum);
>> + return 0;
>> +}
>> +
>> +static int __amd_pstate_cpu_init(struct cpufreq_policy *policy)
>> +{
>> + int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
>> + struct amd_cpudata *cpudata;
>> + struct device *dev;
>> + int rc;
>> + u64 value;
>> +
>> + rc = amd_pstate_init_cpu(policy->cpu);
>> + if (rc)
>> + return rc;
>> +
>> + cpudata = all_cpu_data[policy->cpu];
>> +
>> + dev = get_cpu_device(policy->cpu);
>> + if (!dev)
>> + goto free_cpudata1;
>> +
>> + rc = amd_pstate_init_perf(cpudata);
>> + if (rc)
>> + goto free_cpudata1;
>> +
>> + min_freq = amd_get_min_freq(cpudata);
>> + max_freq = amd_get_max_freq(cpudata);
>> + nominal_freq = amd_get_nominal_freq(cpudata);
>> + lowest_nonlinear_freq = amd_get_lowest_nonlinear_freq(cpudata);
>> + if (min_freq < 0 || max_freq < 0 || min_freq > max_freq) {
>> + dev_err(dev, "min_freq(%d) or max_freq(%d) value is incorrect\n",
>> + min_freq, max_freq);
>> + ret = -EINVAL;
>> + goto free_cpudata1;
>> + }
>> +
>> + policy->min = min_freq;
>> + policy->max = max_freq;
>> +
>> + policy->cpuinfo.min_freq = min_freq;
>> + policy->cpuinfo.max_freq = max_freq;
>> + /* It will be updated by governor */
>> + policy->cur = policy->cpuinfo.min_freq;
>> +
>> + /* Initial processor data capability frequencies */
>> + cpudata->max_freq = max_freq;
>> + cpudata->min_freq = min_freq;
>> + cpudata->nominal_freq = nominal_freq;
>> + cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
>> +
>> + policy->driver_data = cpudata;
>> +
>> + update_boost_state();
>> + cpudata->epp_cached = amd_pstate_get_epp(cpudata, value);
>> +
>> + policy->min = policy->cpuinfo.min_freq;
>> + policy->max = policy->cpuinfo.max_freq;
>> +
>> + if (boot_cpu_has(X86_FEATURE_CPPC))
>> + policy->fast_switch_possible = true;
>> +
>> + if (!shared_mem && boot_cpu_has(X86_FEATURE_CPPC)) {
>> + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
>> + if (ret)
>> + return ret;
>> + WRITE_ONCE(cpudata->cppc_req_cached, value);
>> +
>> + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1, &value);
>> + if (ret)
>> + return ret;
>> + WRITE_ONCE(cpudata->cppc_cap1_cached, value);
>> + }
>> + amd_pstate_boost_init(cpudata);
>> +
>> + return 0;
>> +
>> +free_cpudata1:
>> + kfree(cpudata);
>> + return ret;
>> +}
>> +
>> +static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>> +{
>> + int ret;
>> +
>> + ret = __amd_pstate_cpu_init(policy);
>> + if (ret)
>> + return ret;
>> + /*
>> + * Set the policy to powersave to provide a valid fallback value in case
>> + * the default cpufreq governor is neither powersave nor performance.
>> + */
>> + policy->policy = CPUFREQ_POLICY_POWERSAVE;
>> +
>> + return 0;
>> +}
>> +
>> +static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
>> +{
>> + pr_debug("amd-pstate: CPU %d exiting\n", policy->cpu);
>
> Drop the "amd-pstate:", this file has pr_fmt.
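> (The file already defines something along these lines:
>
>     #define pr_fmt(fmt) "amd-pstate: " fmt
>
> so every pr_*() call in it picks up the prefix automatically.)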
>
>> + policy->fast_switch_possible = false;
>> + return 0;
>> +}
>> +
>> +static void amd_pstate_update_max_freq(unsigned int cpu)
>> +{
>> + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
>> +
>> + if (!policy)
>> + return;
>> +
>> + refresh_frequency_limits(policy);
>> + cpufreq_cpu_put(policy);
>> +}
>> +
>> +static void amd_pstate_epp_update_limits(unsigned int cpu)
>> +{
>> + mutex_lock(&amd_pstate_driver_lock);
>> + update_boost_state();
>> + if (global_params.cppc_boost_disabled) {
>> + for_each_possible_cpu(cpu)
>> + amd_pstate_update_max_freq(cpu);
>> + } else {
>> + cpufreq_update_policy(cpu);
>> + }
>> + mutex_unlock(&amd_pstate_driver_lock);
>> +}
>> +
>> +static int cppc_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
>> +
>> +static inline void amd_pstate_boost_up(struct amd_cpudata *cpudata)
>> +{
>> + u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
>> + u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
>> + u32 max_limit = (hwp_req & 0xff);
>> + u32 min_limit = (hwp_req & 0xff00) >> 8;
>> + u32 boost_level1;
>> +
>> + /* If max and min are equal or already at max, nothing to boost */
>> + if (max_limit == min_limit)
>> + return;
>> +
>> + /* Set boost max and min to initial value */
>> + if (!cpudata->cppc_boost_min)
>> + cpudata->cppc_boost_min = min_limit;
>> +
>> + boost_level1 = ((AMD_CPPC_NOMINAL_PERF(hwp_cap) + min_limit) >> 1);
>> +
>> + if (cpudata->cppc_boost_min < boost_level1)
>> + cpudata->cppc_boost_min = boost_level1;
>> + else if (cpudata->cppc_boost_min < AMD_CPPC_NOMINAL_PERF(hwp_cap))
>> + cpudata->cppc_boost_min = AMD_CPPC_NOMINAL_PERF(hwp_cap);
>> + else if (cpudata->cppc_boost_min == AMD_CPPC_NOMINAL_PERF(hwp_cap))
>> + cpudata->cppc_boost_min = max_limit;
>> + else
>> + return;
>> +
>> + hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
>> + hwp_req |= AMD_CPPC_MIN_PERF(cpudata->cppc_boost_min);
>> + wrmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, hwp_req);
>> + cpudata->last_update = cpudata->sample.time;
>> +}
>> +
>> +static inline void amd_pstate_boost_down(struct amd_cpudata *cpudata)
>> +{
>> + bool expired;
>> +
>> + if (cpudata->cppc_boost_min) {
>> + expired = time_after64(cpudata->sample.time, cpudata->last_update +
>> + cppc_boost_hold_time_ns);
>> +
>> + if (expired) {
>> + wrmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ,
>> + cpudata->cppc_req_cached);
>> + cpudata->cppc_boost_min = 0;
>> + }
>> + }
>> +
>> + cpudata->last_update = cpudata->sample.time;
>> +}
>> +
>> +static inline void amd_pstate_boost_update_util(struct amd_cpudata *cpudata,
>> + u64 time)
>> +{
>> + cpudata->sample.time = time;
>> + if (smp_processor_id() != cpudata->cpu)
>> + return;
>> +
>> + if (cpudata->sched_flags & SCHED_CPUFREQ_IOWAIT) {
>> + bool do_io = false;
>> +
>> + cpudata->sched_flags = 0;
>> + /*
>> + * Set iowait_boost flag and update time. Since the IO WAIT flag
>> + * is set all the time, we can't conclude from just one
>> + * occurrence that some IO-bound activity is scheduled on this
>> + * CPU. If we receive at least two in two consecutive ticks,
>> + * then we treat the CPU as a boost candidate.
>> + * This is leveraged from the Intel P-state driver.
>> + */
>> + if (time_before64(time, cpudata->last_io_update + 2 * TICK_NSEC))
>> + do_io = true;
>> +
>> + cpudata->last_io_update = time;
>> +
>> + if (do_io)
>> + amd_pstate_boost_up(cpudata);
>> +
>> + } else {
>> + amd_pstate_boost_down(cpudata);
>> + }
>> +}
>> +
>> +static inline void amd_pstate_cppc_update_hook(struct update_util_data *data,
>> + u64 time, unsigned int flags)
>> +{
>> + struct amd_cpudata *cpudata = container_of(data,
>> + struct amd_cpudata, update_util);
>> +
>> + cpudata->sched_flags |= flags;
>> +
>> + if (smp_processor_id() == cpudata->cpu)
>> + amd_pstate_boost_update_util(cpudata, time);
>> +}
>> +
>> +static void amd_pstate_clear_update_util_hook(unsigned int cpu)
>> +{
>> + struct amd_cpudata *cpudata = all_cpu_data[cpu];
>> +
>> + if (!cpudata->update_util_set)
>> + return;
>> +
>> + cpufreq_remove_update_util_hook(cpu);
>> + cpudata->update_util_set = false;
>> + synchronize_rcu();
>> +}
>> +
>> +static void amd_pstate_set_update_util_hook(unsigned int cpu_num)
>> +{
>> + struct amd_cpudata *cpudata = all_cpu_data[cpu_num];
>> +
>> + if (!cppc_boost) {
>> + if (cpudata->update_util_set)
>> + amd_pstate_clear_update_util_hook(cpudata->cpu);
>> + return;
>> + }
>> +
>> + if (cpudata->update_util_set)
>> + return;
>> +
>> + cpudata->sample.time = 0;
>> + cpufreq_add_update_util_hook(cpu_num, &cpudata->update_util,
>> + amd_pstate_cppc_update_hook);
>> + cpudata->update_util_set = true;
>> +}
>> +
>> +static void amd_pstate_epp_init(unsigned int cpu)
>> +{
>> + struct amd_cpudata *cpudata = all_cpu_data[cpu];
>> + u32 max_perf, min_perf;
>> + u64 value;
>> + s16 epp;
>> + int ret;
>> +
>> + max_perf = READ_ONCE(cpudata->highest_perf);
>> + min_perf = READ_ONCE(cpudata->lowest_perf);
>> +
>> + value = READ_ONCE(cpudata->cppc_req_cached);
>> +
>> + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE)
>> + min_perf = max_perf;
>> +
>> + /* Initial min/max values for CPPC Performance Controls Register */
>> + value &= ~AMD_CPPC_MIN_PERF(~0L);
>> + value |= AMD_CPPC_MIN_PERF(min_perf);
>> +
>> + value &= ~AMD_CPPC_MAX_PERF(~0L);
>> + value |= AMD_CPPC_MAX_PERF(max_perf);
>> +
>> + /* The CPPC EPP feature requires the desired perf field to be set to zero */
>> + value &= ~AMD_CPPC_DES_PERF(~0L);
>> + value |= AMD_CPPC_DES_PERF(0);
>> +
>> + if (cpudata->epp_policy == cpudata->policy)
>> + goto skip_epp;
>> +
>> + cpudata->epp_policy = cpudata->policy;
>> +
>> + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
>> + epp = amd_pstate_get_epp(cpudata, value);
>> + cpudata->epp_powersave = epp;
>> + if (epp < 0)
>> + goto skip_epp;
>> + /* force the epp value to be zero for performance policy */
>> + epp = 0;
>> + } else {
>> + if (cpudata->epp_powersave < 0)
>> + goto skip_epp;
>> + /* Get BIOS pre-defined epp value */
>> + epp = amd_pstate_get_epp(cpudata, value);
>> + if (epp)
>> + goto skip_epp;
>> + epp = cpudata->epp_powersave;
>> + }
>> + /* Set initial EPP value */
>> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
>> + value &= ~GENMASK_ULL(31, 24);
>> + value |= (u64)epp << 24;
>> + }
>> +
>> +skip_epp:
>> + WRITE_ONCE(cpudata->cppc_req_cached, value);
>> + ret = wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
>> + if (!ret)
>> + cpudata->epp_cached = epp;
>> +}
>> +
>> +static void amd_pstate_set_max_limits(struct amd_cpudata *cpudata)
>> +{
>> + u64 hwp_cap = READ_ONCE(cpudata->cppc_cap1_cached);
>> + u64 hwp_req = READ_ONCE(cpudata->cppc_req_cached);
>> + u32 max_limit = (hwp_cap >> 24) & 0xff;
>> +
>> + hwp_req &= ~AMD_CPPC_MIN_PERF(~0L);
>> + hwp_req |= AMD_CPPC_MIN_PERF(max_limit);
>> + wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, hwp_req);
>> +}
>> +
>> +static int amd_pstate_epp_set_policy(struct cpufreq_policy *policy)
>> +{
>> + struct amd_cpudata *cpudata;
>> +
>> + if (!policy->cpuinfo.max_freq)
>> + return -ENODEV;
>> +
>> + pr_debug("set_policy: cpuinfo.max %u policy->max %u\n",
>> + policy->cpuinfo.max_freq, policy->max);
>> +
>> + cpudata = all_cpu_data[policy->cpu];
>> + cpudata->policy = policy->policy;
>> +
>> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
>> + mutex_lock(&amd_pstate_limits_lock);
>> +
>> + if (cpudata->policy == CPUFREQ_POLICY_PERFORMANCE) {
>> + amd_pstate_clear_update_util_hook(policy->cpu);
>> + amd_pstate_set_max_limits(cpudata);
>> + } else {
>> + amd_pstate_set_update_util_hook(policy->cpu);
>> + }
>> +
>> + if (boot_cpu_has(X86_FEATURE_CPPC))
>> + amd_pstate_epp_init(policy->cpu);
>> +
>> + mutex_unlock(&amd_pstate_limits_lock);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static void amd_pstate_verify_cpu_policy(struct amd_cpudata *cpudata,
>> + struct cpufreq_policy_data *policy)
>> +{
>> + update_boost_state();
>> + cpufreq_verify_within_cpu_limits(policy);
>> +}
>> +
>> +static int amd_pstate_epp_verify_policy(struct cpufreq_policy_data *policy)
>> +{
>> + amd_pstate_verify_cpu_policy(all_cpu_data[policy->cpu], policy);
>> + pr_debug("policy_max =%d, policy_min=%d\n", policy->max, policy->min);
>> + return 0;
>> +}
>> +
>> static struct cpufreq_driver amd_pstate_driver = {
>> .flags = CPUFREQ_CONST_LOOPS | CPUFREQ_NEED_UPDATE_LIMITS,
>> .verify = amd_pstate_verify,
>> @@ -607,8 +1222,20 @@ static struct cpufreq_driver amd_pstate_driver = {
>> .attr = amd_pstate_attr,
>> };
>> +static struct cpufreq_driver amd_pstate_epp_driver = {
>> + .flags = CPUFREQ_CONST_LOOPS,
>> + .verify = amd_pstate_epp_verify_policy,
>> + .setpolicy = amd_pstate_epp_set_policy,
>> + .init = amd_pstate_epp_cpu_init,
>> + .exit = amd_pstate_epp_cpu_exit,
>> + .update_limits = amd_pstate_epp_update_limits,
>> + .name = "amd_pstate_epp",
>> + .attr = amd_pstate_epp_attr,
>> +};
>> +
>> static int __init amd_pstate_init(void)
>> {
>> + static struct amd_cpudata **cpudata;
>> int ret;
>> if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
>> @@ -623,10 +1250,18 @@ static int __init amd_pstate_init(void)
>> if (cpufreq_get_current_driver())
>> return -EEXIST;
>> + if (!epp_off) {
>> + WRITE_ONCE(cppc_active, 1);
>> + if (!default_pstate_driver)
>> + default_pstate_driver = &amd_pstate_epp_driver;
>> + }
>> + pr_info("AMD CPPC loading with %s driver instance.\n", default_pstate_driver->name);
>
> This is pretty noisy; do we really need it on every boot if we can easily check it from sysfs?
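>
> For example, something like this should already tell us which driver is in use
> (assuming the driver name registered later in this patch):
>
>     $ cat /sys/devices/system/cpu/cpufreq/policy0/scaling_driver
>     amd_pstate_epp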
>
>> +
>> /* capability check */
>> if (boot_cpu_has(X86_FEATURE_CPPC)) {
>> + if (!cppc_active)
>> + default_pstate_driver->adjust_perf = amd_pstate_adjust_perf;
>> pr_debug("AMD CPPC MSR based functionality is supported\n");
>> - amd_pstate_driver.adjust_perf = amd_pstate_adjust_perf;
>> } else if (shared_mem) {
>> static_call_update(amd_pstate_enable, cppc_enable);
>> static_call_update(amd_pstate_init_perf, cppc_init_perf);
>> @@ -636,6 +1271,10 @@ static int __init amd_pstate_init(void)
>> return -ENODEV;
>> }
>> + cpudata = vzalloc(array_size(sizeof(void *), num_possible_cpus()));
>> + if (!cpudata)
>> + return -ENOMEM;
>> + WRITE_ONCE(all_cpu_data, cpudata);
>> /* enable amd pstate feature */
>> ret = amd_pstate_enable(true);
>> if (ret) {
>> @@ -643,9 +1282,9 @@ static int __init amd_pstate_init(void)
>> return ret;
>> }
>> - ret = cpufreq_register_driver(&amd_pstate_driver);
>> + ret = cpufreq_register_driver(default_pstate_driver);
>> if (ret)
>> - pr_err("failed to register amd_pstate_driver with return %d\n",
>> + pr_err("failed to register amd pstate driver with return %d\n",
>> ret);
>> return ret;
>> @@ -657,6 +1296,15 @@ static int __init amd_pstate_param(char *str)
>> if (!str)
>> return -EINVAL;
>> + if (!strcmp(str, "disable"))
>> + disable_pstate_load = 1;
>> + else if (!strcmp(str, "active")) {
>> + default_pstate_driver = &amd_pstate_epp_driver;
>> + } else if (!strcmp(str, "passive")) {
>> + epp_off = 1;
>> + default_pstate_driver = &amd_pstate_driver;
>> + }
>> +
>> + /* enable shared memory type CPPC. If your processor has no MSR, you have to add this
>> * to your grub config to make the cppc driver load successfully.
>> */
>> diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
>> index 1c4b8659f171..7e6e8cab97b3 100644
>> --- a/include/linux/amd-pstate.h
>> +++ b/include/linux/amd-pstate.h
>> @@ -25,6 +25,7 @@ struct amd_aperf_mperf {
>> u64 aperf;
>> u64 mperf;
>> u64 tsc;
>> + u64 time;
>> };
>> /**
>> @@ -47,6 +48,18 @@ struct amd_aperf_mperf {
>> * @prev: Last Aperf/Mperf/tsc count value read from register
>> * @freq: current cpu frequency value
>> * @boost_supported: check whether the Processor or SBIOS supports boost mode
>> + * @epp_powersave: Last saved CPPC energy performance preference
>> + when policy switched to performance
>> + * @epp_policy: Last saved policy used to set energy-performance preference
>> + * @epp_cached: Cached CPPC energy-performance preference value
>> + * @policy: Cpufreq policy value
>> + * @sched_flags: Store scheduler flags for possible cross CPU update
>> + * @update_util_set: CPUFreq utility callback is set
>> + * @last_update: Time stamp of the last performance state update
>> + * @cppc_boost_min: Last CPPC boosted min performance state
>> + * @cppc_cap1_cached: Cached value of the last CPPC Capabilities MSR
>> + * @update_util: Cpufreq utility callback information
>> + * @sample: the stored performance sample
>> *
>> * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
>> * represents all the attributes and goals that AMD P-State requests at runtime.
>> @@ -72,6 +85,74 @@ struct amd_cpudata {
>> u64 freq;
>> bool boost_supported;
>> +
>> + /* EPP feature related attributes */
>> + s16 epp_powersave;
>> + s16 epp_policy;
>> + s16 epp_cached;
>> + u32 policy;
>> + u32 sched_flags;
>> + bool update_util_set;
>> + u64 last_update;
>> + u64 last_io_update;
>> + u32 cppc_boost_min;
>> + u64 cppc_cap1_cached;
>> + struct update_util_data update_util;
>> + struct amd_aperf_mperf sample;
>> +};
>> +
>> +/**
>> + * struct amd_pstate_params - global parameters for the performance control
>> + * @cppc_boost_disabled: whether core performance boost is disabled
>> + */
>> +struct amd_pstate_params {
>> + bool cppc_boost_disabled;
>> +};
>> +
>> +#define AMD_CPPC_EPP_PERFORMANCE 0x00
>> +#define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80
>> +#define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF
>> +#define AMD_CPPC_EPP_POWERSAVE 0xFF
>> +
>> +/*
>> + * AMD Energy Preference Performance (EPP)
>> + * The EPP is used in the CCLK DPM controller to drive
>> + * the frequency that a core is going to operate during
>> + * short periods of activity. EPP values will be utilized for
>> + * different OS profiles (balanced, performance, power savings).
>> + * The display strings below correspond to the EPP index values
>> + * defined in energy_perf_strings[]:
>> + * index String
>> + *-------------------------------------
>> + * 0 default
>> + * 1 performance
>> + * 2 balance_performance
>> + * 3 balance_power
>> + * 4 power
>> + */
>> +enum energy_perf_value_index {
>> + EPP_INDEX_DEFAULT = 0,
>> + EPP_INDEX_PERFORMANCE,
>> + EPP_INDEX_BALANCE_PERFORMANCE,
>> + EPP_INDEX_BALANCE_POWERSAVE,
>> + EPP_INDEX_POWERSAVE,
>> +};
>> +
>> +static const char * const energy_perf_strings[] = {
>> + [EPP_INDEX_DEFAULT] = "default",
>> + [EPP_INDEX_PERFORMANCE] = "performance",
>> + [EPP_INDEX_BALANCE_PERFORMANCE] = "balance_performance",
>> + [EPP_INDEX_BALANCE_POWERSAVE] = "balance_power",
>> + [EPP_INDEX_POWERSAVE] = "power",
>> + NULL
>> +};
>> +
>> +static unsigned int epp_values[] = {
>> + [EPP_INDEX_DEFAULT] = 0,
>> + [EPP_INDEX_PERFORMANCE] = AMD_CPPC_EPP_PERFORMANCE,
>> + [EPP_INDEX_BALANCE_PERFORMANCE] = AMD_CPPC_EPP_BALANCE_PERFORMANCE,
>> + [EPP_INDEX_BALANCE_POWERSAVE] = AMD_CPPC_EPP_BALANCE_POWERSAVE,
>> + [EPP_INDEX_POWERSAVE] = AMD_CPPC_EPP_POWERSAVE,
>> };
>> #endif /* _LINUX_AMD_PSTATE_H */
>