[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <ba4d1d2c-2fea-4578-a1f9-d4142f7c5e55@amd.com>
Date: Sat, 27 Apr 2024 15:24:40 -0500
From: Mario Limonciello <mario.limonciello@....com>
To: Perry Yuan <perry.yuan@....com>, rafael.j.wysocki@...el.com,
viresh.kumar@...aro.org, Ray.Huang@....com, gautham.shenoy@....com,
Borislav.Petkov@....com
Cc: Alexander.Deucher@....com, Xinmei.Huang@....com, Xiaojian.Du@....com,
Li.Meng@....com, linux-pm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v8 3/6] cpufreq: amd-pstate: implement cpb_boost sysfs
entry for boost control
On 4/26/2024 09:07, Mario Limonciello wrote:
> On 4/26/2024 01:34, Perry Yuan wrote:
>> From: Perry Yuan <Perry.Yuan@....com>
>>
>> With this new sysfs entry `cpb_boost`created, user can change CPU boost
>> state dynamically under `active`, `guided` and `passive` modes.
>> And the highest perf and frequency will also be updated as the boost
>> state changing.
>>
>> 0): check current boost state
>> cat /sys/devices/system/cpu/amd_pstate/cpb_boost
>>
>> 1): disable CPU boost
>> sudo bash -c "echo 0 > /sys/devices/system/cpu/amd_pstate/cpb_boost"
>>
>> 2): enable CPU boost
>> sudo bash -c "echo 1 > /sys/devices/system/cpu/amd_pstate/cpb_boost"
>>
>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=217931
>> Link: https://bugzilla.kernel.org/show_bug.cgi?id=217618
>> Signed-off-by: Perry Yuan <Perry.Yuan@....com>
>> ---
>> drivers/cpufreq/amd-pstate.c | 99 ++++++++++++++++++++++++++++++++++++
>> 1 file changed, 99 insertions(+)
>>
>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>> index aa17a3134497..53ef39c6dbfa 100644
>> --- a/drivers/cpufreq/amd-pstate.c
>> +++ b/drivers/cpufreq/amd-pstate.c
>> @@ -1295,6 +1295,103 @@ static ssize_t prefcore_show(struct device *dev,
>> return sysfs_emit(buf, "%s\n",
>> str_enabled_disabled(amd_pstate_prefcore));
>> }
>> +static int amd_cpu_boost_update(struct amd_cpudata *cpudata, u32 on)
>> +{
>> + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu);
>> + struct cppc_perf_ctrls perf_ctrls;
>> + u32 highest_perf, nominal_perf, nominal_freq, max_freq;
>> + int ret;
>> +
>> + if (!policy)
>> + return -ENODATA;
>> +
>> + highest_perf = READ_ONCE(cpudata->highest_perf);
>> + nominal_perf = READ_ONCE(cpudata->nominal_perf);
>> + nominal_freq = READ_ONCE(cpudata->nominal_freq);
>> + max_freq = READ_ONCE(cpudata->max_freq);
>> +
>> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
>> + u64 value = READ_ONCE(cpudata->cppc_req_cached);
>> +
>> + value &= ~GENMASK_ULL(7, 0);
>> + value |= on ? highest_perf : nominal_perf;
>> + WRITE_ONCE(cpudata->cppc_req_cached, value);
>> +
>> + wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
>> + } else {
>> + perf_ctrls.max_perf = on ? highest_perf : nominal_perf;
>> + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
>> + if (ret) {
>> + cpufreq_cpu_release(policy);
>> + pr_debug("failed to set energy perf value (%d)\n", ret);
>> + return ret;
>> + }
>> + }
>> +
>> + if (on)
>> + policy->cpuinfo.max_freq = max_freq;
>> + else
>> + policy->cpuinfo.max_freq = nominal_freq * 1000;
>> +
>> + policy->max = policy->cpuinfo.max_freq;
>> +
>> + if (cppc_state == AMD_PSTATE_PASSIVE) {
>> + ret = freq_qos_update_request(&cpudata->req[1],
>> + policy->cpuinfo.max_freq);
>> + }
>> +
>> + cpufreq_cpu_release(policy);
>> +
>> + return ret;
>> +}
>> +
>> +static ssize_t cpb_boost_show(struct device *dev,
>> + struct device_attribute *attr, char *buf)
>> +{
>> + return sysfs_emit(buf, "%u\n", amd_pstate_global_params.cpb_boost);
>> +}
>> +
>> +static ssize_t cpb_boost_store(struct device *dev, struct
>> device_attribute *b,
>> + const char *buf, size_t count)
>> +{
>> + bool new_state;
>> + ssize_t ret;
>> + int cpu;
>> +
>> + if (!amd_pstate_global_params.cpb_supported) {
>> + pr_err("Boost mode is not supported by this processor or
>> SBIOS\n");
>> + return -EINVAL;
>> + }
>
> As this is information that will be known when you first probe, I feel
> it would be better to affect the visibility of the file than let a user
> discover it doesn't work when they try to write it.
>
> Thinking down the road software like power-profiles-daemon and tuned
> will probably adapt to this new file and if their contract is that the
> file exists means they should write it that's going to turn into
> needless errors in any such system's log on every boot.
>
>> +
>> + ret = kstrtobool(buf, &new_state);
>> + if (ret)
>> + return ret;
>> +
>> + mutex_lock(&amd_pstate_driver_lock);
>> + amd_pstate_global_params.cpb_boost = !!new_state;
>> +
>> + for_each_present_cpu(cpu) {
>
> It seems to me that by making an attribute for every single CPU this is
> wrong. It means that writing boost for one CPU's files applies to all
> other CPUs too.
>
> If it's going to be a global attribute that is shared by all CPUs it
> should be a single file.
>
> Otherwise this is going to be a problem if (for example) software tries
> to turn on boost for only 1 CPU. Think of this sequence:
>
> 1) 16 CPUs, cpb_boost is currently turned off.
> 2) Software tries to turn CPB boost on for the "first" CPU only.
> 3) It reads the value of the first CPU and sees it's 0. So It changes
> the value for the first CPU (which causes a global change).
> 4) It reads the value for the second CPU and sees it's 1. It tries to
> change this back to zero, which again causes a global change.
> 5) It checks all the others and they're all 0 and it does nothing.
>
> So you see by having a global attribute which is shared with every
> single CPU you now have a flow problem that userspace can introduce.
>
Hi,
Sorry, I can see now that it current IS a global attribute in this
implementation (IE it's DEVICE_ATTR_RW not cpufreq_freq_attr_rw).
However as you're already actually applying it to each CPU individually,
I do think it is better to make it a per CPU attribute.
There could be cases that software only wants some of the cores to be
boosted, so could you please instead consider to convert to use
cpufreq_freq_attr_rw for the attribute and only apply one CPU at a time?
Thanks!
>> +
>> + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
>> + struct amd_cpudata *cpudata = policy->driver_data;
>> +
>> + if (!cpudata) {
>> + pr_err("cpudata is NULL\n");
>> + ret = -ENODATA;
>> + cpufreq_cpu_put(policy);
>> + goto err_exit;
>> + }
>> +
>> + amd_cpu_boost_update(cpudata,
>> amd_pstate_global_params.cpb_boost);
>> + refresh_frequency_limits(policy);
>> + cpufreq_cpu_put(policy);
>> + }
>> +
>> +err_exit:
>> + mutex_unlock(&amd_pstate_driver_lock);
>> + return ret < 0 ? ret : count;
>> +}
>> +
>> cpufreq_freq_attr_ro(amd_pstate_max_freq);
>> cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
>> @@ -1305,6 +1402,7 @@
>> cpufreq_freq_attr_rw(energy_performance_preference);
>> cpufreq_freq_attr_ro(energy_performance_available_preferences);
>> static DEVICE_ATTR_RW(status);
>> static DEVICE_ATTR_RO(prefcore);
>> +static DEVICE_ATTR_RW(cpb_boost);
>> static struct freq_attr *amd_pstate_attr[] = {
>> &amd_pstate_max_freq,
>> @@ -1329,6 +1427,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
>> static struct attribute *pstate_global_attributes[] = {
>> &dev_attr_status.attr,
>> &dev_attr_prefcore.attr,
>> + &dev_attr_cpb_boost.attr,
>> NULL
>> };
>
Powered by blists - more mailing lists