[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <83f93e72-b03a-4167-ab96-0a6f8017e54c@amd.com>
Date: Fri, 26 Apr 2024 09:07:50 -0500
From: Mario Limonciello <mario.limonciello@....com>
To: Perry Yuan <perry.yuan@....com>, rafael.j.wysocki@...el.com,
viresh.kumar@...aro.org, Ray.Huang@....com, gautham.shenoy@....com,
Borislav.Petkov@....com
Cc: Alexander.Deucher@....com, Xinmei.Huang@....com, Xiaojian.Du@....com,
Li.Meng@....com, linux-pm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v8 3/6] cpufreq: amd-pstate: implement cpb_boost sysfs
entry for boost control
On 4/26/2024 01:34, Perry Yuan wrote:
> From: Perry Yuan <Perry.Yuan@....com>
>
> With this new sysfs entry `cpb_boost`created, user can change CPU boost
> state dynamically under `active`, `guided` and `passive` modes.
> And the highest perf and frequency will also be updated as the boost
> state changing.
>
> 0): check current boost state
> cat /sys/devices/system/cpu/amd_pstate/cpb_boost
>
> 1): disable CPU boost
> sudo bash -c "echo 0 > /sys/devices/system/cpu/amd_pstate/cpb_boost"
>
> 2): enable CPU boost
> sudo bash -c "echo 1 > /sys/devices/system/cpu/amd_pstate/cpb_boost"
>
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=217931
> Link: https://bugzilla.kernel.org/show_bug.cgi?id=217618
> Signed-off-by: Perry Yuan <Perry.Yuan@....com>
> ---
> drivers/cpufreq/amd-pstate.c | 99 ++++++++++++++++++++++++++++++++++++
> 1 file changed, 99 insertions(+)
>
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index aa17a3134497..53ef39c6dbfa 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -1295,6 +1295,103 @@ static ssize_t prefcore_show(struct device *dev,
> return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
> }
>
> +static int amd_cpu_boost_update(struct amd_cpudata *cpudata, u32 on)
> +{
> + struct cpufreq_policy *policy = cpufreq_cpu_acquire(cpudata->cpu);
> + struct cppc_perf_ctrls perf_ctrls;
> + u32 highest_perf, nominal_perf, nominal_freq, max_freq;
> + int ret;
> +
> + if (!policy)
> + return -ENODATA;
> +
> + highest_perf = READ_ONCE(cpudata->highest_perf);
> + nominal_perf = READ_ONCE(cpudata->nominal_perf);
> + nominal_freq = READ_ONCE(cpudata->nominal_freq);
> + max_freq = READ_ONCE(cpudata->max_freq);
> +
> + if (boot_cpu_has(X86_FEATURE_CPPC)) {
> + u64 value = READ_ONCE(cpudata->cppc_req_cached);
> +
> + value &= ~GENMASK_ULL(7, 0);
> + value |= on ? highest_perf : nominal_perf;
> + WRITE_ONCE(cpudata->cppc_req_cached, value);
> +
> + wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, value);
> + } else {
> + perf_ctrls.max_perf = on ? highest_perf : nominal_perf;
> + ret = cppc_set_epp_perf(cpudata->cpu, &perf_ctrls, 1);
> + if (ret) {
> + cpufreq_cpu_release(policy);
> + pr_debug("failed to set energy perf value (%d)\n", ret);
> + return ret;
> + }
> + }
> +
> + if (on)
> + policy->cpuinfo.max_freq = max_freq;
> + else
> + policy->cpuinfo.max_freq = nominal_freq * 1000;
> +
> + policy->max = policy->cpuinfo.max_freq;
> +
> + if (cppc_state == AMD_PSTATE_PASSIVE) {
> + ret = freq_qos_update_request(&cpudata->req[1],
> + policy->cpuinfo.max_freq);
> + }
> +
> + cpufreq_cpu_release(policy);
> +
> + return ret;
> +}
> +
> +static ssize_t cpb_boost_show(struct device *dev,
> + struct device_attribute *attr, char *buf)
> +{
> + return sysfs_emit(buf, "%u\n", amd_pstate_global_params.cpb_boost);
> +}
> +
> +static ssize_t cpb_boost_store(struct device *dev, struct device_attribute *b,
> + const char *buf, size_t count)
> +{
> + bool new_state;
> + ssize_t ret;
> + int cpu;
> +
> + if (!amd_pstate_global_params.cpb_supported) {
> + pr_err("Boost mode is not supported by this processor or SBIOS\n");
> + return -EINVAL;
> + }
As this is information that will be known when you first probe, I feel
it would be better to affect the visibility of the file than let a user
discover it doesn't work when they try to write it.
Thinking down the road software like power-profiles-daemon and tuned
will probably adapt to this new file and if their contract is that the
file exists means they should write it that's going to turn into
needless errors in any such system's log on every boot.
> +
> + ret = kstrtobool(buf, &new_state);
> + if (ret)
> + return ret;
> +
> + mutex_lock(&amd_pstate_driver_lock);
> + amd_pstate_global_params.cpb_boost = !!new_state;
> +
> + for_each_present_cpu(cpu) {
It seems to me that by making an attribute for every single CPU this is
wrong. It means that writing boost for one CPU's files applies to all
other CPUs too.
If it's going to be a global attribute that is shared by all CPUs it
should be a single file.
Otherwise this is going to be a problem if (for example) software tries
to turn on boost for only 1 CPU. Think of this sequence:
1) 16 CPUs, cpb_boost is currently turned off.
2) Software tries to turn CPB boost on for the "first" CPU only.
3) It reads the value of the first CPU and sees it's 0. So It changes
the value for the first CPU (which causes a global change).
4) It reads the value for the second CPU and sees it's 1. It tries to
change this back to zero, which again causes a global change.
5) It checks all the others and they're all 0 and it does nothing.
So you see by having a global attribute which is shared with every
single CPU you now have a flow problem that userspace can introduce.
> +
> + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
> + struct amd_cpudata *cpudata = policy->driver_data;
> +
> + if (!cpudata) {
> + pr_err("cpudata is NULL\n");
> + ret = -ENODATA;
> + cpufreq_cpu_put(policy);
> + goto err_exit;
> + }
> +
> + amd_cpu_boost_update(cpudata, amd_pstate_global_params.cpb_boost);
> + refresh_frequency_limits(policy);
> + cpufreq_cpu_put(policy);
> + }
> +
> +err_exit:
> + mutex_unlock(&amd_pstate_driver_lock);
> + return ret < 0 ? ret : count;
> +}
> +
> cpufreq_freq_attr_ro(amd_pstate_max_freq);
> cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
>
> @@ -1305,6 +1402,7 @@ cpufreq_freq_attr_rw(energy_performance_preference);
> cpufreq_freq_attr_ro(energy_performance_available_preferences);
> static DEVICE_ATTR_RW(status);
> static DEVICE_ATTR_RO(prefcore);
> +static DEVICE_ATTR_RW(cpb_boost);
>
> static struct freq_attr *amd_pstate_attr[] = {
> &amd_pstate_max_freq,
> @@ -1329,6 +1427,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
> static struct attribute *pstate_global_attributes[] = {
> &dev_attr_status.attr,
> &dev_attr_prefcore.attr,
> + &dev_attr_cpb_boost.attr,
> NULL
> };
>
Powered by blists - more mailing lists