[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <5b4b11d7-197e-41ec-82b6-8279125a95bd@amd.com>
Date: Tue, 22 Apr 2025 09:32:07 +0530
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@....com>
To: Mario Limonciello <mario.limonciello@....com>, gautham.shenoy@....com
Cc: linux-pm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 2/2] cpufreq/amd-pstate: Add support for the "Requested
CPU Min frequency" BIOS option
On 4/21/2025 10:23 PM, Mario Limonciello wrote:
> On 4/21/2025 3:04 AM, Dhananjay Ugwekar wrote:
>> Initialize lower frequency limit to the "Requested CPU Min frequency"
>> BIOS option (if it is set) value as part of the driver->init()
>> callback. The BIOS specified value is passed by the PMFW as min_perf in
>> CPPC_REQ MSR.
>>
>> To ensure that we don't mistake a stale min_perf value in CPPC_REQ
>> value as the "Requested CPU Min frequency" during a kexec wakeup, reset
>> the CPPC_REQ.min_perf value back to the BIOS specified one in the offline,
>> exit and suspend callbacks. amd_pstate_target() and
>> amd_pstate_epp_update_limit() which are invoked as part of the resume()
>> and online() callbacks will take care of restoring the CPPC_REQ back to
>> the latest sane values.
>>
>> Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@....com>
>> ---
>> Changes in v2:
>> * Modify the condition in msr_init_perf to initialize perf.bios_min_perf
>> to 0 by default
>> * Use READ_ONCE to read cpudata->perf in exit, suspend and offline
>> callbacks
>> ---
>> drivers/cpufreq/amd-pstate.c | 67 +++++++++++++++++++++++++++++-------
>> drivers/cpufreq/amd-pstate.h | 2 ++
>> 2 files changed, 56 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>> index 02de51001eba..407fdd31fb0b 100644
>> --- a/drivers/cpufreq/amd-pstate.c
>> +++ b/drivers/cpufreq/amd-pstate.c
>> @@ -389,7 +389,8 @@ static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy)
>> static int msr_init_perf(struct amd_cpudata *cpudata)
>> {
>> union perf_cached perf = READ_ONCE(cpudata->perf);
>> - u64 cap1, numerator;
>> + u64 cap1, numerator, cppc_req;
>> + u8 min_perf;
>> int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
>> &cap1);
>> @@ -400,6 +401,22 @@ static int msr_init_perf(struct amd_cpudata *cpudata)
>> if (ret)
>> return ret;
>> + ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req);
>> + if (ret)
>> + return ret;
>> +
>> + WRITE_ONCE(cpudata->cppc_req_cached, cppc_req);
>> + min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req);
>> +
>> + /*
>> + * Clear out the min_perf part to check if the rest of the MSR is 0, if yes, this is an
>> + * indication that the min_perf value is the one specified through the BIOS option
>> + */
>> + cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK);
>> +
>> + if (!cppc_req)
>> + perf.bios_min_perf = min_perf;
>> +
>> perf.highest_perf = numerator;
>> perf.max_limit_perf = numerator;
>> perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
>> @@ -580,20 +597,26 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
>> {
>> /*
>> * Initialize lower frequency limit (i.e.policy->min) with
>> - * lowest_nonlinear_frequency which is the most energy efficient
>> - * frequency. Override the initial value set by cpufreq core and
>> - * amd-pstate qos_requests.
>> + * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS,
>> + * Override the initial value set by cpufreq core and amd-pstate qos_requests.
>> */
>> if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
>> struct cpufreq_policy *policy __free(put_cpufreq_policy) =
>> cpufreq_cpu_get(policy_data->cpu);
>> struct amd_cpudata *cpudata;
>> + union perf_cached perf;
>> if (!policy)
>> return -EINVAL;
>> cpudata = policy->driver_data;
>> - policy_data->min = cpudata->lowest_nonlinear_freq;
>> + perf = READ_ONCE(cpudata->perf);
>> +
>> + if (perf.bios_min_perf)
>> + policy_data->min = perf_to_freq(perf, cpudata->nominal_freq,
>> + perf.bios_min_perf);
>> + else
>> + policy_data->min = cpudata->lowest_nonlinear_freq;
>> }
>> cpufreq_verify_within_cpu_limits(policy_data);
>> @@ -1040,6 +1063,10 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>> static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
>> {
>> struct amd_cpudata *cpudata = policy->driver_data;
>> + union perf_cached perf = READ_ONCE(cpudata->perf);
>> +
>> + /* Reset CPPC_REQ MSR to the BIOS value */
>> + amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>> freq_qos_remove_request(&cpudata->req[1]);
>> freq_qos_remove_request(&cpudata->req[0]);
>> @@ -1428,7 +1455,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>> struct amd_cpudata *cpudata;
>> union perf_cached perf;
>> struct device *dev;
>> - u64 value;
>> int ret;
>> /*
>> @@ -1493,12 +1519,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>> cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
>> }
>> - if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
>> - ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
>> - if (ret)
>> - return ret;
>> - WRITE_ONCE(cpudata->cppc_req_cached, value);
>> - }
>> ret = amd_pstate_set_epp(policy, cpudata->epp_default);
>> if (ret)
>> return ret;
>> @@ -1518,6 +1538,11 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
>> struct amd_cpudata *cpudata = policy->driver_data;
>> if (cpudata) {
>> + union perf_cached perf = READ_ONCE(cpudata->perf);
>> +
>> + /* Reset CPPC_REQ MSR to the BIOS value */
>> + amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>> +
>> kfree(cpudata);
>> policy->driver_data = NULL;
>> }
>> @@ -1575,12 +1600,28 @@ static int amd_pstate_cpu_online(struct cpufreq_policy *policy)
>> static int amd_pstate_cpu_offline(struct cpufreq_policy *policy)
>> {
>> - return 0;
>> + struct amd_cpudata *cpudata = policy->driver_data;
>> + union perf_cached perf = READ_ONCE(cpudata->perf);
>> +
>> + /*
>> + * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>> + * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the
>> + * limits, epp and desired perf will get reset to the cached values in cpudata struct
>> + */
>> + return amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>> }
>> static int amd_pstate_suspend(struct cpufreq_policy *policy)
>> {
>> struct amd_cpudata *cpudata = policy->driver_data;
>> + union perf_cached perf = READ_ONCE(cpudata->perf);
>> +
>> + /*
>> + * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>> + * min_perf value across kexec reboots. If this CPU is just resumed back without kexec,
>> + * the limits, epp and desired perf will get reset to the cached values in cpudata struct
>> + */
>> + amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>
> In EPP mode this appears it would be OK because the perf value should get reset in the resume for amd_pstate_epp_update_limit() but in passive mode won't this never get reset on resume from suspend?
In passive mode, on resume, amd_pstate_target gets invoked through the code flow mentioned below,
Cpufreq_resume()->cpufreq_start_governor->(cpufreq_driver->start()&&cpufreq_driver->limits())->amd_pstate_target() [this is for _target() based governors]
For schedutil, start_governor will register the update_util hook and it will be called at every util change, which eventually calls adjust_perf()
I tested these scenarios using "sudo rtcwake -m mem -s 10" (suspend to mem for 10 seconds) on a server system, within 1-2 mins of the wakeup the CPPC_REQ MSR
values of all CPUs get updated to sane ones. It would be helpful if you could test for such scenarios on the client systems as well.
That said, there might be a small window between the resume and governor trigger, where the value in CPPC_REQ MSR would be invalid. Are we okay with that ?
>
>> /* invalidate to ensure it's rewritten during resume */
>> cpudata->cppc_req_cached = 0;
>> diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
>> index fbe1c08d3f06..2f7ae364d331 100644
>> --- a/drivers/cpufreq/amd-pstate.h
>> +++ b/drivers/cpufreq/amd-pstate.h
>> @@ -30,6 +30,7 @@
>> * @lowest_perf: the absolute lowest performance level of the processor
>> * @min_limit_perf: Cached value of the performance corresponding to policy->min
>> * @max_limit_perf: Cached value of the performance corresponding to policy->max
>> + * @bios_min_perf: Cached perf value corresponding to the "Requested CPU Min Frequency" BIOS option
>> */
>> union perf_cached {
>> struct {
>> @@ -39,6 +40,7 @@ union perf_cached {
>> u8 lowest_perf;
>> u8 min_limit_perf;
>> u8 max_limit_perf;
>> + u8 bios_min_perf;
>> };
>> u64 val;
>> };
>
Powered by blists - more mailing lists