lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <5b4b11d7-197e-41ec-82b6-8279125a95bd@amd.com>
Date: Tue, 22 Apr 2025 09:32:07 +0530
From: Dhananjay Ugwekar <Dhananjay.Ugwekar@....com>
To: Mario Limonciello <mario.limonciello@....com>, gautham.shenoy@....com
Cc: linux-pm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 2/2] cpufreq/amd-pstate: Add support for the "Requested
 CPU Min frequency" BIOS option



On 4/21/2025 10:23 PM, Mario Limonciello wrote:
> On 4/21/2025 3:04 AM, Dhananjay Ugwekar wrote:
>> Initialize lower frequency limit to the "Requested CPU Min frequency"
>> BIOS option (if it is set) value as part of the driver->init()
>> callback. The BIOS specified value is passed by the PMFW as min_perf in
>> CPPC_REQ MSR.
>>
>> To ensure that we don't mistake a stale min_perf value in CPPC_REQ
>> value as the "Requested CPU Min frequency" during a kexec wakeup, reset
>> the CPPC_REQ.min_perf value back to the BIOS specified one in the offline,
>> exit and suspend callbacks. amd_pstate_target() and
>> amd_pstate_epp_update_limit() which are invoked as part of the resume()
>> and online() callbacks will take care of restoring the CPPC_REQ back to
>> the latest sane values.
>>
>> Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@....com>
>> ---
>> Changes in v2:
>> * Modify the condition in msr_init_perf to initialize perf.bios_min_perf
>>    to 0 by default
>> * Use READ_ONCE to read cpudata->perf in exit, suspend and offline
>>    callbacks
>> ---
>>   drivers/cpufreq/amd-pstate.c | 67 +++++++++++++++++++++++++++++-------
>>   drivers/cpufreq/amd-pstate.h |  2 ++
>>   2 files changed, 56 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>> index 02de51001eba..407fdd31fb0b 100644
>> --- a/drivers/cpufreq/amd-pstate.c
>> +++ b/drivers/cpufreq/amd-pstate.c
>> @@ -389,7 +389,8 @@ static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy)
>>   static int msr_init_perf(struct amd_cpudata *cpudata)
>>   {
>>       union perf_cached perf = READ_ONCE(cpudata->perf);
>> -    u64 cap1, numerator;
>> +    u64 cap1, numerator, cppc_req;
>> +    u8 min_perf;
>>         int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
>>                        &cap1);
>> @@ -400,6 +401,22 @@ static int msr_init_perf(struct amd_cpudata *cpudata)
>>       if (ret)
>>           return ret;
>>   +    ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req);
>> +    if (ret)
>> +        return ret;
>> +
>> +    WRITE_ONCE(cpudata->cppc_req_cached, cppc_req);
>> +    min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req);
>> +
>> +    /*
>> +     * Clear out the min_perf part to check if the rest of the MSR is 0, if yes, this is an
>> +     * indication that the min_perf value is the one specified through the BIOS option
>> +     */
>> +    cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK);
>> +
>> +    if (!cppc_req)
>> +        perf.bios_min_perf = min_perf;
>> +
>>       perf.highest_perf = numerator;
>>       perf.max_limit_perf = numerator;
>>       perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
>> @@ -580,20 +597,26 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
>>   {
>>       /*
>>        * Initialize lower frequency limit (i.e.policy->min) with
>> -     * lowest_nonlinear_frequency which is the most energy efficient
>> -     * frequency. Override the initial value set by cpufreq core and
>> -     * amd-pstate qos_requests.
>> +     * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS,
>> +     * Override the initial value set by cpufreq core and amd-pstate qos_requests.
>>        */
>>       if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
>>           struct cpufreq_policy *policy __free(put_cpufreq_policy) =
>>                             cpufreq_cpu_get(policy_data->cpu);
>>           struct amd_cpudata *cpudata;
>> +        union perf_cached perf;
>>             if (!policy)
>>               return -EINVAL;
>>             cpudata = policy->driver_data;
>> -        policy_data->min = cpudata->lowest_nonlinear_freq;
>> +        perf = READ_ONCE(cpudata->perf);
>> +
>> +        if (perf.bios_min_perf)
>> +            policy_data->min = perf_to_freq(perf, cpudata->nominal_freq,
>> +                            perf.bios_min_perf);
>> +        else
>> +            policy_data->min = cpudata->lowest_nonlinear_freq;
>>       }
>>         cpufreq_verify_within_cpu_limits(policy_data);
>> @@ -1040,6 +1063,10 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>>   static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
>>   {
>>       struct amd_cpudata *cpudata = policy->driver_data;
>> +    union perf_cached perf = READ_ONCE(cpudata->perf);
>> +
>> +    /* Reset CPPC_REQ MSR to the BIOS value */
>> +    amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>         freq_qos_remove_request(&cpudata->req[1]);
>>       freq_qos_remove_request(&cpudata->req[0]);
>> @@ -1428,7 +1455,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>>       struct amd_cpudata *cpudata;
>>       union perf_cached perf;
>>       struct device *dev;
>> -    u64 value;
>>       int ret;
>>         /*
>> @@ -1493,12 +1519,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>>           cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
>>       }
>>   -    if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
>> -        ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
>> -        if (ret)
>> -            return ret;
>> -        WRITE_ONCE(cpudata->cppc_req_cached, value);
>> -    }
>>       ret = amd_pstate_set_epp(policy, cpudata->epp_default);
>>       if (ret)
>>           return ret;
>> @@ -1518,6 +1538,11 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
>>       struct amd_cpudata *cpudata = policy->driver_data;
>>         if (cpudata) {
>> +        union perf_cached perf = READ_ONCE(cpudata->perf);
>> +
>> +        /* Reset CPPC_REQ MSR to the BIOS value */
>> +        amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>> +
>>           kfree(cpudata);
>>           policy->driver_data = NULL;
>>       }
>> @@ -1575,12 +1600,28 @@ static int amd_pstate_cpu_online(struct cpufreq_policy *policy)
>>     static int amd_pstate_cpu_offline(struct cpufreq_policy *policy)
>>   {
>> -    return 0;
>> +    struct amd_cpudata *cpudata = policy->driver_data;
>> +    union perf_cached perf = READ_ONCE(cpudata->perf);
>> +
>> +    /*
>> +     * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>> +     * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the
>> +     * limits, epp and desired perf will get reset to the cached values in cpudata struct
>> +     */
>> +    return amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>   }
>>     static int amd_pstate_suspend(struct cpufreq_policy *policy)
>>   {
>>       struct amd_cpudata *cpudata = policy->driver_data;
>> +    union perf_cached perf = READ_ONCE(cpudata->perf);
>> +
>> +    /*
>> +     * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>> +     * min_perf value across kexec reboots. If this CPU is just resumed back without kexec,
>> +     * the limits, epp and desired perf will get reset to the cached values in cpudata struct
>> +     */
>> +    amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
> 
> In EPP mode this appears it would be OK because the perf value should get reset in the resume for amd_pstate_epp_update_limit() but in passive mode won't this never get reset on resume from suspend?

In passive mode, on resume, amd_pstate_target gets invoked through the code flow mentioned below,

Cpufreq_resume()->cpufreq_start_governor->(cpufreq_driver->start()&&cpufreq_driver->limits())->amd_pstate_target() [this is for _target() based governors]
For schedutil, start_governor will register the update_util hook and it will be called at every util change, which eventually calls adjust_perf()

I tested these scenarios using "sudo rtcwake -m mem -s 10" (suspend to mem for 10 seconds) on a server system, within 1-2 mins of the wakeup the CPPC_REQ MSR 
values of all CPUs get updated to sane ones. It would be helpful if you could test for such scenarios on the client systems as well.

That said, there might be a small window between the resume and governor trigger, where the value in CPPC_REQ MSR would be invalid. Are we okay with that ?

> 
>>         /* invalidate to ensure it's rewritten during resume */
>>       cpudata->cppc_req_cached = 0;
>> diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
>> index fbe1c08d3f06..2f7ae364d331 100644
>> --- a/drivers/cpufreq/amd-pstate.h
>> +++ b/drivers/cpufreq/amd-pstate.h
>> @@ -30,6 +30,7 @@
>>    * @lowest_perf: the absolute lowest performance level of the processor
>>    * @min_limit_perf: Cached value of the performance corresponding to policy->min
>>    * @max_limit_perf: Cached value of the performance corresponding to policy->max
>> + * @bios_min_perf: Cached perf value corresponding to the "Requested CPU Min Frequency" BIOS option
>>    */
>>   union perf_cached {
>>       struct {
>> @@ -39,6 +40,7 @@ union perf_cached {
>>           u8    lowest_perf;
>>           u8    min_limit_perf;
>>           u8    max_limit_perf;
>> +        u8    bios_min_perf;
>>       };
>>       u64    val;
>>   };
> 


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ