lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <0f784d8d-7141-4e35-9a61-e8875d0a8f11@amd.com>
Date: Wed, 23 Apr 2025 09:05:11 -0500
From: Mario Limonciello <mario.limonciello@....com>
To: Dhananjay Ugwekar <Dhananjay.Ugwekar@....com>, gautham.shenoy@....com
Cc: linux-pm@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH v2 2/2] cpufreq/amd-pstate: Add support for the "Requested
 CPU Min frequency" BIOS option

On 4/21/2025 11:02 PM, Dhananjay Ugwekar wrote:
> 
> 
> On 4/21/2025 10:23 PM, Mario Limonciello wrote:
>> On 4/21/2025 3:04 AM, Dhananjay Ugwekar wrote:
>>> Initialize lower frequency limit to the "Requested CPU Min frequency"
>>> BIOS option (if it is set) value as part of the driver->init()
>>> callback. The BIOS specified value is passed by the PMFW as min_perf in
>>> CPPC_REQ MSR.
>>>
>>> To ensure that we don't mistake a stale min_perf value in CPPC_REQ
>>> value as the "Requested CPU Min frequency" during a kexec wakeup, reset
>>> the CPPC_REQ.min_perf value back to the BIOS specified one in the offline,
>>> exit and suspend callbacks. amd_pstate_target() and
>>> amd_pstate_epp_update_limit() which are invoked as part of the resume()
>>> and online() callbacks will take care of restoring the CPPC_REQ back to
>>> the latest sane values.
>>>
>>> Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@....com>
>>> ---
>>> Changes in v2:
>>> * Modify the condition in msr_init_perf to initialize perf.bios_min_perf
>>>     to 0 by default
>>> * Use READ_ONCE to read cpudata->perf in exit, suspend and offline
>>>     callbacks
>>> ---
>>>    drivers/cpufreq/amd-pstate.c | 67 +++++++++++++++++++++++++++++-------
>>>    drivers/cpufreq/amd-pstate.h |  2 ++
>>>    2 files changed, 56 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>>> index 02de51001eba..407fdd31fb0b 100644
>>> --- a/drivers/cpufreq/amd-pstate.c
>>> +++ b/drivers/cpufreq/amd-pstate.c
>>> @@ -389,7 +389,8 @@ static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy)
>>>    static int msr_init_perf(struct amd_cpudata *cpudata)
>>>    {
>>>        union perf_cached perf = READ_ONCE(cpudata->perf);
>>> -    u64 cap1, numerator;
>>> +    u64 cap1, numerator, cppc_req;
>>> +    u8 min_perf;
>>>          int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
>>>                         &cap1);
>>> @@ -400,6 +401,22 @@ static int msr_init_perf(struct amd_cpudata *cpudata)
>>>        if (ret)
>>>            return ret;
>>>    +    ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>> +    WRITE_ONCE(cpudata->cppc_req_cached, cppc_req);
>>> +    min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req);
>>> +
>>> +    /*
>>> +     * Clear out the min_perf part to check if the rest of the MSR is 0, if yes, this is an
>>> +     * indication that the min_perf value is the one specified through the BIOS option
>>> +     */
>>> +    cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK);
>>> +
>>> +    if (!cppc_req)
>>> +        perf.bios_min_perf = min_perf;
>>> +
>>>        perf.highest_perf = numerator;
>>>        perf.max_limit_perf = numerator;
>>>        perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
>>> @@ -580,20 +597,26 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
>>>    {
>>>        /*
>>>         * Initialize lower frequency limit (i.e.policy->min) with
>>> -     * lowest_nonlinear_frequency which is the most energy efficient
>>> -     * frequency. Override the initial value set by cpufreq core and
>>> -     * amd-pstate qos_requests.
>>> +     * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS,
>>> +     * Override the initial value set by cpufreq core and amd-pstate qos_requests.
>>>         */
>>>        if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
>>>            struct cpufreq_policy *policy __free(put_cpufreq_policy) =
>>>                              cpufreq_cpu_get(policy_data->cpu);
>>>            struct amd_cpudata *cpudata;
>>> +        union perf_cached perf;
>>>              if (!policy)
>>>                return -EINVAL;
>>>              cpudata = policy->driver_data;
>>> -        policy_data->min = cpudata->lowest_nonlinear_freq;
>>> +        perf = READ_ONCE(cpudata->perf);
>>> +
>>> +        if (perf.bios_min_perf)
>>> +            policy_data->min = perf_to_freq(perf, cpudata->nominal_freq,
>>> +                            perf.bios_min_perf);
>>> +        else
>>> +            policy_data->min = cpudata->lowest_nonlinear_freq;
>>>        }
>>>          cpufreq_verify_within_cpu_limits(policy_data);
>>> @@ -1040,6 +1063,10 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>>>    static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
>>>    {
>>>        struct amd_cpudata *cpudata = policy->driver_data;
>>> +    union perf_cached perf = READ_ONCE(cpudata->perf);
>>> +
>>> +    /* Reset CPPC_REQ MSR to the BIOS value */
>>> +    amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>>          freq_qos_remove_request(&cpudata->req[1]);
>>>        freq_qos_remove_request(&cpudata->req[0]);
>>> @@ -1428,7 +1455,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>>>        struct amd_cpudata *cpudata;
>>>        union perf_cached perf;
>>>        struct device *dev;
>>> -    u64 value;
>>>        int ret;
>>>          /*
>>> @@ -1493,12 +1519,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>>>            cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
>>>        }
>>>    -    if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
>>> -        ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
>>> -        if (ret)
>>> -            return ret;
>>> -        WRITE_ONCE(cpudata->cppc_req_cached, value);
>>> -    }
>>>        ret = amd_pstate_set_epp(policy, cpudata->epp_default);
>>>        if (ret)
>>>            return ret;
>>> @@ -1518,6 +1538,11 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
>>>        struct amd_cpudata *cpudata = policy->driver_data;
>>>          if (cpudata) {
>>> +        union perf_cached perf = READ_ONCE(cpudata->perf);
>>> +
>>> +        /* Reset CPPC_REQ MSR to the BIOS value */
>>> +        amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>> +
>>>            kfree(cpudata);
>>>            policy->driver_data = NULL;
>>>        }
>>> @@ -1575,12 +1600,28 @@ static int amd_pstate_cpu_online(struct cpufreq_policy *policy)
>>>      static int amd_pstate_cpu_offline(struct cpufreq_policy *policy)
>>>    {
>>> -    return 0;
>>> +    struct amd_cpudata *cpudata = policy->driver_data;
>>> +    union perf_cached perf = READ_ONCE(cpudata->perf);
>>> +
>>> +    /*
>>> +     * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>>> +     * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the
>>> +     * limits, epp and desired perf will get reset to the cached values in cpudata struct
>>> +     */
>>> +    return amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>>    }
>>>      static int amd_pstate_suspend(struct cpufreq_policy *policy)
>>>    {
>>>        struct amd_cpudata *cpudata = policy->driver_data;
>>> +    union perf_cached perf = READ_ONCE(cpudata->perf);
>>> +
>>> +    /*
>>> +     * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>>> +     * min_perf value across kexec reboots. If this CPU is just resumed back without kexec,
>>> +     * the limits, epp and desired perf will get reset to the cached values in cpudata struct
>>> +     */
>>> +    amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>
>> In EPP mode this appears it would be OK because the perf value should get reset in the resume for amd_pstate_epp_update_limit() but in passive mode won't this never get reset on resume from suspend?
> 
> In passive mode, on resume, amd_pstate_target gets invoked through the code flow mentioned below,
> 
> Cpufreq_resume()->cpufreq_start_governor->(cpufreq_driver->start()&&cpufreq_driver->limits())->amd_pstate_target() [this is for _target() based governors]
> For schedutil, start_governor will register the update_util hook and it will be called at every util change, which eventually calls adjust_perf()
> 
> I tested these scenarios using "sudo rtcwake -m mem -s 10" (suspend to mem for 10 seconds) on a server system, within 1-2 mins of the wakeup the CPPC_REQ MSR
> values of all CPUs get updated to sane ones. It would be helpful if you could test for such scenarios on the client systems as well.
> 
> That said, there might be a small window between the resume and governor trigger, where the value in CPPC_REQ MSR would be invalid. Are we okay with that ?

For server systems it's probably not much of a big deal since servers 
aren't frequently suspended, bu this window of time seems untenable for 
a client machine.  As a user that would mean effectively they have wrong 
limits programmed after wakeup for 1-2 minutes and could potentially 
have performance gimped as a result.

I'd say let's just flush the right value immediately after resume and 
then the write 1-2 minutes later becomes a no-op.  With the checks we 
have in the driver now I expect that they don't even turn into MSR writes.

> 
>>
>>>          /* invalidate to ensure it's rewritten during resume */
>>>        cpudata->cppc_req_cached = 0;
>>> diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
>>> index fbe1c08d3f06..2f7ae364d331 100644
>>> --- a/drivers/cpufreq/amd-pstate.h
>>> +++ b/drivers/cpufreq/amd-pstate.h
>>> @@ -30,6 +30,7 @@
>>>     * @lowest_perf: the absolute lowest performance level of the processor
>>>     * @min_limit_perf: Cached value of the performance corresponding to policy->min
>>>     * @max_limit_perf: Cached value of the performance corresponding to policy->max
>>> + * @bios_min_perf: Cached perf value corresponding to the "Requested CPU Min Frequency" BIOS option
>>>     */
>>>    union perf_cached {
>>>        struct {
>>> @@ -39,6 +40,7 @@ union perf_cached {
>>>            u8    lowest_perf;
>>>            u8    min_limit_perf;
>>>            u8    max_limit_perf;
>>> +        u8    bios_min_perf;
>>>        };
>>>        u64    val;
>>>    };
>>
> 


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ