[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <12665363.O9o76ZdvQC@rjwysocki.net>
Date: Thu, 24 Apr 2025 21:36:39 +0200
From: "Rafael J. Wysocki" <rjw@...ysocki.net>
To: Stephan Gerhold <stephan.gerhold@...aro.org>
Cc: "Rafael J. Wysocki" <rafael@...nel.org>,
Linux PM <linux-pm@...r.kernel.org>,
Christian Loehle <christian.loehle@....com>,
LKML <linux-kernel@...r.kernel.org>, Viresh Kumar <viresh.kumar@...aro.org>,
Srinivas Pandruvada <srinivas.pandruvada@...ux.intel.com>,
Mario Limonciello <mario.limonciello@....com>,
Vincent Guittot <vincent.guittot@...aro.org>,
Sultan Alsawaf <sultan@...neltoast.com>,
Peter Zijlstra <peterz@...radead.org>,
Valentin Schneider <vschneid@...hat.com>, Ingo Molnar <mingo@...hat.com>,
regressions@...ts.linux.dev, Johan Hovold <johan@...nel.org>,
"Rafael J. Wysocki" <rafael.j.wysocki@...el.com>
Subject:
Re: [PATCH v3] cpufreq: Avoid using inconsistent policy->min and policy->max
On Thursday, April 24, 2025 6:37:46 PM CEST Rafael J. Wysocki wrote:
> Hi,
>
> On Thu, Apr 24, 2025 at 6:21 PM Stephan Gerhold
> <stephan.gerhold@...aro.org> wrote:
> >
> > Hi Rafael,
> >
> > On Wed, Apr 16, 2025 at 04:12:37PM +0200, Rafael J. Wysocki wrote:
> > > From: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
> > >
> > > Since cpufreq_driver_resolve_freq() can run in parallel with
> > > cpufreq_set_policy() and there is no synchronization between them,
> > > the former may access policy->min and policy->max while the latter
> > > is updating them and it may see intermediate values of them due
> > > to the way the update is carried out. Also the compiler is free
> > > to apply any optimizations it wants both to the stores in
> > > cpufreq_set_policy() and to the loads in cpufreq_driver_resolve_freq()
> > > which may result in additional inconsistencies.
> > >
> > > To address this, use WRITE_ONCE() when updating policy->min and
> > > policy->max in cpufreq_set_policy() and use READ_ONCE() for reading
> > > them in cpufreq_driver_resolve_freq(). Moreover, rearrange the update
> > > in cpufreq_set_policy() to avoid storing intermediate values in
> > > policy->min and policy->max with the help of the observation that
> > > their new values are expected to be properly ordered upfront.
> > >
> > > Also modify cpufreq_driver_resolve_freq() to take the possible reverse
> > > ordering of policy->min and policy->max, which may happen depending on
> > > the ordering of operations when this function and cpufreq_set_policy()
> > > run concurrently, into account by always honoring the max when it
> > > turns out to be less than the min (in case it comes from thermal
> > > throttling or similar).
> > >
> > > Fixes: 151717690694 ("cpufreq: Make policy min/max hard requirements")
> > > Cc: 5.16+ <stable@...r.kernel.org> # 5.16+
> > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
> > > ---
> > >
> > > This replaces the last 3 patches in
> > >
> > > https://lore.kernel.org/linux-pm/6171293.lOV4Wx5bFT@rjwysocki.net/
> > >
> > > v2 -> v3:
> > > * Fold 3 patches into one.
> > > * Drop an unrelated white space fixup change.
> > > * Fix a typo in a comment (Christian).
> > >
> > > v1 -> v2: Cosmetic changes
> > >
> > > ---
> > > drivers/cpufreq/cpufreq.c | 32 +++++++++++++++++++++++++-------
> > > 1 file changed, 25 insertions(+), 7 deletions(-)
> > >
> > > --- a/drivers/cpufreq/cpufreq.c
> > > +++ b/drivers/cpufreq/cpufreq.c
> > > @@ -495,8 +495,6 @@
> > > {
> > > unsigned int idx;
> > >
> > > - target_freq = clamp_val(target_freq, policy->min, policy->max);
> > > -
> > > if (!policy->freq_table)
> > > return target_freq;
> > >
> > > @@ -520,7 +518,22 @@
> > > unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy,
> > > unsigned int target_freq)
> > > {
> > > - return __resolve_freq(policy, target_freq, CPUFREQ_RELATION_LE);
> > > + unsigned int min = READ_ONCE(policy->min);
> > > + unsigned int max = READ_ONCE(policy->max);
> > > +
> > > + /*
> > > + * If this function runs in parallel with cpufreq_set_policy(), it may
> > > + * read policy->min before the update and policy->max after the update
> > > + * or the other way around, so there is no ordering guarantee.
> > > + *
> > > + * Resolve this by always honoring the max (in case it comes from
> > > + * thermal throttling or similar).
> > > + */
> > > + if (unlikely(min > max))
> > > + min = max;
> > > +
> > > + return __resolve_freq(policy, clamp_val(target_freq, min, max),
> > > + CPUFREQ_RELATION_LE);
> > > }
> > > EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq);
> > >
> > > @@ -2338,6 +2351,7 @@
> > > if (cpufreq_disabled())
> > > return -ENODEV;
> > >
> > > + target_freq = clamp_val(target_freq, policy->min, policy->max);
> > > target_freq = __resolve_freq(policy, target_freq, relation);
> > >
> > > pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
> > > @@ -2631,11 +2645,15 @@
> > > * Resolve policy min/max to available frequencies. It ensures
> > > * no frequency resolution will neither overshoot the requested maximum
> > > * nor undershoot the requested minimum.
> > > + *
> > > + * Avoid storing intermediate values in policy->max or policy->min and
> > > + * compiler optimizations around them because they may be accessed
> > > + * concurrently by cpufreq_driver_resolve_freq() during the update.
> > > */
> > > - policy->min = new_data.min;
> > > - policy->max = new_data.max;
> > > - policy->min = __resolve_freq(policy, policy->min, CPUFREQ_RELATION_L);
> > > - policy->max = __resolve_freq(policy, policy->max, CPUFREQ_RELATION_H);
> > > + WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, CPUFREQ_RELATION_H));
> > > + new_data.min = __resolve_freq(policy, new_data.min, CPUFREQ_RELATION_L);
> > > + WRITE_ONCE(policy->min, new_data.min > policy->max ? policy->max : new_data.min);
> >
> > I've tested the cpufreq throttling again in 6.15-rc3 to check your fix
> > for the schedutil CPUFREQ_NEED_UPDATE_LIMITS regression I reported [1].
> > The CPU frequency is now being throttled correctly when reaching high
> > temperatures. Thanks for fixing this!
> >
> > Unfortunately, the opposite case has now regressed with this patch:
> > After the CPU frequency has been throttled due to high temperature and
> > the device cools down again, the CPU frequency is stuck at minimum until
> > you reboot. policy->max will never restore to the maximum frequency.
> >
> > I've confirmed that this causes unexpected slowness after temperature
> > throttling on a Qualcomm X1E laptop, and Johan has confirmed that e.g.
> > the ThinkPad X13s is also affected. I would expect that most devices
> > using cpufreq cooling in the kernel are affected.
> >
> > Looking at the code, I think the problem is that __resolve_freq() ->
> > cpufreq_frequency_table_target() -> cpufreq_table_find_index*() and
> > cpufreq_is_in_limits() are still using the old policy->min/max value.
> > In this patch, you have only moved the clamp_val() usage directly in
> > __resolve_freq().
>
> You are right, that's the problem.
>
> The fix is basically straightforward, pass min and max to
> cpufreq_frequency_table_target() and propagate downward, but making
> that change may be somewhat error-prone.
>
> I'll try to cut a patch to test tomorrow.
Actually, I have it already, so please find appended.
Please give it a go as soon as you reasonably can.
Thanks!
---
From: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
Subject: [PATCH v1] cpufreq: Fix setting policy limits when frequency tables are used
Commit 7491cdf46b5c ("cpufreq: Avoid using inconsistent policy->min and
policy->max") overlooked the fact that policy->min and policy->max were
accessed directly in cpufreq_frequency_table_target() and in the
functions called by it and the changes made by that commit led to
problems with setting policy limits.
Address this by passing the target frequency limits to __resolve_freq()
and cpufreq_frequency_table_target() and propagating them to the
functions called by the latter.
Fixes: 7491cdf46b5c ("cpufreq: Avoid using inconsistent policy->min and policy->max")
Reported-by: Stephan Gerhold <stephan.gerhold@...aro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
---
drivers/cpufreq/cpufreq.c | 22 ++++++----
drivers/cpufreq/cpufreq_ondemand.c | 3 -
drivers/cpufreq/freq_table.c | 6 +-
include/linux/cpufreq.h | 81 ++++++++++++++++++++++++-------------
4 files changed, 72 insertions(+), 40 deletions(-)
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -491,14 +491,18 @@
EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch);
static unsigned int __resolve_freq(struct cpufreq_policy *policy,
- unsigned int target_freq, unsigned int relation)
+ unsigned int target_freq,
+ unsigned int min, unsigned int max,
+ unsigned int relation)
{
unsigned int idx;
if (!policy->freq_table)
return target_freq;
- idx = cpufreq_frequency_table_target(policy, target_freq, relation);
+ target_freq = clamp_val(target_freq, min, max);
+
+ idx = cpufreq_frequency_table_target(policy, target_freq, min, max, relation);
policy->cached_resolved_idx = idx;
policy->cached_target_freq = target_freq;
return policy->freq_table[idx].frequency;
@@ -532,8 +536,7 @@
if (unlikely(min > max))
min = max;
- return __resolve_freq(policy, clamp_val(target_freq, min, max),
- CPUFREQ_RELATION_LE);
+ return __resolve_freq(policy, target_freq, min, max, CPUFREQ_RELATION_LE);
}
EXPORT_SYMBOL_GPL(cpufreq_driver_resolve_freq);
@@ -2351,8 +2354,8 @@
if (cpufreq_disabled())
return -ENODEV;
- target_freq = clamp_val(target_freq, policy->min, policy->max);
- target_freq = __resolve_freq(policy, target_freq, relation);
+ target_freq = __resolve_freq(policy, target_freq, policy->min,
+ policy->max, relation);
pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n",
policy->cpu, target_freq, relation, old_target_freq);
@@ -2650,8 +2653,11 @@
* compiler optimizations around them because they may be accessed
* concurrently by cpufreq_driver_resolve_freq() during the update.
*/
- WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max, CPUFREQ_RELATION_H));
- new_data.min = __resolve_freq(policy, new_data.min, CPUFREQ_RELATION_L);
+ WRITE_ONCE(policy->max, __resolve_freq(policy, new_data.max,
+ new_data.min, new_data.max,
+ CPUFREQ_RELATION_H));
+ new_data.min = __resolve_freq(policy, new_data.min, new_data.min,
+ new_data.max, CPUFREQ_RELATION_L);
WRITE_ONCE(policy->min, new_data.min > policy->max ? policy->max : new_data.min);
trace_cpu_frequency_limits(policy);
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -76,7 +76,8 @@
return freq_next;
}
- index = cpufreq_frequency_table_target(policy, freq_next, relation);
+ index = cpufreq_frequency_table_target(policy, freq_next, policy->min,
+ policy->max, relation);
freq_req = freq_table[index].frequency;
freq_reduc = freq_req * od_tuners->powersave_bias / 1000;
freq_avg = freq_req - freq_reduc;
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -115,8 +115,8 @@
EXPORT_SYMBOL_GPL(cpufreq_generic_frequency_table_verify);
int cpufreq_table_index_unsorted(struct cpufreq_policy *policy,
- unsigned int target_freq,
- unsigned int relation)
+ unsigned int target_freq, unsigned int min,
+ unsigned int max, unsigned int relation)
{
struct cpufreq_frequency_table optimal = {
.driver_data = ~0,
@@ -147,7 +147,7 @@
cpufreq_for_each_valid_entry_idx(pos, table, i) {
freq = pos->frequency;
- if ((freq < policy->min) || (freq > policy->max))
+ if (freq < min || freq > max)
continue;
if (freq == target_freq) {
optimal.driver_data = i;
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -788,8 +788,8 @@
int cpufreq_generic_frequency_table_verify(struct cpufreq_policy_data *policy);
int cpufreq_table_index_unsorted(struct cpufreq_policy *policy,
- unsigned int target_freq,
- unsigned int relation);
+ unsigned int target_freq, unsigned int min,
+ unsigned int max, unsigned int relation);
int cpufreq_frequency_table_get_index(struct cpufreq_policy *policy,
unsigned int freq);
@@ -852,12 +852,12 @@
return best;
}
-/* Works only on sorted freq-tables */
-static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy,
- unsigned int target_freq,
- bool efficiencies)
+static inline int find_index_l(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int min, unsigned int max,
+ bool efficiencies)
{
- target_freq = clamp_val(target_freq, policy->min, policy->max);
+ target_freq = clamp_val(target_freq, min, max);
if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
return cpufreq_table_find_index_al(policy, target_freq,
@@ -867,6 +867,14 @@
efficiencies);
}
+/* Works only on sorted freq-tables */
+static inline int cpufreq_table_find_index_l(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ bool efficiencies)
+{
+ return find_index_l(policy, target_freq, policy->min, policy->max, efficiencies);
+}
+
/* Find highest freq at or below target in a table in ascending order */
static inline int cpufreq_table_find_index_ah(struct cpufreq_policy *policy,
unsigned int target_freq,
@@ -920,12 +928,12 @@
return best;
}
-/* Works only on sorted freq-tables */
-static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy,
- unsigned int target_freq,
- bool efficiencies)
+static inline int find_index_h(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int min, unsigned int max,
+ bool efficiencies)
{
- target_freq = clamp_val(target_freq, policy->min, policy->max);
+ target_freq = clamp_val(target_freq, min, max);
if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
return cpufreq_table_find_index_ah(policy, target_freq,
@@ -935,6 +943,14 @@
efficiencies);
}
+/* Works only on sorted freq-tables */
+static inline int cpufreq_table_find_index_h(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ bool efficiencies)
+{
+ return find_index_h(policy, target_freq, policy->min, policy->max, efficiencies);
+}
+
/* Find closest freq to target in a table in ascending order */
static inline int cpufreq_table_find_index_ac(struct cpufreq_policy *policy,
unsigned int target_freq,
@@ -1005,12 +1021,12 @@
return best;
}
-/* Works only on sorted freq-tables */
-static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,
- unsigned int target_freq,
- bool efficiencies)
+static inline int find_index_c(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ unsigned int min, unsigned int max,
+ bool efficiencies)
{
- target_freq = clamp_val(target_freq, policy->min, policy->max);
+ target_freq = clamp_val(target_freq, min, max);
if (policy->freq_table_sorted == CPUFREQ_TABLE_SORTED_ASCENDING)
return cpufreq_table_find_index_ac(policy, target_freq,
@@ -1020,7 +1036,17 @@
efficiencies);
}
-static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy, int idx)
+/* Works only on sorted freq-tables */
+static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,
+ unsigned int target_freq,
+ bool efficiencies)
+{
+ return find_index_c(policy, target_freq, policy->min, policy->max, efficiencies);
+}
+
+static inline bool cpufreq_is_in_limits(struct cpufreq_policy *policy,
+ unsigned int min, unsigned int max,
+ int idx)
{
unsigned int freq;
@@ -1029,11 +1055,13 @@
freq = policy->freq_table[idx].frequency;
- return freq == clamp_val(freq, policy->min, policy->max);
+ return freq == clamp_val(freq, min, max);
}
static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
unsigned int target_freq,
+ unsigned int min,
+ unsigned int max,
unsigned int relation)
{
bool efficiencies = policy->efficiencies_available &&
@@ -1044,21 +1072,18 @@
relation &= ~CPUFREQ_RELATION_E;
if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED))
- return cpufreq_table_index_unsorted(policy, target_freq,
- relation);
+ return cpufreq_table_index_unsorted(policy, target_freq, min,
+ max, relation);
retry:
switch (relation) {
case CPUFREQ_RELATION_L:
- idx = cpufreq_table_find_index_l(policy, target_freq,
- efficiencies);
+ idx = find_index_l(policy, target_freq, min, max, efficiencies);
break;
case CPUFREQ_RELATION_H:
- idx = cpufreq_table_find_index_h(policy, target_freq,
- efficiencies);
+ idx = find_index_h(policy, target_freq, min, max, efficiencies);
break;
case CPUFREQ_RELATION_C:
- idx = cpufreq_table_find_index_c(policy, target_freq,
- efficiencies);
+ idx = find_index_c(policy, target_freq, min, max, efficiencies);
break;
default:
WARN_ON_ONCE(1);
@@ -1066,7 +1091,7 @@
}
/* Limit frequency index to honor policy->min/max */
- if (!cpufreq_is_in_limits(policy, idx) && efficiencies) {
+ if (!cpufreq_is_in_limits(policy, min, max, idx) && efficiencies) {
efficiencies = false;
goto retry;
}
Powered by blists - more mailing lists