[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230728145808.902892871@infradead.org>
Date: Fri, 28 Jul 2023 16:55:17 +0200
From: Peter Zijlstra <peterz@...radead.org>
To: anna-maria@...utronix.de, rafael@...nel.org, tglx@...utronix.de,
frederic@...nel.org, gautham.shenoy@....com
Cc: linux-kernel@...r.kernel.org, peterz@...radead.org,
daniel.lezcano@...aro.org, linux-pm@...r.kernel.org,
mingo@...hat.com, juri.lelli@...hat.com,
vincent.guittot@...aro.org, dietmar.eggemann@....com,
rostedt@...dmis.org, bsegall@...gle.com, mgorman@...e.de,
bristot@...hat.com, vschneid@...hat.com
Subject: [RFC][PATCH 2/3] cpuidle,teo: Improve NOHZ management
With cpuidle having added a TICK bucket, TEO will account all TICK and
longer idles there. This means we can now make an informed decision
about stopping the tick. If the sum of 'hits+intercepts' of all states
below the TICK bucket is more than 50% of the total, it is most likely
that we'll not reach the tick this time around either, so stopping the
tick doesn't make sense.
If we don't stop the tick, don't bother calling
tick_nohz_get_sleep_length() and assume the idle duration is no longer
than a tick (this could be improved to still look at the current
pending time and timers).
Since we have this extra state, remove the state_count based early
decisions.
Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
---
drivers/cpuidle/governors/teo.c | 97 ++++++++++++++--------------------------
1 file changed, 34 insertions(+), 63 deletions(-)
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -139,6 +139,7 @@
#include <linux/sched/clock.h>
#include <linux/sched/topology.h>
#include <linux/tick.h>
+#include "../cpuidle.h"
/*
* The number of bits to shift the CPU's capacity by in order to determine
@@ -197,7 +198,6 @@ struct teo_cpu {
int next_recent_idx;
int recent_idx[NR_RECENT];
unsigned long util_threshold;
- bool utilized;
};
static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -276,11 +276,11 @@ static void teo_update(struct cpuidle_dr
cpu_data->total += bin->hits + bin->intercepts;
- if (target_residency_ns <= cpu_data->sleep_length_ns) {
+ if (target_residency_ns <= cpu_data->sleep_length_ns)
idx_timer = i;
- if (target_residency_ns <= measured_ns)
- idx_duration = i;
- }
+
+ if (target_residency_ns <= measured_ns)
+ idx_duration = i;
}
i = cpu_data->next_recent_idx++;
@@ -362,11 +362,12 @@ static int teo_select(struct cpuidle_dri
unsigned int recent_sum = 0;
unsigned int idx_hit_sum = 0;
unsigned int hit_sum = 0;
+ unsigned int tick_sum = 0;
int constraint_idx = 0;
int idx0 = 0, idx = -1;
bool alt_intercepts, alt_recent;
ktime_t delta_tick;
- s64 duration_ns;
+ s64 duration_ns = TICK_NSEC;
int i;
if (dev->last_state_idx >= 0) {
@@ -376,36 +377,26 @@ static int teo_select(struct cpuidle_dri
cpu_data->time_span_ns = local_clock();
- duration_ns = tick_nohz_get_sleep_length(&delta_tick);
- cpu_data->sleep_length_ns = duration_ns;
+ /* Should we stop the tick? */
+ for (i = 1; i < drv->state_count; i++) {
+ struct teo_bin *prev_bin = &cpu_data->state_bins[i-1];
+ struct cpuidle_state *s = &drv->states[i];
- /* Check if there is any choice in the first place. */
- if (drv->state_count < 2) {
- idx = 0;
- goto end;
- }
- if (!dev->states_usage[0].disable) {
- idx = 0;
- if (drv->states[1].target_residency_ns > duration_ns)
- goto end;
- }
+ tick_sum += prev_bin->intercepts;
+ tick_sum += prev_bin->hits;
- cpu_data->utilized = teo_cpu_is_utilized(dev->cpu, cpu_data);
- /*
- * If the CPU is being utilized over the threshold and there are only 2
- * states to choose from, the metrics need not be considered, so choose
- * the shallowest non-polling state and exit.
- */
- if (drv->state_count < 3 && cpu_data->utilized) {
- for (i = 0; i < drv->state_count; ++i) {
- if (!dev->states_usage[i].disable &&
- !(drv->states[i].flags & CPUIDLE_FLAG_POLLING)) {
- idx = i;
- goto end;
- }
- }
+ if (s->target_residency_ns >= SHORT_TICK_NSEC)
+ break;
}
+ if (2*tick_sum > cpu_data->total)
+ *stop_tick = false;
+
+ /* If we do stop the tick, ask for the next timer. */
+ if (*stop_tick)
+ duration_ns = tick_nohz_get_sleep_length(&delta_tick);
+ cpu_data->sleep_length_ns = duration_ns;
+
/*
* Find the deepest idle state whose target residency does not exceed
* the current sleep length and the deepest idle state not deeper than
@@ -446,13 +437,13 @@ static int teo_select(struct cpuidle_dri
idx_recent_sum = recent_sum;
}
- /* Avoid unnecessary overhead. */
- if (idx < 0) {
- idx = 0; /* No states enabled, must use 0. */
- goto end;
- } else if (idx == idx0) {
- goto end;
- }
+ /* No states enabled, must use 0 */
+ if (idx < 0)
+ return 0;
+
+ /* No point looking for something shallower than the first enabled state */
+ if (idx == idx0)
+ return idx;
/*
* If the sum of the intercepts metric for all of the idle states
@@ -541,29 +532,9 @@ static int teo_select(struct cpuidle_dri
* If the CPU is being utilized over the threshold, choose a shallower
* non-polling state to improve latency
*/
- if (cpu_data->utilized)
+ if (teo_cpu_is_utilized(dev->cpu, cpu_data))
idx = teo_find_shallower_state(drv, dev, idx, duration_ns, true);
-end:
- /*
- * Don't stop the tick if the selected state is a polling one or if the
- * expected idle duration is shorter than the tick period length.
- */
- if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
- duration_ns < TICK_NSEC) && !tick_nohz_tick_stopped()) {
- *stop_tick = false;
-
- /*
- * The tick is not going to be stopped, so if the target
- * residency of the state to be returned is not within the time
- * till the closest timer including the tick, try to correct
- * that.
- */
- if (idx > idx0 &&
- drv->states[idx].target_residency_ns > delta_tick)
- idx = teo_find_shallower_state(drv, dev, idx, delta_tick, false);
- }
-
return idx;
}
Powered by blists - more mailing lists