[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <Y2kLA8x40IiBEPYg@hirez.programming.kicks-ass.net>
Date: Mon, 7 Nov 2022 14:41:23 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Kajetan Puchalski <kajetan.puchalski@....com>
Cc: Jian-Min Liu <jian-min.liu@...iatek.com>,
Dietmar Eggemann <dietmar.eggemann@....com>,
Ingo Molnar <mingo@...nel.org>,
Vincent Guittot <vincent.guittot@...aro.org>,
Morten Rasmussen <morten.rasmussen@....com>,
Vincent Donnefort <vdonnefort@...gle.com>,
Quentin Perret <qperret@...gle.com>,
Patrick Bellasi <patrick.bellasi@...bug.net>,
Abhijeet Dharmapurikar <adharmap@...cinc.com>,
Qais Yousef <qais.yousef@....com>,
linux-kernel@...r.kernel.org,
Jonathan JMChen <jonathan.jmchen@...iatek.com>
Subject: Re: [RFC PATCH 0/1] sched/pelt: Change PELT halflife at runtime
On Thu, Sep 29, 2022 at 03:41:47PM +0100, Kajetan Puchalski wrote:
> Based on all the tests we've seen, jankbench or otherwise, the
> improvement can mainly be attributed to the faster ramp up of frequency
> caused by the shorter PELT window while using schedutil.
Would something terrible like the below help some?
If not, I suppose it could be modified to take the current state as
history. But basically it runs a faster PELT sum alongside the regular
signal just for ramping up the frequency.
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index ee7f23c76bd3..9ba07a1d19f6 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -96,6 +96,7 @@ SCHED_FEAT(WA_BIAS, true)
*/
SCHED_FEAT(UTIL_EST, true)
SCHED_FEAT(UTIL_EST_FASTUP, true)
+SCHED_FEAT(UTIL_EST_FASTER, true)
SCHED_FEAT(LATENCY_WARN, false)
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index 0f310768260c..13cd9e27ce3e 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -148,6 +148,22 @@ accumulate_sum(u64 delta, struct sched_avg *sa,
return periods;
}
+/*
+ * Compute a PELT util_avg assuming no history and @delta runtime.
+ */
+unsigned long faster_est_approx(u64 delta)
+{
+ unsigned long contrib = (unsigned long)delta; /* p == 0 -> delta < 1024 */
+ u64 periods = delta / 1024; /* whole 1024-unit periods in @delta */
+
+ if (periods) {
+ delta %= 1024; /* remainder left after the whole periods */
+ contrib = __accumulate_pelt_segments(periods, 1024, delta);
+ }
+
+ return (contrib << SCHED_CAPACITY_SHIFT) / PELT_MIN_DIVIDER;
+}
+
/*
* We can represent the historical contribution to runnable average as the
* coefficients of a geometric series. To do this we sub-divide our runnable
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index a4a20046e586..99827d5dda27 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2922,6 +2922,8 @@ static inline unsigned long cpu_util_dl(struct rq *rq)
return READ_ONCE(rq->avg_dl.util_avg);
}
+extern unsigned long faster_est_approx(u64 runtime);
+
/**
* cpu_util_cfs() - Estimates the amount of CPU capacity used by CFS tasks.
* @cpu: the CPU to get the utilization for.
@@ -2956,13 +2958,26 @@ static inline unsigned long cpu_util_dl(struct rq *rq)
*/
static inline unsigned long cpu_util_cfs(int cpu)
{
+ struct rq *rq = cpu_rq(cpu);
struct cfs_rq *cfs_rq;
unsigned long util;
- cfs_rq = &cpu_rq(cpu)->cfs;
+ cfs_rq = &rq->cfs;
util = READ_ONCE(cfs_rq->avg.util_avg);
if (sched_feat(UTIL_EST)) {
+ if (sched_feat(UTIL_EST_FASTER)) {
+ struct task_struct *curr;
+
+ rcu_read_lock();
+ curr = rcu_dereference(rq->curr);
+ if (likely(curr->sched_class == &fair_sched_class)) {
+ u64 runtime = curr->se.sum_exec_runtime - curr->se.exec_start;
+ util = max_t(unsigned long, util,
+ faster_est_approx(runtime * 2));
+ }
+ rcu_read_unlock();
+ }
util = max_t(unsigned long, util,
READ_ONCE(cfs_rq->avg.util_est.enqueued));
}
Powered by blists - more mailing lists