Message-ID: <ZAoMgmMOYXBA4Mzs@hirez.programming.kicks-ass.net>
Date: Thu, 9 Mar 2023 17:42:42 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Mike Galbraith <efault@....de>
Cc: mingo@...nel.org, vincent.guittot@...aro.org,
linux-kernel@...r.kernel.org, juri.lelli@...hat.com,
dietmar.eggemann@....com, rostedt@...dmis.org, bsegall@...gle.com,
mgorman@...e.de, bristot@...hat.com, corbet@....net,
qyousef@...alina.io, chris.hyser@...cle.com,
patrick.bellasi@...bug.net, pjt@...gle.com, pavel@....cz,
qperret@...gle.com, tim.c.chen@...ux.intel.com, joshdon@...gle.com,
timj@....org, kprateek.nayak@....com, yu.c.chen@...el.com,
youssefesmat@...omium.org, joel@...lfernandes.org
Subject: Re: [PATCH 10/10] sched/fair: Implement an EEVDF like policy

On Thu, Mar 09, 2023 at 04:29:04PM +0100, Peter Zijlstra wrote:
> So if I add TICK_NSEC based sleeper bonus (/2 for gentle), then starve
> works -- this is the absolutely minimal amount required. It sucks a bit
> it's HZ dependent, but alas.

Fixes starve, sucks for schbench and hackbench :/

Clearly more thinking is required...
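
(For reference, with this hack the sleeper credit is no longer a raw
nanosecond value but is scaled by the entity's weight through
calc_delta_fair(). Roughly -- a sketch of the effective computation,
not the kernel's fixed-point __calc_delta() arithmetic:

	/*
	 * Per wakeup, a sleeper is placed at most one (half) tick of
	 * weighted service ahead of the queue average; heavier (lower
	 * nice) entities get a smaller virtual-time credit.
	 */
	static u64 sleeper_bonus_vtime(struct sched_entity *se)
	{
		u64 thresh = TICK_NSEC;		/* HZ dependent, as noted */

		if (sched_feat(GENTLE_FAIR_SLEEPERS))
			thresh >>= 1;

		/* calc_delta_fair(): wall clock -> vtime for this weight */
		return thresh * NICE_0_LOAD / se->load.weight;
	}
)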
root@...-ep:~/bench# echo NO_FAIR_SLEEPERS > /debug/sched/features
root@...-ep:~/bench# ./doit-schbench.sh ; ./doit-hackbench-series.sh
Latency percentiles (usec)
50.0000th: 83
75.0000th: 102
90.0000th: 109
95.0000th: 114
*99.0000th: 450
99.5000th: 723
99.9000th: 985
min=0, max=1067
1: 0.55355 +- 0.00290 seconds time elapsed ( +- 0.52% )
2: 0.79591 +- 0.00545 seconds time elapsed ( +- 0.68% )
5: 1.5804 +- 0.0102 seconds time elapsed ( +- 0.65% )
10: 2.5674 +- 0.0110 seconds time elapsed ( +- 0.43% )
20: 4.6116 +- 0.0160 seconds time elapsed ( +- 0.35% )
40: 9.5965 +- 0.0167 seconds time elapsed ( +- 0.17% )
root@...-ep:~/bench# time taskset -c 3 ./starve/starve 1000000
expecting to receive 1000000 signals
^C
real 0m32.999s
user 0m0.000s
sys 0m0.719s
root@...-ep:~/bench# echo FAIR_SLEEPERS > /debug/sched/features
root@...-ep:~/bench# ./doit-schbench.sh ; ./doit-hackbench-series.sh
Latency percentiles (usec)
50.0000th: 87
75.0000th: 103
90.0000th: 111
95.0000th: 116
*99.0000th: 163
99.5000th: 697
99.9000th: 1110
min=0, max=1522
1: 0.59076 +- 0.00577 seconds time elapsed ( +- 0.98% )
2: 0.86093 +- 0.00407 seconds time elapsed ( +- 0.47% )
5: 2.1018 +- 0.0129 seconds time elapsed ( +- 0.61% )
10: 3.6378 +- 0.0395 seconds time elapsed ( +- 1.09% )
20: 5.56884 +- 0.00979 seconds time elapsed ( +- 0.18% )
40: 10.8570 +- 0.0207 seconds time elapsed ( +- 0.19% )
root@...-ep:~/bench# time taskset -c 3 ./starve/starve 1000000
expecting to receive 1000000 signals
real 0m5.651s
user 0m0.604s
sys 0m4.047s
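
(For anyone wanting to reproduce: starve is Mike's signal-storm test.
The sketch below is a hypothetical approximation of the pattern, not
the actual starve.c: a receiver sleeps in pause() counting signals
while a forked hog on the same CPU spams it with SIGUSR1. When wakeup
placement gives the frequently-sleeping receiver no credit, it makes
almost no progress against the hog -- hence the ^C above.

	#include <signal.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	static volatile sig_atomic_t got;

	static void handler(int sig)
	{
		(void)sig;
		got++;			/* count each delivered signal */
	}

	int main(int argc, char **argv)
	{
		long want = argc > 1 ? atol(argv[1]) : 1000000;
		pid_t parent = getpid(), child;

		signal(SIGUSR1, handler);
		printf("expecting to receive %ld signals\n", want);

		child = fork();
		if (child == 0)
			for (;;)	/* CPU hog and signal source */
				kill(parent, SIGUSR1);

		while (got < want)
			pause();	/* sleep until the next signal */

		kill(child, SIGKILL);
		return 0;
	}
)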
---
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4938,17 +4938,22 @@ place_entity(struct cfs_rq *cfs_rq, stru
 {
 	u64 vruntime = avg_vruntime(cfs_rq);
 
+	if (sched_feat(PRESERVE_LAG))
+		vruntime -= se->lag;
+
 	if (sched_feat(FAIR_SLEEPERS)) {
-		u64 sleep_time;
+//		u64 sleep_time;
 
 		/* sleeps up to a single latency don't count. */
 		if (!initial) {
-			unsigned long thresh;
+			unsigned long thresh = TICK_NSEC;
 
-			if (se_is_idle(se))
-				thresh = sysctl_sched_min_granularity;
-			else
-				thresh = sysctl_sched_latency;
+			if (!sched_feat(EEVDF)) {
+				if (se_is_idle(se))
+					thresh = sysctl_sched_min_granularity;
+				else
+					thresh = sysctl_sched_latency;
+			}
 
 			/*
 			 * Halve their sleep time's effect, to allow
@@ -4957,7 +4962,7 @@ place_entity(struct cfs_rq *cfs_rq, stru
 			if (sched_feat(GENTLE_FAIR_SLEEPERS))
 				thresh >>= 1;
 
-			vruntime -= thresh;
+			vruntime -= calc_delta_fair(thresh, se);
 		}
 
 		/*
@@ -4966,15 +4971,12 @@ place_entity(struct cfs_rq *cfs_rq, stru
 	 * slept for a long time, don't even try to compare its vruntime with
 	 * the base as it may be too far off and the comparison may get
 	 * inversed due to s64 overflow.
-	 */
 	sleep_time = rq_clock_task(rq_of(cfs_rq)) - se->exec_start;
 	if ((s64)sleep_time < 60LL * NSEC_PER_SEC)
+	 */
 		vruntime = max_vruntime(se->vruntime, vruntime);
 	}
-	if (sched_feat(PRESERVE_LAG))
-		vruntime -= se->lag;
-
 	se->vruntime = vruntime;
 
 	set_slice(cfs_rq, se);
 }
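
(Side note on the PRESERVE_LAG hunks: moving the lag subtraction up
means it now feeds into the FAIR_SLEEPERS credit and the max_vruntime
clamp instead of being applied after them. Conceptually -- a sketch of
the bookkeeping, not the kernel code, assuming dequeue records the
entity's offset from the queue average as in this series:

	/* at dequeue: remember distance from the average vruntime V */
	static inline void lag_on_dequeue(struct sched_entity *se, u64 V)
	{
		se->lag = (s64)(V - se->vruntime);
	}

	/*
	 * at placement: re-apply that offset against the new V, i.e.
	 * the diff's "vruntime -= se->lag", so sleeping neither gains
	 * nor loses service relative to the still-running competition.
	 */
	static inline u64 lag_on_place(struct sched_entity *se, u64 V)
	{
		return V - se->lag;
	}
)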