Message-ID: <ZAoMgmMOYXBA4Mzs@hirez.programming.kicks-ass.net>
Date: Thu, 9 Mar 2023 17:42:42 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Mike Galbraith <efault@....de>
Cc: mingo@...nel.org, vincent.guittot@...aro.org,
linux-kernel@...r.kernel.org, juri.lelli@...hat.com,
dietmar.eggemann@....com, rostedt@...dmis.org, bsegall@...gle.com,
mgorman@...e.de, bristot@...hat.com, corbet@....net,
qyousef@...alina.io, chris.hyser@...cle.com,
patrick.bellasi@...bug.net, pjt@...gle.com, pavel@....cz,
qperret@...gle.com, tim.c.chen@...ux.intel.com, joshdon@...gle.com,
timj@....org, kprateek.nayak@....com, yu.c.chen@...el.com,
youssefesmat@...omium.org, joel@...lfernandes.org
Subject: Re: [PATCH 10/10] sched/fair: Implement an EEVDF like policy

On Thu, Mar 09, 2023 at 04:29:04PM +0100, Peter Zijlstra wrote:
> So if I add TICK_NSEC based sleeper bonus (/2 for gentle), then starve
> works -- this is the absolutely minimal amount required. It sucks a bit
> it's HZ dependent, but alas.

Fixes starve, sucks for schbench and hackbench :/

Clearly more thinking is required...
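
(For reference, with this hack the sleeper credit is no longer a raw
nanosecond value but is scaled by the entity's weight through
calc_delta_fair(). Roughly -- a sketch of the effective computation,
not the kernel's fixed-point __calc_delta() arithmetic:

	/*
	 * Per wakeup, a sleeper is placed at most one (half) tick of
	 * weighted service ahead of the queue average; heavier (lower
	 * nice) entities get a smaller virtual-time credit.
	 */
	static u64 sleeper_bonus_vtime(struct sched_entity *se)
	{
		u64 thresh = TICK_NSEC;		/* HZ dependent, as noted */

		if (sched_feat(GENTLE_FAIR_SLEEPERS))
			thresh >>= 1;

		/* calc_delta_fair(): wall clock -> vtime for this weight */
		return thresh * NICE_0_LOAD / se->load.weight;
	}
)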
root@...-ep:~/bench# echo NO_FAIR_SLEEPERS > /debug/sched/features
root@...-ep:~/bench# ./doit-schbench.sh ; ./doit-hackbench-series.sh
Latency percentiles (usec)
50.0000th: 83
75.0000th: 102
90.0000th: 109
95.0000th: 114
*99.0000th: 450
99.5000th: 723
99.9000th: 985
min=0, max=1067
1: 0.55355 +- 0.00290 seconds time elapsed ( +- 0.52% )
2: 0.79591 +- 0.00545 seconds time elapsed ( +- 0.68% )
5: 1.5804 +- 0.0102 seconds time elapsed ( +- 0.65% )
10: 2.5674 +- 0.0110 seconds time elapsed ( +- 0.43% )
20: 4.6116 +- 0.0160 seconds time elapsed ( +- 0.35% )
40: 9.5965 +- 0.0167 seconds time elapsed ( +- 0.17% )
root@...-ep:~/bench# time taskset -c 3 ./starve/starve 1000000
expecting to receive 1000000 signals
^C
real 0m32.999s
user 0m0.000s
sys 0m0.719s
root@...-ep:~/bench# echo FAIR_SLEEPERS > /debug/sched/features
root@...-ep:~/bench# ./doit-schbench.sh ; ./doit-hackbench-series.sh
Latency percentiles (usec)
50.0000th: 87
75.0000th: 103
90.0000th: 111
95.0000th: 116
*99.0000th: 163
99.5000th: 697
99.9000th: 1110
min=0, max=1522
1: 0.59076 +- 0.00577 seconds time elapsed ( +- 0.98% )
2: 0.86093 +- 0.00407 seconds time elapsed ( +- 0.47% )
5: 2.1018 +- 0.0129 seconds time elapsed ( +- 0.61% )
10: 3.6378 +- 0.0395 seconds time elapsed ( +- 1.09% )
20: 5.56884 +- 0.00979 seconds time elapsed ( +- 0.18% )
40: 10.8570 +- 0.0207 seconds time elapsed ( +- 0.19% )
root@...-ep:~/bench# time taskset -c 3 ./starve/starve 1000000
expecting to receive 1000000 signals
real 0m5.651s
user 0m0.604s
sys 0m4.047s
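
(For anyone wanting to reproduce: starve is Mike's signal-storm test.
The sketch below is a hypothetical approximation of the pattern, not
the actual starve.c: a receiver sleeps in pause() counting signals
while a forked hog on the same CPU spams it with SIGUSR1. When wakeup
placement gives the frequently-sleeping receiver no credit, it makes
almost no progress against the hog -- hence the ^C above.

	#include <signal.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	static volatile sig_atomic_t got;

	static void handler(int sig)
	{
		(void)sig;
		got++;			/* count each delivered signal */
	}

	int main(int argc, char **argv)
	{
		long want = argc > 1 ? atol(argv[1]) : 1000000;
		pid_t parent = getpid(), child;

		signal(SIGUSR1, handler);
		printf("expecting to receive %ld signals\n", want);

		child = fork();
		if (child == 0)
			for (;;)	/* CPU hog and signal source */
				kill(parent, SIGUSR1);

		while (got < want)
			pause();	/* sleep until the next signal */

		kill(child, SIGKILL);
		return 0;
	}
)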
---
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4938,17 +4938,22 @@ place_entity(struct cfs_rq *cfs_rq, stru
 {
 	u64 vruntime = avg_vruntime(cfs_rq);
 
+	if (sched_feat(PRESERVE_LAG))
+		vruntime -= se->lag;
+
 	if (sched_feat(FAIR_SLEEPERS)) {
-		u64 sleep_time;
+//		u64 sleep_time;
 
 		/* sleeps up to a single latency don't count. */
 		if (!initial) {
-			unsigned long thresh;
+			unsigned long thresh = TICK_NSEC;
 
-			if (se_is_idle(se))
-				thresh = sysctl_sched_min_granularity;
-			else
-				thresh = sysctl_sched_latency;
+			if (!sched_feat(EEVDF)) {
+				if (se_is_idle(se))
+					thresh = sysctl_sched_min_granularity;
+				else
+					thresh = sysctl_sched_latency;
+			}
 
 			/*
 			 * Halve their sleep time's effect, to allow
@@ -4957,7 +4962,7 @@ place_entity(struct cfs_rq *cfs_rq, stru
 			if (sched_feat(GENTLE_FAIR_SLEEPERS))
 				thresh >>= 1;
 
-			vruntime -= thresh;
+			vruntime -= calc_delta_fair(thresh, se);
 		}
 
 		/*
@@ -4966,15 +4971,12 @@ place_entity(struct cfs_rq *cfs_rq, stru
 	 * slept for a long time, don't even try to compare its vruntime with
 	 * the base as it may be too far off and the comparison may get
 	 * inversed due to s64 overflow.
-	 */
 	sleep_time = rq_clock_task(rq_of(cfs_rq)) - se->exec_start;
 	if ((s64)sleep_time < 60LL * NSEC_PER_SEC)
+	 */
 		vruntime = max_vruntime(se->vruntime, vruntime);
 	}
-	if (sched_feat(PRESERVE_LAG))
-		vruntime -= se->lag;
-
 	se->vruntime = vruntime;
 
 	set_slice(cfs_rq, se);
 }
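
(Side note on the PRESERVE_LAG hunks: moving the lag subtraction up
means it now feeds into the FAIR_SLEEPERS credit and the max_vruntime
clamp instead of being applied after them. Conceptually -- a sketch of
the bookkeeping, not the kernel code, assuming dequeue records the
entity's offset from the queue average as in this series:

	/* at dequeue: remember distance from the average vruntime V */
	static inline void lag_on_dequeue(struct sched_entity *se, u64 V)
	{
		se->lag = (s64)(V - se->vruntime);
	}

	/*
	 * at placement: re-apply that offset against the new V, i.e.
	 * the diff's "vruntime -= se->lag", so sleeping neither gains
	 * nor loses service relative to the still-running competition.
	 */
	static inline u64 lag_on_place(struct sched_entity *se, u64 V)
	{
		return V - se->lag;
	}
)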