lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20230420150537.GC4253@hirez.programming.kicks-ass.net>
Date:   Thu, 20 Apr 2023 17:05:37 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     Chris Mason <clm@...a.com>
Cc:     David Vernet <void@...ifault.com>, linux-kernel@...r.kernel.org,
        kernel-team@...com, Ingo Molnar <mingo@...nel.org>,
        Vincent Guittot <vincent.guittot@...aro.org>,
        gautham.shenoy@....com
Subject: Re: schbench v1.0

On Mon, Apr 17, 2023 at 10:10:25AM +0200, Chris Mason wrote:

> F128 N10                EEVDF    Linus
> Wakeup  (usec): 99.0th: 755      1,266
> Request (usec): 99.0th: 25,632   22,304
> RPS    (count): 50.0th: 4,280    4,376
> 
> F128 N10 no-locking     EEVDF    Linus
> Wakeup  (usec): 99.0th: 823      1,118
> Request (usec): 99.0th: 17,184   14,192
> RPS    (count): 50.0th: 4,440    4,456

With the below fixlet (against queue/sched/eevdf) on my measly IVB-EP
(2*10*2):

./schbench -F128 -n10 -C

Request Latencies percentiles (usec) runtime 30 (s) (153800 total samples)
	  90.0th: 6376       (35699 samples)
	* 99.0th: 6440       (9055 samples)
	  99.9th: 7048       (1345 samples)

CFS

schbench -m2 -F128 -n10	-r90	OTHER	BATCH
Wakeup  (usec): 99.0th:		6600	6328
Request (usec): 99.0th:		35904	14640
RPS    (count): 50.0th:		5368	6104

EEVDF base_slice = 3000[us] (default)

schbench -m2 -F128 -n10	-r90	OTHER	BATCH
Wakeup  (usec): 99.0th:		3820	6968
Request (usec): 99.0th:		30496	24608
RPS    (count): 50.0th:		3836	5496

EEVDF base_slice = 6440[us] (per the calibration run)

schbench -m2 -F128 -n10	-r90	OTHER	BATCH
Wakeup  (usec): 99.0th:		9136	6232
Request (usec): 99.0th:		21984	12944
RPS    (count): 50.0th:		4968	6184


With base_slice >= request and BATCH (which disables wakeup preemption), the
EEVDF scheduler should degenerate into a FIFO queue, which is close to ideal
for your workload.

For giggles:

echo 6440000 > /debug/sched/base_slice_ns
echo NO_PLACE_LAG > /debug/sched/features
chrt -b 0 ./schbench -m2 -F128 -n10 -r90

gets me:

Wakeup Latencies percentiles (usec) runtime 90 (s) (526553 total samples)
	  50.0th: 2084       (158080 samples)
	  90.0th: 5320       (210675 samples)
	* 99.0th: 6232       (47643 samples)
	  99.9th: 6648       (4297 samples)
	  min=1, max=13105
Request Latencies percentiles (usec) runtime 90 (s) (526673 total samples)
	  50.0th: 7544       (157171 samples)
	  90.0th: 10992      (210461 samples)
	* 99.0th: 12944      (48069 samples)
	  99.9th: 15088      (3716 samples)
	  min=3841, max=32882
RPS percentiles (requests) runtime 90 (s) (9 total samples)
	  20.0th: 6184       (9 samples)
	* 50.0th: 6184       (0 samples)
	  90.0th: 6184       (0 samples)
	  min=6173, max=6180
average rps: 6195.77

FWIW, your RPS stats are broken: note how all the percentile buckets report
values above the measured max, and the average does too.

---
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 050e98c97ba3..931102b00786 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1071,6 +1071,8 @@ void set_latency_fair(struct sched_entity *se, int prio)
 	se->slice = div_u64(base << SCHED_FIXEDPOINT_SHIFT, weight);
 }
 
+static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
+
 /*
  * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
  * this is probably good enough.
@@ -1084,6 +1086,14 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * EEVDF: vd_i = ve_i + r_i / w_i
 	 */
 	se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
+
+	/*
+	 * The task has consumed its request, reschedule.
+	 */
+	if (cfs_rq->nr_running > 1) {
+		resched_curr(rq_of(cfs_rq));
+		clear_buddies(cfs_rq, se);
+	}
 }
 
 #include "pelt.h"
@@ -3636,6 +3646,13 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
 		 * we need to scale se->vlag when w_i changes.
 		 */
 		se->vlag = div_s64(se->vlag * old_weight, weight);
+	} else {
+		/*
+		 * When the weight changes the virtual time slope changes and
+		 * we should adjust the virtual deadline. For now, punt and
+		 * simply reset.
+		 */
+		se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
 	}
 
 #ifdef CONFIG_SMP
@@ -5225,22 +5256,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		update_idle_cfs_rq_clock_pelt(cfs_rq);
 }
 
-/*
- * Preempt the current task with a newly woken task if needed:
- */
-static void
-check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
-{
-	if (pick_eevdf(cfs_rq) != curr) {
-		resched_curr(rq_of(cfs_rq));
-		/*
-		 * The current task ran long enough, ensure it doesn't get
-		 * re-elected due to buddy favours.
-		 */
-		clear_buddies(cfs_rq, curr);
-	}
-}
-
 static void
 set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
@@ -5353,9 +5384,6 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 			hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
 		return;
 #endif
-
-	if (cfs_rq->nr_running > 1)
-		check_preempt_tick(cfs_rq, curr);
 }
 
 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ