Message-ID: <20231108095600.GM8262@noisy.programming.kicks-ass.net>
Date:   Wed, 8 Nov 2023 10:56:00 +0100
From:   Peter Zijlstra <peterz@...radead.org>
To:     Ankur Arora <ankur.a.arora@...cle.com>
Cc:     linux-kernel@...r.kernel.org, tglx@...utronix.de,
        torvalds@...ux-foundation.org, paulmck@...nel.org,
        linux-mm@...ck.org, x86@...nel.org, akpm@...ux-foundation.org,
        luto@...nel.org, bp@...en8.de, dave.hansen@...ux.intel.com,
        hpa@...or.com, mingo@...hat.com, juri.lelli@...hat.com,
        vincent.guittot@...aro.org, willy@...radead.org, mgorman@...e.de,
        jon.grimm@....com, bharata@....com, raghavendra.kt@....com,
        boris.ostrovsky@...cle.com, konrad.wilk@...cle.com,
        jgross@...e.com, andrew.cooper3@...rix.com, mingo@...nel.org,
        bristot@...nel.org, mathieu.desnoyers@...icios.com,
        geert@...ux-m68k.org, glaubitz@...sik.fu-berlin.de,
        anton.ivanov@...bridgegreys.com, mattst88@...il.com,
        krypton@...ich-teichert.org, rostedt@...dmis.org,
        David.Laight@...lab.com, richard@....at, mjguzik@...il.com
Subject: Re: [RFC PATCH 42/86] sched: force preemption on tick expiration

On Tue, Nov 07, 2023 at 01:57:28PM -0800, Ankur Arora wrote:

> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 4d86c618ffa2..fe7e5e9b2207 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -1016,8 +1016,11 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
>   * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
>   * this is probably good enough.
>   */
> -static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
> +static void update_deadline(struct cfs_rq *cfs_rq,
> +			    struct sched_entity *se, bool tick)
>  {
> +	struct rq *rq = rq_of(cfs_rq);
> +
>  	if ((s64)(se->vruntime - se->deadline) < 0)
>  		return;
>  
> @@ -1033,13 +1036,19 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
>  	 */
>  	se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
>  
> +	if (cfs_rq->nr_running < 2)
> +		return;
> +
>  	/*
> -	 * The task has consumed its request, reschedule.
> +	 * The task has consumed its request, reschedule; eagerly
> +	 * if it ignored our last lazy reschedule.
>  	 */
> -	if (cfs_rq->nr_running > 1) {
> -		resched_curr(rq_of(cfs_rq));
> -		clear_buddies(cfs_rq, se);
> -	}
> +	if (tick && test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
> +		__resched_curr(rq, RESCHED_eager);
> +	else
> +		resched_curr(rq);
> +
> +	clear_buddies(cfs_rq, se);
>  }
>  
>  #include "pelt.h"
> @@ -1147,7 +1156,7 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
>  /*
>   * Update the current task's runtime statistics.
>   */
> -static void update_curr(struct cfs_rq *cfs_rq)
> +static void __update_curr(struct cfs_rq *cfs_rq, bool tick)
>  {
>  	struct sched_entity *curr = cfs_rq->curr;
>  	u64 now = rq_clock_task(rq_of(cfs_rq));
> @@ -1174,7 +1183,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
>  	schedstat_add(cfs_rq->exec_clock, delta_exec);
>  
>  	curr->vruntime += calc_delta_fair(delta_exec, curr);
> -	update_deadline(cfs_rq, curr);
> +	update_deadline(cfs_rq, curr, tick);
>  	update_min_vruntime(cfs_rq);
>  
>  	if (entity_is_task(curr)) {
> @@ -1188,6 +1197,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
>  	account_cfs_rq_runtime(cfs_rq, delta_exec);
>  }
>  
> +static void update_curr(struct cfs_rq *cfs_rq)
> +{
> +	__update_curr(cfs_rq, false);
> +}
> +
>  static void update_curr_fair(struct rq *rq)
>  {
>  	update_curr(cfs_rq_of(&rq->curr->se));
> @@ -5309,7 +5323,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
>  	/*
>  	 * Update run-time statistics of the 'current'.
>  	 */
> -	update_curr(cfs_rq);
> +	__update_curr(cfs_rq, true);
>  
>  	/*
>  	 * Ensure that runnable average is periodically updated.

I'm thinking this will be less of a mess if you flip it around some.

(ignore the hrtick mess, I'll try and get that cleaned up)

This way you have two distinct sites to handle the preemption: the
update_curr() one would be 'FULL ? force : lazy', while the tick one
gets the special magic bits.
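
For the update_curr() site I mean something like the below; just a
rough sketch on top of the restructuring further down, assuming
resched_curr() stays the lazy flavour in your series, and with
preempt_model_full() only standing in for whatever knob ends up
selecting full preemption:

static void update_curr(struct cfs_rq *cfs_rq)
{
	if (!__update_curr(cfs_rq))
		return;

	/* FULL ? force : lazy */
	if (preempt_model_full())
		__resched_curr(rq_of(cfs_rq), RESCHED_eager);
	else
		resched_curr(rq_of(cfs_rq));
}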

---
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index df348aa55d3c..5399696de9e0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1016,10 +1016,10 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
  * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
  * this is probably good enough.
  */
-static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static bool update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	if ((s64)(se->vruntime - se->deadline) < 0)
-		return;
+		return false;
 
 	/*
 	 * For EEVDF the virtual time slope is determined by w_i (iow.
@@ -1037,9 +1037,11 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * The task has consumed its request, reschedule.
 	 */
 	if (cfs_rq->nr_running > 1) {
-		resched_curr(rq_of(cfs_rq));
 		clear_buddies(cfs_rq, se);
+		return true;
 	}
+
+	return false;
 }
 
 #include "pelt.h"
@@ -1147,18 +1149,19 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
 /*
  * Update the current task's runtime statistics.
  */
-static void update_curr(struct cfs_rq *cfs_rq)
+static bool __update_curr(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *curr = cfs_rq->curr;
 	u64 now = rq_clock_task(rq_of(cfs_rq));
 	u64 delta_exec;
+	bool ret;
 
 	if (unlikely(!curr))
-		return;
+		return false;
 
 	delta_exec = now - curr->exec_start;
 	if (unlikely((s64)delta_exec <= 0))
-		return;
+		return false;
 
 	curr->exec_start = now;
 
@@ -1174,7 +1177,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	schedstat_add(cfs_rq->exec_clock, delta_exec);
 
 	curr->vruntime += calc_delta_fair(delta_exec, curr);
-	update_deadline(cfs_rq, curr);
+	ret = update_deadline(cfs_rq, curr);
 	update_min_vruntime(cfs_rq);
 
 	if (entity_is_task(curr)) {
@@ -1186,6 +1189,14 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	}
 
 	account_cfs_rq_runtime(cfs_rq, delta_exec);
+
+	return ret;
+}
+
+static void update_curr(struct cfs_rq *cfs_rq)
+{
+	if (__update_curr(cfs_rq))
+		resched_curr(rq_of(cfs_rq));
 }
 
 static void update_curr_fair(struct rq *rq)
@@ -5309,7 +5320,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 	/*
 	 * Update run-time statistics of the 'current'.
 	 */
-	update_curr(cfs_rq);
+	bool resched = __update_curr(cfs_rq);
 
 	/*
 	 * Ensure that runnable average is periodically updated.
@@ -5317,22 +5328,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 	update_load_avg(cfs_rq, curr, UPDATE_TG);
 	update_cfs_group(curr);
 
-#ifdef CONFIG_SCHED_HRTICK
-	/*
-	 * queued ticks are scheduled to match the slice, so don't bother
-	 * validating it and just reschedule.
-	 */
-	if (queued) {
-		resched_curr(rq_of(cfs_rq));
-		return;
-	}
-	/*
-	 * don't let the period tick interfere with the hrtick preemption
-	 */
-	if (!sched_feat(DOUBLE_TICK) &&
-			hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
-		return;
-#endif
+	return resched;
 }
 
 
@@ -12387,12 +12383,16 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &curr->se;
+	bool resched = false;
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
-		entity_tick(cfs_rq, se, queued);
+		resched |= entity_tick(cfs_rq, se, queued);
 	}
 
+	if (resched)
+		resched_curr(rq);
+
 	if (static_branch_unlikely(&sched_numa_balancing))
 		task_tick_numa(rq, curr);
 

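The tick site then gets the magic bits; again only a sketch, reusing
__resched_curr()/RESCHED_eager and TIF_NEED_RESCHED_LAZY from your
patch, and entity_tick() would obviously need to return bool for this
to work:

static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
{
	struct cfs_rq *cfs_rq;
	struct sched_entity *se = &curr->se;
	bool resched = false;

	for_each_sched_entity(se) {
		cfs_rq = cfs_rq_of(se);
		resched |= entity_tick(cfs_rq, se, queued);
	}

	if (resched) {
		/*
		 * Reschedule eagerly if the previous lazy reschedule
		 * was ignored, otherwise stay lazy.
		 */
		if (test_tsk_thread_flag(curr, TIF_NEED_RESCHED_LAZY))
			__resched_curr(rq, RESCHED_eager);
		else
			resched_curr(rq);
	}

	/* numa balancing etc. unchanged from here on */

That keeps the lazy/eager policy at the two call sites instead of
burying it inside update_deadline().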