linux-kernel - [RFC][PATCH 13/15] sched/fair: Implement latency-nice

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives

Hash Suite: Windows password security audit tool. GUI, reports in PDF.

[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]

Message-ID: <20230531124604.477939524@infradead.org>
Date:   Wed, 31 May 2023 13:58:52 +0200
From:   Peter Zijlstra <peterz@...radead.org>
To:     mingo@...nel.org, vincent.guittot@...aro.org
Cc:     linux-kernel@...r.kernel.org, peterz@...radead.org,
        juri.lelli@...hat.com, dietmar.eggemann@....com,
        rostedt@...dmis.org, bsegall@...gle.com, mgorman@...e.de,
        bristot@...hat.com, corbet@....net, qyousef@...alina.io,
        chris.hyser@...cle.com, patrick.bellasi@...bug.net, pjt@...gle.com,
        pavel@....cz, qperret@...gle.com, tim.c.chen@...ux.intel.com,
        joshdon@...gle.com, timj@....org, kprateek.nayak@....com,
        yu.c.chen@...el.com, youssefesmat@...omium.org,
        joel@...lfernandes.org, efault@....de, tglx@...utronix.de
Subject: [RFC][PATCH 13/15] sched/fair: Implement latency-nice

Implement latency-nice as a modulation of the EEVDF r_i parameter,
specifically apply the inverse sched_prio_to_weight[] relation on
base_slice.

Given a base slice of 3 [ms], this gives a range of:

  latency-nice  19: 3*1024 / 15    ~= 204.8 [ms]
  latency-nice -20: 3*1024 / 88761 ~= 0.034 [ms]

(which might not make sense)

Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
Tested-by: K Prateek Nayak <kprateek.nayak@....com>
---
 kernel/sched/core.c  |   14 ++++++++++----
 kernel/sched/fair.c  |   22 +++++++++++++++-------
 kernel/sched/sched.h |    2 ++
 3 files changed, 27 insertions(+), 11 deletions(-)

--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1305,6 +1305,12 @@ static void set_load_weight(struct task_
 	}
 }
 
+static inline void set_latency_prio(struct task_struct *p, int prio)
+{
+	p->latency_prio = prio;
+	set_latency_fair(&p->se, prio - MAX_RT_PRIO);
+}
+
 #ifdef CONFIG_UCLAMP_TASK
 /*
  * Serializes updates of utilization clamp values
@@ -4464,9 +4470,10 @@ static void __sched_fork(unsigned long c
 	p->se.nr_migrations		= 0;
 	p->se.vruntime			= 0;
 	p->se.vlag			= 0;
-	p->se.slice			= sysctl_sched_base_slice;
 	INIT_LIST_HEAD(&p->se.group_node);
 
+	set_latency_prio(p, p->latency_prio);
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	p->se.cfs_rq			= NULL;
 #endif
@@ -4718,8 +4725,7 @@ int sched_fork(unsigned long clone_flags
 
 		p->prio = p->normal_prio = p->static_prio;
 		set_load_weight(p, false);
-
-		p->latency_prio = NICE_TO_PRIO(0);
+		set_latency_prio(p, NICE_TO_PRIO(0));
 
 		/*
 		 * We don't need the reset flag anymore after the fork. It has
@@ -7507,7 +7513,7 @@ static void __setscheduler_latency(struc
 				   const struct sched_attr *attr)
 {
 	if (attr->sched_flags & SCHED_FLAG_LATENCY_NICE)
-		p->latency_prio = NICE_TO_PRIO(attr->sched_latency_nice);
+		set_latency_prio(p, NICE_TO_PRIO(attr->sched_latency_nice));
 }
 
 /*
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -952,6 +952,21 @@ int sched_update_scaling(void)
 }
 #endif
 
+void set_latency_fair(struct sched_entity *se, int prio)
+{
+	u32 weight = sched_prio_to_weight[prio];
+	u64 base = sysctl_sched_base_slice;
+
+	/*
+	 * For EEVDF the virtual time slope is determined by w_i (iow.
+	 * nice) while the request time r_i is determined by
+	 * latency-nice.
+	 *
+	 * Smaller request gets better latency.
+	 */
+	se->slice = div_u64(base << SCHED_FIXEDPOINT_SHIFT, weight);
+}
+
 static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
 
 /*
@@ -964,13 +979,6 @@ static void update_deadline(struct cfs_r
 		return;
 
 	/*
-	 * For EEVDF the virtual time slope is determined by w_i (iow.
-	 * nice) while the request time r_i is determined by
-	 * sysctl_sched_base_slice.
-	 */
-	se->slice = sysctl_sched_base_slice;
-
-	/*
 	 * EEVDF: vd_i = ve_i + r_i / w_i
 	 */
 	se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2495,6 +2495,8 @@ extern unsigned int sysctl_numa_balancin
 extern unsigned int sysctl_numa_balancing_hot_threshold;
 #endif
 
+extern void set_latency_fair(struct sched_entity *se, int prio);
+
 #ifdef CONFIG_SCHED_HRTICK
 
 /*