lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20230820210640.585311-3-qyousef@layalina.io>
Date:   Sun, 20 Aug 2023 22:06:38 +0100
From:   Qais Yousef <qyousef@...alina.io>
To:     "Rafael J. Wysocki" <rafael@...nel.org>,
        Viresh Kumar <viresh.kumar@...aro.org>,
        Ingo Molnar <mingo@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>
Cc:     linux-kernel@...r.kernel.org, linux-pm@...r.kernel.org,
        Vincent Guittot <vincent.guittot@...aro.org>,
        Dietmar Eggemann <dietmar.eggemann@....com>,
        Lukasz Luba <lukasz.luba@....com>,
        Qais Yousef <qyousef@...alina.io>
Subject: [PATCH 2/4] sched: cpufreq: Fix apply_dvfs_headroom() escaping uclamp constraints

DVFS headroom is applied after we calculate the effective_cpu_util()
which is where we honour uclamp constraints. It makes more sense to
apply the headroom there once and let all users naturally get the right
thing without having to sprinkle the call around in various places.

Before this fix running

	uclampset -M 800 cat /dev/zero > /dev/null

Will cause the test system to run at max freq of 2.8GHz. After the fix
it runs at 2.2GHz instead which is the correct value that matches the
capacity of 800.

Note that similar problem exist for uclamp_min. If util was 50, and
uclamp_min is 100. Since we apply_dvfs_headroom() after apply uclamp
constraints, we'll end up with util of 125 instead of 100. IOW, we get
boosted twice, first time by uclamp_min, and second time by dvfs
headroom.

Signed-off-by: Qais Yousef (Google) <qyousef@...alina.io>
---
 include/linux/energy_model.h     |  1 -
 kernel/sched/core.c              | 11 ++++++++---
 kernel/sched/cpufreq_schedutil.c |  5 ++---
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h
index 6ebde4e69e98..adec808b371a 100644
--- a/include/linux/energy_model.h
+++ b/include/linux/energy_model.h
@@ -243,7 +243,6 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd,
 	scale_cpu = arch_scale_cpu_capacity(cpu);
 	ps = &pd->table[pd->nr_perf_states - 1];
 
-	max_util = apply_dvfs_headroom(max_util);
 	max_util = min(max_util, allowed_cpu_cap);
 	freq = map_util_freq(max_util, ps->frequency, scale_cpu);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index efe3848978a0..441d433c83cd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7439,8 +7439,10 @@ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
 	 * frequency will be gracefully reduced with the utilization decay.
 	 */
 	util = util_cfs + cpu_util_rt(rq);
-	if (type == FREQUENCY_UTIL)
+	if (type == FREQUENCY_UTIL) {
+		util = apply_dvfs_headroom(util);
 		util = uclamp_rq_util_with(rq, util, p);
+	}
 
 	dl_util = cpu_util_dl(rq);
 
@@ -7471,9 +7473,12 @@ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
 	 *              max - irq
 	 *   U' = irq + --------- * U
 	 *                 max
+	 *
+	 * We only need to apply dvfs headroom to irq part since the util part
+	 * already had it applied.
 	 */
 	util = scale_irq_capacity(util, irq, max);
-	util += irq;
+	util += type ==  FREQUENCY_UTIL ? apply_dvfs_headroom(irq) : irq;
 
 	/*
 	 * Bandwidth required by DEADLINE must always be granted while, for
@@ -7486,7 +7491,7 @@ unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
 	 * an interface. So, we only do the latter for now.
 	 */
 	if (type == FREQUENCY_UTIL)
-		util += cpu_bw_dl(rq);
+		util += apply_dvfs_headroom(cpu_bw_dl(rq));
 
 	return min(max, util);
 }
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 916c4d3d6192..0c7565ac31fb 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -143,7 +143,6 @@ static unsigned int get_next_freq(struct sugov_policy *sg_policy,
 	unsigned int freq = arch_scale_freq_invariant() ?
 				policy->cpuinfo.max_freq : policy->cur;
 
-	util = apply_dvfs_headroom(util);
 	freq = map_util_freq(util, freq, max);
 
 	if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update)
@@ -406,8 +405,8 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
 	    sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util)
 		sg_cpu->util = prev_util;
 
-	cpufreq_driver_adjust_perf(sg_cpu->cpu, apply_dvfs_headroom(sg_cpu->bw_dl),
-				   apply_dvfs_headroom(sg_cpu->util), max_cap);
+	cpufreq_driver_adjust_perf(sg_cpu->cpu, sg_cpu->bw_dl,
+				   sg_cpu->util, max_cap);
 
 	sg_cpu->sg_policy->last_freq_update_time = time;
 }
-- 
2.34.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ