lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  PHC 
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 16 Dec 2020 12:50:23 +0800
From:   Xuewen Yan <>
Subject: [PATCH v2] fair/util_est: fix schedutil may use an old util_est.enqueued value at dequeue

From: Xuewen Yan <>

when a task dequeued, it would update it's util and cfs_rq's util,
the following calling relationship exists here:

update_load_avg() -> cfs_rq_util_change() -> cpufreq_update_util()->
sugov_update_[shared\|single] -> sugov_get_util() -> cpu_util_cfs();

util = max {rq->cfs.avg.util_avg,rq->cfs.avg.util_est.enqueued };

For those tasks alternate long and short runs scenarios, the
rq->cfs.avg.util_est.enqueued may be bigger than rq->cfs.avg.util_avg.
but because the _task_util_est(p) didn't be subtracted, this would
cause schedutil use an old util_est.enqueued value.

This could have an effect in task ramp-down and ramp-up scenarios.
when the task dequeued, we hope the freq could be reduced as soon
as possible. If the schedutil's value is the old util_est.enqueued, this
may cause the cpu couldn't reduce it's freq.

separate the util_est_dequeue() into util_est_dequeue() and
util_est_update(), and dequeue the _task_util_est(p) before update util.

Signed-off-by: Xuewen Yan <>
Signed-off-by: Dietmar Eggemann <>

Changes since v1:
-change the util_est_dequeue/update to inline type
-use unsigned int enqueued rather than util_est in util_est_dequeue
-remove "cpu" var

 kernel/sched/fair.c | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ae7ceba..864d6b9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3945,22 +3945,31 @@ static inline bool within_margin(int value, int margin)
 	return ((unsigned int)(value + margin - 1) < (2 * margin - 1));
-static void
-util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
+static inline void util_est_dequeue(struct cfs_rq *cfs_rq,
+				struct task_struct *p)
-	long last_ewma_diff;
-	struct util_est ue;
-	int cpu;
+	unsigned int enqueued;
 	if (!sched_feat(UTIL_EST))
 	/* Update root cfs_rq's estimated utilization */
-	ue.enqueued  = cfs_rq->avg.util_est.enqueued;
-	ue.enqueued -= min_t(unsigned int, ue.enqueued, _task_util_est(p));
-	WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued);
+	enqueued  = cfs_rq->avg.util_est.enqueued;
+	enqueued -= min_t(unsigned int, enqueued, _task_util_est(p));
+	WRITE_ONCE(cfs_rq->avg.util_est.enqueued, enqueued);
+static inline void util_est_update(struct cfs_rq *cfs_rq,
+				struct task_struct *p,
+				bool task_sleep)
+	long last_ewma_diff;
+	struct util_est ue;
+	if (!sched_feat(UTIL_EST))
+		return;
 	 * Skip update of task's estimated utilization when the task has not
@@ -4001,8 +4010,7 @@ static inline bool within_margin(int value, int margin)
 	 * To avoid overestimation of actual task utilization, skip updates if
 	 * we cannot grant there is idle time in this CPU.
-	cpu = cpu_of(rq_of(cfs_rq));
-	if (task_util(p) > capacity_orig_of(cpu))
+	if (task_util(p) > capacity_orig_of(cpu_of(rq_of(cfs_rq))))
@@ -4085,7 +4093,10 @@ static inline int newidle_balance(struct rq *rq, struct rq_flags *rf)
 util_est_enqueue(struct cfs_rq *cfs_rq, struct task_struct *p) {}
 static inline void
-util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p,
+util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p) {}
+static inline void
+util_est_update(struct cfs_rq *cfs_rq, struct task_struct *p,
 		 bool task_sleep) {}
 static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
@@ -5589,6 +5600,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	int idle_h_nr_running = task_has_idle_policy(p);
 	bool was_sched_idle = sched_idle_rq(rq);
+	util_est_dequeue(&rq->cfs, p);
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, flags);
@@ -5639,7 +5652,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		rq->next_balance = jiffies;
-	util_est_dequeue(&rq->cfs, p, task_sleep);
+	util_est_update(&rq->cfs, p, task_sleep);

Powered by blists - more mailing lists