lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1495616452-7582-2-git-send-email-vincent.guittot@linaro.org>
Date:   Wed, 24 May 2017 11:00:51 +0200
From:   Vincent Guittot <vincent.guittot@...aro.org>
To:     peterz@...radead.org, mingo@...nel.org,
        linux-kernel@...r.kernel.org
Cc:     rjw@...ysocki.net, juri.lelli@....com, dietmar.eggemann@....com,
        Morten.Rasmussen@....com,
        Vincent Guittot <vincent.guittot@...aro.org>
Subject: [RFC PATCH 1/2] sched/rt: add utilization tracking

schedutil governor relies on cfs_rq's util_avg to choose the OPP when cfs
tasks are running. When the CPU is overloaded by cfs and rt tasks, cfs tasks
are preempted by rt tasks and in this case util_avg reflects the remaining
capacity that is used by cfs tasks but not what cfs tasks want to use. In such
case, schedutil can select a lower OPP when cfs task runs whereas the CPU is
overloaded. In order to have a more accurate view of the utilization of the
CPU, we track the utilization that is used by RT tasks.
DL tasks are not taken into account as they have their own utilization
tracking mecanism.

We don't use rt_avg which doesn't have the same dynamic as PELT and which
can include IRQ time that are also accounted in cfs task utilization

Signed-off-by: Vincent Guittot <vincent.guittot@...aro.org>
---

If the changes are reasonnable, it might worth moving the PELT function in a
dedicated pelt.c file and the ugly
extern int update_rt_rq_load_avg(u64 now, int cpu, struct rt_rq *rt_rq, int running);
in a pelt.h header


 kernel/sched/fair.c  | 21 +++++++++++++++++++++
 kernel/sched/rt.c    |  9 +++++++++
 kernel/sched/sched.h |  3 +++
 3 files changed, 33 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 47a0c55..0d7698b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2950,6 +2950,17 @@ __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
 			cfs_rq->curr != NULL, cfs_rq);
 }
 
+int update_rt_rq_load_avg(u64 now, int cpu, struct rt_rq *rt_rq, int running)
+{
+	int ret;
+
+	ret = ___update_load_avg(now, cpu, &rt_rq->avg, 0, running, NULL);
+
+
+	return ret;
+}
+
+
 /*
  * Signed add and clamp on underflow.
  *
@@ -3478,6 +3489,11 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
 	return 0;
 }
 
+int update_rt_rq_load_avg(u64 now, int cpu, struct rt_rq *rt_rq, int running)
+{
+	return 0;
+}
+
 #define UPDATE_TG	0x0
 #define SKIP_AGE_LOAD	0x0
 
@@ -7008,6 +7024,10 @@ static void update_blocked_averages(int cpu)
 		if (cfs_rq_is_decayed(cfs_rq))
 			list_del_leaf_cfs_rq(cfs_rq);
 	}
+
+	update_rt_rq_load_avg(rq_clock_task(rq), cpu, &rq->rt, 0);
+
+
 	rq_unlock_irqrestore(rq, &rf);
 }
 
@@ -7067,6 +7087,7 @@ static inline void update_blocked_averages(int cpu)
 	rq_lock_irqsave(rq, &rf);
 	update_rq_clock(rq);
 	update_cfs_rq_load_avg(cfs_rq_clock_task(cfs_rq), cfs_rq, true);
+	update_rt_rq_load_avg(rq_clock_task(rq), cpu, &rq->rt, 0);
 	rq_unlock_irqrestore(rq, &rf);
 }
 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 581d5c7..09293fa 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1534,6 +1534,8 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
 	return p;
 }
 
+extern int update_rt_rq_load_avg(u64 now, int cpu, struct rt_rq *rt_rq, int running);
+
 static struct task_struct *
 pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
@@ -1579,6 +1581,10 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 
 	queue_push_tasks(rq);
 
+	if (p)
+		update_rt_rq_load_avg(rq_clock_task(rq), cpu_of(rq), rt_rq,
+				rq->curr->sched_class == &rt_sched_class);
+
 	return p;
 }
 
@@ -1586,6 +1592,8 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
 	update_curr_rt(rq);
 
+	update_rt_rq_load_avg(rq_clock_task(rq), cpu_of(rq), &rq->rt, 1);
+
 	/*
 	 * The previous task needs to be made eligible for pushing
 	 * if it is still active
@@ -2368,6 +2376,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 	struct sched_rt_entity *rt_se = &p->rt;
 
 	update_curr_rt(rq);
+	update_rt_rq_load_avg(rq_clock_task(rq), cpu_of(rq), &rq->rt, 1);
 
 	watchdog(rq, p);
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index f8cf1d8..75b17ff 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -502,6 +502,9 @@ struct rt_rq {
 	unsigned long rt_nr_total;
 	int overloaded;
 	struct plist_head pushable_tasks;
+
+	struct sched_avg avg;
+
 #ifdef HAVE_RT_PUSH_IPI
 	int push_flags;
 	int push_cpu;
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ