linux-kernel - [RFC v3 1/6] Track the active utilisation

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1477317998-7487-2-git-send-email-luca.abeni@unitn.it>
Date:   Mon, 24 Oct 2016 16:06:33 +0200
From:   Luca Abeni <luca.abeni@...tn.it>
To:     linux-kernel@...r.kernel.org
Cc:     Peter Zijlstra <peterz@...radead.org>,
        Ingo Molnar <mingo@...hat.com>,
        Juri Lelli <juri.lelli@....com>,
        Claudio Scordino <claudio@...dence.eu.com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Luca Abeni <luca.abeni@...tn.it>
Subject: [RFC v3 1/6] Track the active utilisation

The active utilisation here is defined as the total utilisation of the
active (TASK_RUNNING) tasks queued on a runqueue. Hence, it is increased
when a task wakes up and is decreased when a task blocks.

When a task is migrated from CPUi to CPUj, immediately subtract the task's
utilisation from CPUi and add it to CPUj. This mechanism is implemented by
modifying the pull and push functions.
Note: this is not fully correct from the theoretical point of view
(the utilisation should be removed from CPUi only at the 0 lag time),
but doing the right thing would be _MUCH_ more complex (leaving the
timer armed when the task is on a different CPU... Inactive timers should
be moved from per-task timers to per-runqueue lists of timers! Bah...)

The utilisation tracking mechanism implemented in this commit can be
fixed / improved by decreasing the active utilisation at the so-called
"0-lag time" instead of when the task blocks.

Signed-off-by: Juri Lelli <juri.lelli@....com>
Signed-off-by: Luca Abeni <luca.abeni@...tn.it>
---
 kernel/sched/deadline.c | 39 ++++++++++++++++++++++++++++++++++++++-
 kernel/sched/sched.h    |  6 ++++++
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 37e2449..3d95c1d 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -43,6 +43,22 @@ static inline int on_dl_rq(struct sched_dl_entity *dl_se)
 	return !RB_EMPTY_NODE(&dl_se->rb_node);
 }
 
+static void add_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+	u64 se_bw = dl_se->dl_bw;
+
+	dl_rq->running_bw += se_bw;
+}
+
+static void sub_running_bw(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
+{
+	u64 se_bw = dl_se->dl_bw;
+
+	dl_rq->running_bw -= se_bw;
+	if (WARN_ON(dl_rq->running_bw < 0))
+		dl_rq->running_bw = 0;
+}
+
 static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
 {
 	struct sched_dl_entity *dl_se = &p->dl;
@@ -498,6 +514,8 @@ static void update_dl_entity(struct sched_dl_entity *dl_se,
 	struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
 	struct rq *rq = rq_of_dl_rq(dl_rq);
 
+	add_running_bw(dl_se, dl_rq);
+
 	if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
 	    dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
 		dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
@@ -947,14 +965,19 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 		return;
 	}
 
+	if (p->on_rq == TASK_ON_RQ_MIGRATING)
+		add_running_bw(&p->dl, &rq->dl);
+
 	/*
 	 * If p is throttled, we do nothing. In fact, if it exhausted
 	 * its budget it needs a replenishment and, since it now is on
 	 * its rq, the bandwidth timer callback (which clearly has not
 	 * run yet) will take care of this.
 	 */
-	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH))
+	if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH)) {
+		add_running_bw(&p->dl, &rq->dl);
 		return;
+	}
 
 	enqueue_dl_entity(&p->dl, pi_se, flags);
 
@@ -972,6 +995,12 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
 {
 	update_curr_dl(rq);
 	__dequeue_task_dl(rq, p, flags);
+
+	if (p->on_rq == TASK_ON_RQ_MIGRATING)
+		sub_running_bw(&p->dl, &rq->dl);
+
+	if (flags & DEQUEUE_SLEEP)
+		sub_running_bw(&p->dl, &rq->dl);
 }
 
 /*
@@ -1501,7 +1530,9 @@ static int push_dl_task(struct rq *rq)
 	}
 
 	deactivate_task(rq, next_task, 0);
+	sub_running_bw(&next_task->dl, &rq->dl);
 	set_task_cpu(next_task, later_rq->cpu);
+	add_running_bw(&next_task->dl, &later_rq->dl);
 	activate_task(later_rq, next_task, 0);
 	ret = 1;
 
@@ -1589,7 +1620,9 @@ static void pull_dl_task(struct rq *this_rq)
 			resched = true;
 
 			deactivate_task(src_rq, p, 0);
+			sub_running_bw(&p->dl, &src_rq->dl);
 			set_task_cpu(p, this_cpu);
+			add_running_bw(&p->dl, &this_rq->dl);
 			activate_task(this_rq, p, 0);
 			dmin = p->dl.deadline;
 
@@ -1695,6 +1728,9 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
 	if (!start_dl_timer(p))
 		__dl_clear_params(p);
 
+	if (task_on_rq_queued(p))
+		sub_running_bw(&p->dl, &rq->dl);
+
 	/*
 	 * Since this might be the only -deadline task on the rq,
 	 * this is the right place to try to pull some other one
@@ -1712,6 +1748,7 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
  */
 static void switched_to_dl(struct rq *rq, struct task_struct *p)
 {
+	add_running_bw(&p->dl, &rq->dl);
 
 	/* If p is not queued we will update its parameters at next wakeup. */
 	if (!task_on_rq_queued(p))
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 055f935..3a36c74 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -535,6 +535,12 @@ struct dl_rq {
 #else
 	struct dl_bw dl_bw;
 #endif
+	/*
+	 * "Active utilization" for this runqueue: increased when a
+	 * task wakes up (becomes TASK_RUNNING) and decreased when a
+	 * task blocks
+	 */
+	s64 running_bw;
 };
 
 #ifdef CONFIG_SMP
-- 
2.7.4