[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1288334618.8661.162.camel@Palantir>
Date: Fri, 29 Oct 2010 08:43:38 +0200
From: Raistlin <raistlin@...ux.it>
To: Peter Zijlstra <peterz@...radead.org>
Cc: Ingo Molnar <mingo@...e.hu>, Thomas Gleixner <tglx@...utronix.de>,
Steven Rostedt <rostedt@...dmis.org>,
Chris Friesen <cfriesen@...tel.com>, oleg@...hat.com,
Frederic Weisbecker <fweisbec@...il.com>,
Darren Hart <darren@...art.com>,
Johan Eker <johan.eker@...csson.com>,
"p.faure" <p.faure@...tech.ch>,
linux-kernel <linux-kernel@...r.kernel.org>,
Claudio Scordino <claudio@...dence.eu.com>,
michael trimarchi <trimarchi@...is.sssup.it>,
Fabio Checconi <fabio@...dalf.sssup.it>,
Tommaso Cucinotta <cucinotta@...up.it>,
Juri Lelli <juri.lelli@...il.com>,
Nicola Manica <nicola.manica@...i.unitn.it>,
Luca Abeni <luca.abeni@...tn.it>,
Dhaval Giani <dhaval@...is.sssup.it>,
Harald Gustafsson <hgu1972@...il.com>,
paulmck <paulmck@...ux.vnet.ibm.com>
Subject: [RFC][PATCH 20/22] sched: drafted deadline inheritance logic
Some method to deal with rt-mutexes and make sched_dl interact with
the current PI-coded is needed, raising all but trivial issues, that
needs (according to us) to be solved with some restructuring of
the pi-code (i.e., going toward a proxy execution-ish implementation).
This is under development, in the meanwhile, as a temporary solution,
what this commits does is:
- ensure a pi-lock owner with waiters is never throttled down. Instead,
when it runs out of runtime, it immediately gets replenished and it's
deadline is postponed (as in SF_BWRECL_DL reclaiming policy);
- the scheduling parameters (relative deadline and default runtime)
used for that replenishments --during the whole period it holds the
pi-lock-- are the ones of the waiting task with earliest deadline.
Acting this way, we provide some kind of boosting to the lock-owner,
still by using the existing (actually, slightly modified by the previous
commit) pi-architecture.
Signed-off-by: Dario Faggioli <raistlin@...ux.it>
---
include/linux/sched.h | 3 ++
kernel/fork.c | 1 +
kernel/rtmutex.c | 13 ++++++++-
kernel/sched.c | 3 +-
kernel/sched_dl.c | 65 +++++++++++++++++++++++++++++++------------------
5 files changed, 58 insertions(+), 27 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c3d1f17b..7cf78e2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1535,6 +1535,8 @@ struct task_struct {
struct rb_node *pi_waiters_leftmost;
/* Deadlock detection and priority inheritance handling */
struct rt_mutex_waiter *pi_blocked_on;
+ /* */
+ struct task_struct *pi_top_task;
#endif
#ifdef CONFIG_DEBUG_MUTEXES
@@ -2118,6 +2120,7 @@ extern unsigned int sysctl_sched_compat_yield;
#ifdef CONFIG_RT_MUTEXES
extern int rt_mutex_getprio(struct task_struct *p);
+extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task);
extern void rt_mutex_setprio(struct task_struct *p, int prio);
extern void rt_mutex_adjust_pi(struct task_struct *p);
#else
diff --git a/kernel/fork.c b/kernel/fork.c
index aceb248..c8f2555 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -938,6 +938,7 @@ static void rt_mutex_init_task(struct task_struct *p)
p->pi_waiters = RB_ROOT;
p->pi_waiters_leftmost = NULL;
p->pi_blocked_on = NULL;
+ p->pi_top_task = NULL;
#endif
}
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 2e9c0dc..84ea165 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -196,6 +196,14 @@ int rt_mutex_getprio(struct task_struct *task)
task->normal_prio);
}
+struct task_struct *rt_mutex_get_top_task(struct task_struct *task)
+{
+ if (likely(!task_has_pi_waiters(task)))
+ return NULL;
+
+ return task_top_pi_waiter(task)->task;
+}
+
/*
* Adjust the priority of a task, after its pi_waiters got modified.
*
@@ -205,7 +213,7 @@ static void __rt_mutex_adjust_prio(struct task_struct *task)
{
int prio = rt_mutex_getprio(task);
- if (task->prio != prio)
+ if (task->prio != prio || dl_prio(prio))
rt_mutex_setprio(task, prio);
}
@@ -671,7 +679,8 @@ void rt_mutex_adjust_pi(struct task_struct *task)
raw_spin_lock_irqsave(&task->pi_lock, flags);
waiter = task->pi_blocked_on;
- if (!waiter || waiter->task->prio == task->prio) {
+ if (!waiter || (waiter->task->prio == task->prio &&
+ !dl_prio(task->prio))) {
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
return;
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 853473a..97db370 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4611,10 +4611,11 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
unsigned long flags;
struct rq *rq;
- BUG_ON(prio < 0 || prio > MAX_PRIO);
+ BUG_ON(prio > MAX_PRIO);
rq = task_rq_lock(p, &flags);
trace_sched_pi_setprio(p, prio);
+ p->pi_top_task = rt_mutex_get_top_task(p);
__setprio(rq, p, prio);
task_rq_unlock(rq, &flags);
}
diff --git a/kernel/sched_dl.c b/kernel/sched_dl.c
index 4d24109..991a4f2 100644
--- a/kernel/sched_dl.c
+++ b/kernel/sched_dl.c
@@ -223,15 +223,16 @@ static int push_dl_task(struct rq *rq);
* one, and to (try to!) reconcile itself with its own scheduling
* parameters.
*/
-static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
+static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se,
+ struct sched_dl_entity *pi_se)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
WARN_ON(!dl_se->dl_new || dl_se->dl_throttled);
- dl_se->deadline = rq->clock + dl_se->dl_deadline;
- dl_se->runtime = dl_se->dl_runtime;
+ dl_se->deadline = rq->clock + pi_se->dl_deadline;
+ dl_se->runtime = pi_se->dl_runtime;
dl_se->dl_new = 0;
#ifdef CONFIG_SCHEDSTATS
trace_sched_stat_new_dl(dl_task_of(dl_se), rq->clock, dl_se->flags);
@@ -256,7 +257,8 @@ static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
* could happen are, typically, a entity voluntarily trying to overcume its
* runtime, or it just underestimated it during sched_setscheduler_ex().
*/
-static void replenish_dl_entity(struct sched_dl_entity *dl_se)
+static void replenish_dl_entity(struct sched_dl_entity *dl_se,
+ struct sched_dl_entity *pi_se)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
@@ -269,8 +271,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
* arbitrary large.
*/
while (dl_se->runtime <= 0) {
- dl_se->deadline += dl_se->dl_period;
- dl_se->runtime += dl_se->dl_runtime;
+ dl_se->deadline += pi_se->dl_period;
+ dl_se->runtime += pi_se->dl_runtime;
}
/*
@@ -284,8 +286,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
*/
if (dl_time_before(dl_se->deadline, rq->clock)) {
WARN_ON_ONCE(1);
- dl_se->deadline = rq->clock + dl_se->dl_deadline;
- dl_se->runtime = dl_se->dl_runtime;
+ dl_se->deadline = rq->clock + pi_se->dl_deadline;
+ dl_se->runtime = pi_se->dl_runtime;
reset = 1;
}
#ifdef CONFIG_SCHEDSTATS
@@ -306,7 +308,8 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
* task with deadline equal to period this is the same of using
* dl_deadline instead of dl_period in the equation above.
*/
-static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
+static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
+ struct sched_dl_entity *pi_se, u64 t)
{
u64 left, right;
@@ -323,8 +326,8 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
* to the (absolute) deadline. Therefore, overflowing the u64
* type is very unlikely to occur in both cases.
*/
- left = dl_se->dl_deadline * dl_se->runtime;
- right = (dl_se->deadline - t) * dl_se->dl_runtime;
+ left = pi_se->dl_deadline * dl_se->runtime;
+ right = (dl_se->deadline - t) * pi_se->dl_runtime;
return dl_time_before(right, left);
}
@@ -338,7 +341,8 @@ static bool dl_entity_overflow(struct sched_dl_entity *dl_se, u64 t)
* - using the remaining runtime with the current deadline would make
* the entity exceed its bandwidth.
*/
-static void update_dl_entity(struct sched_dl_entity *dl_se)
+static void update_dl_entity(struct sched_dl_entity *dl_se,
+ struct sched_dl_entity *pi_se)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
@@ -349,14 +353,14 @@ static void update_dl_entity(struct sched_dl_entity *dl_se)
* the actual scheduling parameters have to be "renewed".
*/
if (dl_se->dl_new) {
- setup_new_dl_entity(dl_se);
+ setup_new_dl_entity(dl_se, pi_se);
return;
}
if (dl_time_before(dl_se->deadline, rq->clock) ||
- dl_entity_overflow(dl_se, rq->clock)) {
- dl_se->deadline = rq->clock + dl_se->dl_deadline;
- dl_se->runtime = dl_se->dl_runtime;
+ dl_entity_overflow(dl_se, pi_se, rq->clock)) {
+ dl_se->deadline = rq->clock + pi_se->dl_deadline;
+ dl_se->runtime = pi_se->dl_runtime;
overflow = 1;
}
#ifdef CONFIG_SCHEDSTATS
@@ -374,7 +378,7 @@ static void update_dl_entity(struct sched_dl_entity *dl_se)
* actually started or not (i.e., the replenishment instant is in
* the future or in the past).
*/
-static int start_dl_timer(struct sched_dl_entity *dl_se)
+static int start_dl_timer(struct sched_dl_entity *dl_se, bool boosted)
{
struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
struct rq *rq = rq_of_dl_rq(dl_rq);
@@ -391,7 +395,7 @@ static int start_dl_timer(struct sched_dl_entity *dl_se)
* This won't affect the other -deadline tasks, but if we are
* a CPU-hog, lower scheduling classes will starve!
*/
- if (dl_se->flags & SF_BWRECL_DL)
+ if (boosted || dl_se->flags & SF_BWRECL_DL)
return 0;
/*
@@ -595,7 +599,7 @@ static void update_curr_dl(struct rq *rq)
dl_se->runtime -= delta_exec;
if (dl_runtime_exceeded(rq, dl_se)) {
__dequeue_task_dl(rq, curr, 0);
- if (likely(start_dl_timer(dl_se)))
+ if (likely(start_dl_timer(dl_se, !!curr->pi_top_task)))
throttle_curr_dl(rq, curr);
else
enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
@@ -749,7 +753,8 @@ static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
}
static void
-enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
+enqueue_dl_entity(struct sched_dl_entity *dl_se,
+ struct sched_dl_entity *pi_se, int flags)
{
BUG_ON(on_dl_rq(dl_se));
@@ -759,9 +764,9 @@ enqueue_dl_entity(struct sched_dl_entity *dl_se, int flags)
* we want a replenishment of its runtime.
*/
if (!dl_se->dl_new && flags & ENQUEUE_REPLENISH)
- replenish_dl_entity(dl_se);
+ replenish_dl_entity(dl_se, pi_se);
else
- update_dl_entity(dl_se);
+ update_dl_entity(dl_se, pi_se);
__enqueue_dl_entity(dl_se);
}
@@ -773,6 +778,18 @@ static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
+ struct task_struct *pi_task = p->pi_top_task;
+ struct sched_dl_entity *pi_se = &p->dl;
+
+ /*
+ * Use the scheduling parameters of the top pi-waiter
+ * task if we have one and its (relative) deadline is
+ * smaller than our one... OTW we keep our runtime and
+ * deadline.
+ */
+ if (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))
+ pi_se = &pi_task->dl;
+
/*
* If p is throttled, we do nothing. In fact, if it exhausted
* its budget it needs a replenishment and, since it now is on
@@ -782,7 +799,7 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
if (p->dl.dl_throttled)
return;
- enqueue_dl_entity(&p->dl, flags);
+ enqueue_dl_entity(&p->dl, pi_se, flags);
if (!task_current(rq, p) && p->dl.nr_cpus_allowed > 1)
enqueue_pushable_dl_task(rq, p);
@@ -847,7 +864,7 @@ static long wait_interval_dl(struct task_struct *p, struct timespec *rqtp,
*/
wakeup = timespec_to_ns(rqtp);
if (dl_time_before(wakeup, dl_se->deadline) &&
- !dl_entity_overflow(dl_se, wakeup)) {
+ !dl_entity_overflow(dl_se, dl_se, wakeup)) {
u64 ibw = (u64)dl_se->runtime * dl_se->dl_period;
ibw = div_u64(ibw, dl_se->dl_runtime);
--
1.7.2.3
--
<<This happens because I choose it to happen!>> (Raistlin Majere)
----------------------------------------------------------------------
Dario Faggioli, ReTiS Lab, Scuola Superiore Sant'Anna, Pisa (Italy)
http://blog.linux.it/raistlin / raistlin@...ga.net /
dario.faggioli@...ber.org
Download attachment "signature.asc" of type "application/pgp-signature" (199 bytes)
Powered by blists - more mailing lists