Message-Id: <200704230047.23027.kernel@kolivas.org>
Date: Mon, 23 Apr 2007 00:47:22 +1000
From: Con Kolivas <kernel@...ivas.org>
To: Andrew Morton <akpm@...ux-foundation.org>,
	linux kernel mailing list <linux-kernel@...r.kernel.org>,
	ck list <ck@....kolivas.org>, Ingo Molnar <mingo@...e.hu>,
	Willy Tarreau <w@....eu>
Subject: [PATCH] sched: implement staircase deadline scheduler load weight fix

The task load_weight needs to be set every time the quota is set, but it was
not being set in activate_task(), which assumed it would not have changed.
Due to changes in where the default rr_interval is set on SMP, that
assumption failed. It would also break again if rr_interval were changed on
the fly.
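
To illustrate, the old activation path refreshed only the quota (paraphrased
from the activate_task() hunk below; the comments are explanatory, not from
the original):

	p->quota = rr_quota(p);		/* recomputed from the current rr_interval */
	p->prio = effective_prio(p);	/* but p->load_weight keeps whatever the
					 * last set_load_weight() call stored,
					 * possibly computed from a stale
					 * rr_interval */

The fix funnels both assignments through a single set_quota() helper so the
pair cannot drift apart.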

set_load_weight() was unnecessarily complex, as the weight can simply be set
to the task_timeslice in milliseconds. The old scaling also lacked the
resolution to pick up nice 19 tasks and could give them a weight of 0 with a
small enough rr_interval.
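
A minimal userspace sketch of the old macro arithmetic (adapted to pass
rr_interval as a parameter, assuming SCHED_LOAD_SCALE of 1024 as in mainline
2.6.21 and a nice 19 slice of rr_interval / 2 ms as quoted in the rr_quota()
comment below) shows how the weight underflows to 0:

	#include <stdio.h>

	#define SCHED_LOAD_SCALE	1024UL
	#define DEF_TIMESLICE(rr)	((rr) * 20)
	#define LOAD_WEIGHT(lp, rr)	(((lp) * SCHED_LOAD_SCALE) / DEF_TIMESLICE(rr))

	int main(void)
	{
		int rr = 1;			/* a small rr_interval */
		int nice19_slice = rr / 2;	/* integer division: 0 ms */

		/* prints "nice 19 load_weight: 0" */
		printf("nice 19 load_weight: %lu\n",
		       LOAD_WEIGHT(nice19_slice, rr));
		return 0;
	}

The new code drops the division by DEF_TIMESLICE entirely and uses
task_timeslice(p) in milliseconds as the weight directly.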

Thanks to Willy Tarreau <w@....eu> for spotting more SMP balancing problems.

Signed-off-by: Con Kolivas <kernel@...ivas.org>
---
kernel/sched.c | 36 +++++++++++++++++-------------------
1 file changed, 17 insertions(+), 19 deletions(-)
Index: linux-2.6.21-rc7-sd/kernel/sched.c
===================================================================
--- linux-2.6.21-rc7-sd.orig/kernel/sched.c 2007-04-22 21:37:25.000000000 +1000
+++ linux-2.6.21-rc7-sd/kernel/sched.c 2007-04-22 23:04:34.000000000 +1000
@@ -102,8 +102,6 @@ unsigned long long __attribute__((weak))
*/
int rr_interval __read_mostly = 8;
-#define DEF_TIMESLICE (rr_interval * 20)
-
/*
* This contains a bitmap for each dynamic priority level with empty slots
* for the valid priorities each different nice level can have. It allows
@@ -886,16 +884,11 @@ static int task_timeslice(struct task_st
}
/*
- * Assume: static_prio_timeslice(NICE_TO_PRIO(0)) == DEF_TIMESLICE
- * If static_prio_timeslice() is ever changed to break this assumption then
- * this code will need modification. Scaled as multiples of milliseconds.
- */
-#define TIME_SLICE_NICE_ZERO DEF_TIMESLICE
-#define LOAD_WEIGHT(lp) \
- (((lp) * SCHED_LOAD_SCALE) / TIME_SLICE_NICE_ZERO)
-#define TASK_LOAD_WEIGHT(p) LOAD_WEIGHT(task_timeslice(p))
-#define RTPRIO_TO_LOAD_WEIGHT(rp) \
- (LOAD_WEIGHT((rr_interval + 20 + (rp))))
+ * The load weight is basically the task_timeslice in ms. Realtime tasks are
+ * special cased to be proportionately larger than nice -20 by their
+ * rt_priority. The weight for rt tasks can only be arbitrary at best.
+ */
+#define RTPRIO_TO_LOAD_WEIGHT(rp) (rr_interval * 20 * (40 + rp))
static void set_load_weight(struct task_struct *p)
{
@@ -912,7 +905,7 @@ static void set_load_weight(struct task_
#endif
p->load_weight = RTPRIO_TO_LOAD_WEIGHT(p->rt_priority);
} else
- p->load_weight = TASK_LOAD_WEIGHT(p);
+ p->load_weight = task_timeslice(p);
}
static inline void
@@ -995,7 +988,7 @@ static int effective_prio(struct task_st
* nice -20 = 10 * rr_interval. nice 1-19 = rr_interval / 2.
* Value returned is in microseconds.
*/
-static unsigned int rr_quota(struct task_struct *p)
+static inline unsigned int rr_quota(struct task_struct *p)
{
int nice = TASK_NICE(p), rr = rr_interval;
@@ -1009,6 +1002,13 @@ static unsigned int rr_quota(struct task
return MS_TO_US(rr);
}
+/* Every time we set the quota we need to set the load weight */
+static void set_quota(struct task_struct *p)
+{
+ p->quota = rr_quota(p);
+ set_load_weight(p);
+}
+
/*
* activate_task - move a task to the runqueue and do priority recalculation
*/
@@ -1036,7 +1036,7 @@ static void activate_task(struct task_st
(now - p->timestamp) >> 20);
}
- p->quota = rr_quota(p);
+ set_quota(p);
p->prio = effective_prio(p);
p->timestamp = now;
__activate_task(p, rq);
@@ -3885,8 +3885,7 @@ void set_user_nice(struct task_struct *p
p->static_prio = NICE_TO_PRIO(nice);
old_prio = p->prio;
p->prio = effective_prio(p);
- p->quota = rr_quota(p);
- set_load_weight(p);
+ set_quota(p);
delta = p->prio - old_prio;
if (queued) {
@@ -4020,8 +4019,7 @@ static void __setscheduler(struct task_s
p->normal_prio = normal_prio(p);
/* we are holding p->pi_lock already */
p->prio = rt_mutex_getprio(p);
- p->quota = rr_quota(p);
- set_load_weight(p);
+ set_quota(p);
}
/**
--
-ck