[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20070331094848.GA7829@elte.hu>
Date: Sat, 31 Mar 2007 11:48:48 +0200
From: Ingo Molnar <mingo@...e.hu>
To: Xenofon Antidides <xantidides@...oo.gr>
Cc: Mike Galbraith <efault@....de>, Con Kolivas <kernel@...ivas.org>,
linux list <linux-kernel@...r.kernel.org>,
Andrew Morton <akpm@...ux-foundation.org>
Subject: [patch] sched: improve fairness, v3
* Ingo Molnar <mingo@...e.hu> wrote:
> also, could you possibly try these workloads you described with Mike's
> latest patch too? Thanks,
i.e. the one below.
Ingo
--------------------->
Subject: [patch] sched: improve fairness, v3
From: Mike Galbraith <efault@....de>
track interactive backlog as a way to detect when the load isn't really
an interactive load, that's very simple and has potential.
Signed-off-by: Ingo Molnar <mingo@...e.hu>
---
kernel/sched.c | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 108 insertions(+), 7 deletions(-)
Index: linux/kernel/sched.c
===================================================================
--- linux.orig/kernel/sched.c
+++ linux/kernel/sched.c
@@ -109,6 +109,7 @@ unsigned long long __attribute__((weak))
#define MAX_SLEEP_AVG (DEF_TIMESLICE * MAX_BONUS)
#define STARVATION_LIMIT (MAX_SLEEP_AVG)
#define NS_MAX_SLEEP_AVG (JIFFIES_TO_NS(MAX_SLEEP_AVG))
+#define INTERACTIVE_LIMIT (DEF_TIMESLICE * 4)
/*
* If a task is 'interactive' then we reinsert it in the active
@@ -167,6 +168,9 @@ unsigned long long __attribute__((weak))
(JIFFIES_TO_NS(MAX_SLEEP_AVG * \
(MAX_BONUS / 2 + DELTA((p)) + 1) / MAX_BONUS - 1))
+#define INTERACTIVE_BACKLOG_EXCEEDED(array) \
+ ((array)->interactive_ticks > INTERACTIVE_LIMIT)
+
#define TASK_PREEMPTS_CURR(p, rq) \
((p)->prio < (rq)->curr->prio)
@@ -201,6 +205,7 @@ static inline unsigned int task_timeslic
struct prio_array {
unsigned int nr_active;
+ int interactive_ticks;
DECLARE_BITMAP(bitmap, MAX_PRIO+1); /* include 1 bit for delimiter */
struct list_head queue[MAX_PRIO];
};
@@ -234,6 +239,7 @@ struct rq {
*/
unsigned long nr_uninterruptible;
+ unsigned long switch_timestamp;
unsigned long expired_timestamp;
/* Cached timestamp set by update_cpu_clock() */
unsigned long long most_recent_timestamp;
@@ -691,6 +697,8 @@ static void dequeue_task(struct task_str
list_del(&p->run_list);
if (list_empty(array->queue + p->prio))
__clear_bit(p->prio, array->bitmap);
+ if (TASK_INTERACTIVE(p))
+ array->interactive_ticks -= p->time_slice;
}
static void enqueue_task(struct task_struct *p, struct prio_array *array)
@@ -700,6 +708,8 @@ static void enqueue_task(struct task_str
__set_bit(p->prio, array->bitmap);
array->nr_active++;
p->array = array;
+ if (TASK_INTERACTIVE(p))
+ array->interactive_ticks += p->time_slice;
}
/*
@@ -882,7 +892,11 @@ static int recalc_task_prio(struct task_
/* Caller must always ensure 'now >= p->timestamp' */
unsigned long sleep_time = now - p->timestamp;
- if (batch_task(p))
+ /*
+ * Migration timestamp adjustment may induce negative time.
+ * Ignore unquantifiable values as well as SCHED_BATCH tasks.
+ */
+ if (now < p->timestamp || batch_task(p))
sleep_time = 0;
if (likely(sleep_time > 0)) {
@@ -3051,9 +3065,9 @@ static inline int expired_starving(struc
{
if (rq->curr->static_prio > rq->best_expired_prio)
return 1;
- if (!STARVATION_LIMIT || !rq->expired_timestamp)
+ if (!STARVATION_LIMIT)
return 0;
- if (jiffies - rq->expired_timestamp > STARVATION_LIMIT * rq->nr_running)
+ if (jiffies - rq->switch_timestamp > STARVATION_LIMIT * rq->nr_running)
return 1;
return 0;
}
@@ -3131,8 +3145,74 @@ void account_steal_time(struct task_stru
cpustat->steal = cputime64_add(cpustat->steal, tmp);
}
+/*
+ * Promote and requeue the next lower priority task. If no task
+ * is available in the active array, switch to the expired array.
+ * @rq: runqueue to search.
+ * @prio: priority at which to begin search.
+ */
+static inline void promote_next_lower(struct rq *rq, int prio)
+{
+ struct prio_array *array = rq->active;
+ struct task_struct *p = NULL;
+ unsigned long long now = rq->most_recent_timestamp;
+ unsigned long *bitmap;
+ unsigned long starving = JIFFIES_TO_NS(rq->nr_running * DEF_TIMESLICE);
+ int idx = prio + 1, found_noninteractive = 0;
+
+repeat:
+ bitmap = array->bitmap;
+ idx = find_next_bit(bitmap, MAX_PRIO, idx);
+ if (idx < MAX_PRIO) {
+ struct list_head *queue = array->queue + idx;
+
+ p = list_entry(queue->next, struct task_struct, run_list);
+ if (!TASK_INTERACTIVE(p))
+ found_noninteractive = 1;
+
+ /* Skip non-starved queues. */
+ if (now < p->last_ran + starving) {
+ idx++;
+ p = NULL;
+ goto repeat;
+ }
+ } else if (!found_noninteractive && array == rq->active) {
+ /* Nobody home, check the expired array. */
+ array = rq->expired;
+ idx = 0;
+ p = NULL;
+ goto repeat;
+ }
+
+ /* Found one, requeue it. */
+ if (p) {
+ dequeue_task(p, p->array);
+ if (array == rq->active)
+ p->prio--;
+ /*
+ * If we pulled a task from the expired array, correct
+ * expired array info. We can't afford a full search
+ * for best_expired_prio, but do the best we can.
+ */
+ else {
+ idx = sched_find_first_bit(array->bitmap);
+ if (idx < MAX_PRIO) {
+ if (rq->best_expired_prio > idx)
+ rq->best_expired_prio = idx;
+ } else {
+ /* We emptied the array */
+ rq->best_expired_prio = MAX_PRIO;
+ rq->switch_timestamp = jiffies;
+ }
+ }
+ enqueue_task(p, rq->active);
+ }
+}
+
static void task_running_tick(struct rq *rq, struct task_struct *p)
{
+ int task_was_interactive;
+
if (p->array != rq->active) {
/* Task has expired but was not scheduled yet */
set_tsk_need_resched(p);
@@ -3161,21 +3241,41 @@ static void task_running_tick(struct rq
}
goto out_unlock;
}
+
+ /*
+ * Tick off interactive task ticks from the active array.
+ */
+ task_was_interactive = TASK_INTERACTIVE(p);
+ if (task_was_interactive && --rq->active->interactive_ticks < 0)
+ rq->active->interactive_ticks = 0;
+
if (!--p->time_slice) {
+ int expire_limit = STARVATION_LIMIT;
+
dequeue_task(p, rq->active);
set_tsk_need_resched(p);
p->prio = effective_prio(p);
p->time_slice = task_timeslice(p);
p->first_time_slice = 0;
- if (!rq->expired_timestamp)
- rq->expired_timestamp = jiffies;
if (!TASK_INTERACTIVE(p) || expired_starving(rq)) {
enqueue_task(p, rq->expired);
if (p->static_prio < rq->best_expired_prio)
rq->best_expired_prio = p->static_prio;
+ if (!task_was_interactive)
+ rq->expired_timestamp = jiffies;
} else
enqueue_task(p, rq->active);
+
+ /*
+ * If we haven't expired a non-interactive task within
+ * STARVATION_LIMIT ticks, or the current interactive
+ * load exceeds INTERACTIVE_BACKLOG, start promoting
+ * lower priority tasks.
+ */
+ if (time_after(jiffies, rq->expired_timestamp + expire_limit) ||
+ INTERACTIVE_BACKLOG_EXCEEDED(rq->active))
+ promote_next_lower(rq, p->prio);
} else {
/*
* Prevent a too long timeslice allowing a task to monopolize
@@ -3356,7 +3456,7 @@ need_resched_nonpreemptible:
idle_balance(cpu, rq);
if (!rq->nr_running) {
next = rq->idle;
- rq->expired_timestamp = 0;
+ rq->switch_timestamp = jiffies;
goto switch_tasks;
}
}
@@ -3370,7 +3470,8 @@ need_resched_nonpreemptible:
rq->active = rq->expired;
rq->expired = array;
array = rq->active;
- rq->expired_timestamp = 0;
+ array->interactive_ticks = 0;
+ rq->switch_timestamp = jiffies;
rq->best_expired_prio = MAX_PRIO;
}
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists