[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20090930125336.GF19951@in.ibm.com>
Date: Wed, 30 Sep 2009 18:23:36 +0530
From: Bharata B Rao <bharata@...ux.vnet.ibm.com>
To: linux-kernel@...r.kernel.org
Cc: Dhaval Giani <dhaval@...ux.vnet.ibm.com>,
Balbir Singh <balbir@...ux.vnet.ibm.com>,
Vaidyanathan Srinivasan <svaidy@...ux.vnet.ibm.com>,
Gautham R Shenoy <ego@...ibm.com>,
Srivatsa Vaddagiri <vatsa@...ibm.com>,
Ingo Molnar <mingo@...e.hu>,
Peter Zijlstra <a.p.zijlstra@...llo.nl>,
Pavel Emelyanov <xemul@...nvz.org>,
Herbert Poetzl <herbert@...hfloor.at>,
Avi Kivity <avi@...hat.com>,
Chris Friesen <cfriesen@...tel.com>,
Paul Menage <menage@...gle.com>,
Mike Waychison <mikew@...gle.com>
Subject: [RFC v2 PATCH 5/8] sched: Unthrottle the throttled tasks
sched: Unthrottle the throttled tasks.
From: Bharata B Rao <bharata@...ux.vnet.ibm.com>
Refresh runtimes when group's bandwidth period expires. Unthrottle any
throttled groups at that time. Refreshing runtimes is driven through
a periodic timer.
Signed-off-by: Bharata B Rao <bharata@...ux.vnet.ibm.com>
---
kernel/sched.c | 15 ++++++++-
kernel/sched_fair.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 94 insertions(+), 2 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 04c505f..ec302ac 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1820,6 +1820,7 @@ static inline u64 global_cfs_runtime(void)
}
int task_group_throttled(struct task_group *tg, int cpu);
+void do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b);
static inline int cfs_bandwidth_enabled(struct task_group *tg)
{
@@ -1845,6 +1846,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
struct cfs_bandwidth *cfs_b =
container_of(timer, struct cfs_bandwidth, cfs_period_timer);
+ do_sched_cfs_period_timer(cfs_b);
hrtimer_add_expires_ns(timer, ktime_to_ns(cfs_b->cfs_period));
return HRTIMER_RESTART;
}
@@ -10588,15 +10590,24 @@ long tg_get_cfs_period(struct task_group *tg)
int tg_set_hard_limit_enabled(struct task_group *tg, u64 val)
{
- spin_lock_irq(&tg->cfs_bandwidth.cfs_runtime_lock);
+ local_irq_disable();
+ spin_lock(&tg->cfs_bandwidth.cfs_runtime_lock);
if (val > 0) {
tg->hard_limit_enabled = 1;
start_cfs_bandwidth(tg);
+ spin_unlock(&tg->cfs_bandwidth.cfs_runtime_lock);
} else {
destroy_cfs_bandwidth(tg);
tg->hard_limit_enabled = 0;
+ spin_unlock(&tg->cfs_bandwidth.cfs_runtime_lock);
+ /*
+ * Hard limiting is being disabled for this group.
+ * Refresh runtimes and put the throttled entities
+ * of the group back onto runqueue.
+ */
+ do_sched_cfs_period_timer(&tg->cfs_bandwidth);
}
- spin_unlock_irq(&tg->cfs_bandwidth.cfs_runtime_lock);
+ local_irq_enable();
return 0;
}
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f98c1c8..8c8b602 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -249,6 +249,80 @@ int task_group_throttled(struct task_group *tg, int cpu)
return 0;
}
+static void enqueue_entity_locked(struct cfs_rq *cfs_rq,
+ struct sched_entity *se, int wakeup);
+static void add_cfs_rq_tasks_running(struct sched_entity *se,
+ unsigned long count);
+static void sub_cfs_rq_tasks_running(struct sched_entity *se,
+ unsigned long count);
+
+static void enqueue_throttled_entity(struct rq *rq, struct sched_entity *se)
+{
+ unsigned long nr_tasks = 0;
+ struct sched_entity *se_tmp = se;
+ int throttled = 0;
+
+ for_each_sched_entity(se) {
+ if (se->on_rq)
+ break;
+
+ if (entity_throttled(se)) {
+ throttled = 1;
+ break;
+ }
+
+ enqueue_entity_locked(cfs_rq_of(se), se, 0);
+ nr_tasks += group_cfs_rq(se)->nr_tasks_running;
+ }
+
+ if (!nr_tasks)
+ return;
+
+ /*
+ * Add the number of tasks this entity has to
+ * all of its parent entities.
+ */
+ add_cfs_rq_tasks_running(se_tmp, nr_tasks);
+
+ /*
+ * Add the number of tasks this entity has to
+ * this cpu's rq only if the entity got enqueued all the
+ * way up without any throttled entity in the hierarchy.
+ */
+ if (!throttled)
+ rq->nr_running += nr_tasks;
+}
+
+/*
+ * Refresh runtimes of all cfs_rqs in this group, i.e.,
+ * refresh runtimes of the representative cfs_rq of this
+ * tg on all cpus. Enqueue any throttled entity back.
+ */
+void do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b)
+{
+ int i;
+ const struct cpumask *span = sched_bw_period_mask();
+ struct task_group *tg = container_of(cfs_b, struct task_group,
+ cfs_bandwidth);
+ unsigned long flags;
+
+ for_each_cpu(i, span) {
+ struct rq *rq = cpu_rq(i);
+ struct cfs_rq *cfs_rq = tg->cfs_rq[i];
+ struct sched_entity *se = tg->se[i];
+
+ spin_lock_irqsave(&rq->lock, flags);
+ rq_runtime_lock(rq);
+ cfs_rq->cfs_time = 0;
+ if (cfs_rq_throttled(cfs_rq)) {
+ cfs_rq->cfs_throttled = 0;
+ enqueue_throttled_entity(rq, se);
+ }
+ rq_runtime_unlock(rq);
+ spin_unlock_irqrestore(&rq->lock, flags);
+ }
+}
+
#else
static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
@@ -348,6 +422,13 @@ static void add_cfs_rq_tasks_running(struct sched_entity *se,
struct cfs_rq *cfs_rq;
for_each_sched_entity(se) {
+ /*
+ * If any entity in the hierarchy is throttled, don't
+ * propagate the tasks count up since this entity isn't
+ * on rq yet.
+ */
+ if (entity_throttled(se))
+ break;
cfs_rq = cfs_rq_of(se);
cfs_rq->nr_tasks_running += count;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists