[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20120628022414.30496.11931.stgit@kitami.mtv.corp.google.com>
Date: Wed, 27 Jun 2012 19:24:14 -0700
From: Paul Turner <pjt@...gle.com>
To: linux-kernel@...r.kernel.org
Cc: Venki Pallipadi <venki@...gle.com>,
Srivatsa Vaddagiri <vatsa@...ibm.com>,
Vincent Guittot <vincent.guittot@...aro.org>,
Peter Zijlstra <a.p.zijlstra@...llo.nl>,
Nikunj A Dadhania <nikunj@...ux.vnet.ibm.com>,
Mike Galbraith <efault@....de>,
Kamalesh Babulal <kamalesh@...ux.vnet.ibm.com>,
Ben Segall <bsegall@...gle.com>, Ingo Molnar <mingo@...e.hu>,
"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
Morten Rasmussen <Morten.Rasmussen@....com>,
Vaidyanathan Srinivasan <svaidy@...ux.vnet.ibm.com>
Subject: [PATCH 09/16] sched: normalize tg load contributions against runnable
time
Entities of equal weight should receive equitable distribution of cpu time.
This is challenging in the case of a task_group's shares as execution may be
occurring on multiple cpus simultaneously.
To handle this we divide up the shares into weights proportionate with the load
on each cfs_rq. This does not, however, account for the fact that the sum of
the parts may be less than one cpu and so we need to normalize:
load(tg) = min(runnable_avg(tg), 1) * tg->shares
Where runnable_avg is the fraction of time, aggregated across the group's
cfs_rqs, in which the task_group had runnable children.
Signed-off-by: Paul Turner <pjt@...gle.com>
Signed-off-by: Ben Segall <bsegall@...gle.com>
---
kernel/sched/debug.c | 4 ++++
kernel/sched/fair.c | 39 +++++++++++++++++++++++++++++++++++++++
kernel/sched/sched.h | 2 ++
3 files changed, 45 insertions(+), 0 deletions(-)
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 9268fb7..9334c68 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -237,6 +237,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
atomic64_read(&cfs_rq->tg->load_avg));
SEQ_printf(m, " .%-30s: %lld\n", "tg_load_contrib",
cfs_rq->tg_load_contrib);
+ SEQ_printf(m, " .%-30s: %d\n", "tg_runnable_contrib",
+ cfs_rq->tg_runnable_contrib);
+ SEQ_printf(m, " .%-30s: %d\n", "tg->runnable_avg",
+ atomic_read(&cfs_rq->tg->runnable_avg));
#endif
print_cfs_group_stats(m, cpu, cfs_rq->tg);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a416296..91d0b21 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1117,19 +1117,56 @@ static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq,
}
}
+/*
+ * Aggregate cfs_rq runnable averages into an equivalent task_group
+ * representation for computing load contributions.
+ */
+static inline void __update_tg_runnable_avg(struct sched_avg *sa,
+ struct cfs_rq *cfs_rq)
+{
+ struct task_group *tg = cfs_rq->tg;
+ long contrib;
+
+ contrib = div_u64(sa->runnable_avg_sum << 12,
+ sa->runnable_avg_period + 1);
+ contrib -= cfs_rq->tg_runnable_contrib;
+
+ if (abs(contrib) > cfs_rq->tg_runnable_contrib / 64) {
+ atomic_add(contrib, &tg->runnable_avg);
+ cfs_rq->tg_runnable_contrib += contrib;
+ }
+}
+
static inline void __update_group_entity_contrib(struct sched_entity *se)
{
struct cfs_rq *cfs_rq = group_cfs_rq(se);
struct task_group *tg = cfs_rq->tg;
+ int runnable_avg;
+
u64 contrib;
contrib = cfs_rq->tg_load_contrib * tg->shares;
se->avg.load_avg_contrib = div64_u64(contrib,
atomic64_read(&tg->load_avg) + 1);
+
+ /*
+ * Unlike a task-entity, a group entity may be using >=1 cpu globally.
+ * However, in the case that it's using <1 cpu we need to form a
+ * correction term so that we contribute the same load as a task of
+ * equal weight. (Global runnable time is taken as a fraction over
+ * 2^12.)
+ */
+ runnable_avg = atomic_read(&tg->runnable_avg);
+ if (runnable_avg < (1<<12)) {
+ se->avg.load_avg_contrib *= runnable_avg;
+ se->avg.load_avg_contrib /= (1<<12);
+ }
}
#else
static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq,
int force_update) {}
+static inline void __update_tg_runnable_avg(struct sched_avg *sa,
+ struct cfs_rq *cfs_rq) {}
static inline void __update_group_entity_contrib(struct sched_entity *se) {}
#endif
@@ -1151,6 +1188,7 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se)
if (entity_is_task(se)) {
__update_task_entity_contrib(se);
} else {
+ __update_tg_runnable_avg(&se->avg, group_cfs_rq(se));
__update_group_entity_contrib(se);
}
@@ -1219,6 +1257,7 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update)
static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
{
__update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable);
+ __update_tg_runnable_avg(&rq->avg, &rq->cfs);
}
/* Add the load generated by se into cfs_rq's child load-average */
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4d3b3ad..b48bbd7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -113,6 +113,7 @@ struct task_group {
atomic_t load_weight;
atomic64_t load_avg;
+ atomic_t runnable_avg;
#endif
#ifdef CONFIG_RT_GROUP_SCHED
@@ -234,6 +235,7 @@ struct cfs_rq {
atomic64_t decay_counter, removed_load;
u64 last_decay;
#ifdef CONFIG_FAIR_GROUP_SCHED
+ u32 tg_runnable_contrib;
u64 tg_load_contrib;
#endif
#endif
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists