[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <tip-523e979d31648112bad07f427c183525c0258c75@git.kernel.org>
Date: Sun, 15 Jul 2018 16:32:41 -0700
From: tip-bot for Vincent Guittot <tipbot@...or.com>
To: linux-tip-commits@...r.kernel.org
Cc: torvalds@...ux-foundation.org, vincent.guittot@...aro.org,
linux-kernel@...r.kernel.org, peterz@...radead.org,
tglx@...utronix.de, hpa@...or.com, mingo@...nel.org
Subject: [tip:sched/core] sched/core: Use PELT for scale_rt_capacity()
Commit-ID: 523e979d31648112bad07f427c183525c0258c75
Gitweb: https://git.kernel.org/tip/523e979d31648112bad07f427c183525c0258c75
Author: Vincent Guittot <vincent.guittot@...aro.org>
AuthorDate: Thu, 28 Jun 2018 17:45:12 +0200
Committer: Ingo Molnar <mingo@...nel.org>
CommitDate: Mon, 16 Jul 2018 00:16:25 +0200
sched/core: Use PELT for scale_rt_capacity()
The utilization of the CPU by RT, DL and IRQs is now tracked with
PELT, so we can use these metrics instead of rt_avg to evaluate the remaining
capacity available for the CFS class.
scale_rt_capacity() behavior has been changed: it now returns the remaining
capacity available for CFS instead of a scaling factor, because RT, DL and
IRQ now provide absolute utilization values.
The same formula as schedutil is used:
IRQ util_avg + (1 - IRQ util_avg / max capacity) * \Sum rq util_avg
but the implementation is different because it doesn't return the same value
and doesn't benefit from the same optimizations.
Signed-off-by: Vincent Guittot <vincent.guittot@...aro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@...radead.org>
Cc: Linus Torvalds <torvalds@...ux-foundation.org>
Cc: Morten.Rasmussen@....com
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: claudio@...dence.eu.com
Cc: daniel.lezcano@...aro.org
Cc: dietmar.eggemann@....com
Cc: joel@...lfernandes.org
Cc: juri.lelli@...hat.com
Cc: luca.abeni@...tannapisa.it
Cc: patrick.bellasi@....com
Cc: quentin.perret@....com
Cc: rjw@...ysocki.net
Cc: valentin.schneider@....com
Cc: viresh.kumar@...aro.org
Link: http://lkml.kernel.org/r/1530200714-4504-10-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@...nel.org>
---
kernel/sched/deadline.c | 2 --
kernel/sched/fair.c | 44 ++++++++++++++++++++++----------------------
kernel/sched/pelt.c | 2 +-
kernel/sched/rt.c | 2 --
4 files changed, 23 insertions(+), 27 deletions(-)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index f4de26982d80..68b8a9f1c9ca 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1180,8 +1180,6 @@ static void update_curr_dl(struct rq *rq)
curr->se.exec_start = now;
cgroup_account_cputime(curr, delta_exec);
- sched_rt_avg_update(rq, delta_exec);
-
if (dl_entity_is_special(dl_se))
return;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c2782b29c79f..d265fa9756a2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7551,39 +7551,39 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
static unsigned long scale_rt_capacity(int cpu)
{
struct rq *rq = cpu_rq(cpu);
- u64 total, used, age_stamp, avg;
- s64 delta;
-
- /*
- * Since we're reading these variables without serialization make sure
- * we read them once before doing sanity checks on them.
- */
- age_stamp = READ_ONCE(rq->age_stamp);
- avg = READ_ONCE(rq->rt_avg);
- delta = __rq_clock_broken(rq) - age_stamp;
+ unsigned long max = arch_scale_cpu_capacity(NULL, cpu);
+ unsigned long used, free;
+#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
+ unsigned long irq;
+#endif
- if (unlikely(delta < 0))
- delta = 0;
+#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
+ irq = READ_ONCE(rq->avg_irq.util_avg);
- total = sched_avg_period() + delta;
+ if (unlikely(irq >= max))
+ return 1;
+#endif
- used = div_u64(avg, total);
+ used = READ_ONCE(rq->avg_rt.util_avg);
+ used += READ_ONCE(rq->avg_dl.util_avg);
- if (likely(used < SCHED_CAPACITY_SCALE))
- return SCHED_CAPACITY_SCALE - used;
+ if (unlikely(used >= max))
+ return 1;
- return 1;
+ free = max - used;
+#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
+ free *= (max - irq);
+ free /= max;
+#endif
+ return free;
}
static void update_cpu_capacity(struct sched_domain *sd, int cpu)
{
- unsigned long capacity = arch_scale_cpu_capacity(sd, cpu);
+ unsigned long capacity = scale_rt_capacity(cpu);
struct sched_group *sdg = sd->groups;
- cpu_rq(cpu)->cpu_capacity_orig = capacity;
-
- capacity *= scale_rt_capacity(cpu);
- capacity >>= SCHED_CAPACITY_SHIFT;
+ cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(sd, cpu);
if (!capacity)
capacity = 1;
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index ead6d8b4a8b8..35475c0c5419 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -237,7 +237,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runna
*/
sa->load_avg = div_u64(load * sa->load_sum, divider);
sa->runnable_load_avg = div_u64(runnable * sa->runnable_load_sum, divider);
- sa->util_avg = sa->util_sum / divider;
+ WRITE_ONCE(sa->util_avg, sa->util_sum / divider);
}
/*
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 0dc8ad1915e6..2df72abfa24a 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -973,8 +973,6 @@ static void update_curr_rt(struct rq *rq)
curr->se.exec_start = now;
cgroup_account_cputime(curr, delta_exec);
- sched_rt_avg_update(rq, delta_exec);
-
if (!rt_bandwidth_enabled())
return;
Powered by blists - more mailing lists