[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1459516987-15745-3-git-send-email-fweisbec@gmail.com>
Date: Fri, 1 Apr 2016 15:23:05 +0200
From: Frederic Weisbecker <fweisbec@...il.com>
To: Peter Zijlstra <peterz@...radead.org>
Cc: LKML <linux-kernel@...r.kernel.org>,
Frederic Weisbecker <fweisbec@...il.com>,
Byungchul Park <byungchul.park@....com>,
Chris Metcalf <cmetcalf@...hip.com>,
Thomas Gleixner <tglx@...utronix.de>,
Luiz Capitulino <lcapitulino@...hat.com>,
Christoph Lameter <cl@...ux.com>,
"Paul E . McKenney" <paulmck@...ux.vnet.ibm.com>,
Mike Galbraith <efault@....de>, Rik van Riel <riel@...hat.com>,
Ingo Molnar <mingo@...e.hu>
Subject: [PATCH 2/4] sched: Correctly handle nohz ticks cpu load accounting
Ticks can happen in the middle of a nohz frame and
cpu_load_update_active() doesn't handle these correctly. It forgets the
whole previous tickless load and just records the current tick, ignoring
potentially long idle periods.
In order to solve this, record the load on nohz frame entry so we know
what to record in case of nohz interruptions, then use this recorded load
to account the tickless load on nohz ticks and nohz frame end.
Cc: Byungchul Park <byungchul.park@....com>
Cc: Chris Metcalf <cmetcalf@...hip.com>
Cc: Christoph Lameter <cl@...ux.com>
Cc: Ingo Molnar <mingo@...e.hu>
Cc: Luiz Capitulino <lcapitulino@...hat.com>
Cc: Mike Galbraith <efault@....de>
Cc: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Rik van Riel <riel@...hat.com>
Cc: Thomas Gleixner <tglx@...utronix.de>
Signed-off-by: Frederic Weisbecker <fweisbec@...il.com>
---
include/linux/sched.h | 6 +++--
kernel/sched/fair.c | 63 +++++++++++++++++++++++++++++-------------------
kernel/time/tick-sched.c | 9 ++++---
3 files changed, 47 insertions(+), 31 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 86adc0e..6f9415a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -178,9 +178,11 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
extern void calc_global_load(unsigned long ticks);
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-extern void cpu_load_update_nohz(int active);
+extern void cpu_load_update_nohz_start(void);
+extern void cpu_load_update_nohz_stop(void);
#else
-static inline void cpu_load_update_nohz(int active) { }
+static inline void cpu_load_update_nohz_start(void) { }
+static inline void cpu_load_update_nohz_stop(void) { }
#endif
extern void dump_cpu_task(int cpu);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f33764d..394f008 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4527,9 +4527,9 @@ decay_load_missed(unsigned long load, unsigned long missed_updates, int idx)
* term. See the @active paramter.
*/
static void __cpu_load_update(struct rq *this_rq, unsigned long this_load,
- unsigned long pending_updates, int active)
+ unsigned long pending_updates)
{
- unsigned long tickless_load = active ? this_rq->cpu_load[0] : 0;
+ unsigned long tickless_load = this_rq->cpu_load[0];
int i, scale;
this_rq->nr_load_updates++;
@@ -4567,17 +4567,9 @@ static void __cpu_load_update(struct rq *this_rq, unsigned long this_load,
sched_avg_update(this_rq);
}
-/* Used instead of source_load when we know the type == 0 */
-static unsigned long weighted_cpuload(const int cpu)
-{
- return cfs_rq_runnable_load_avg(&cpu_rq(cpu)->cfs);
-}
-
-#ifdef CONFIG_NO_HZ_COMMON
-static void __cpu_load_update_nohz(struct rq *this_rq,
- unsigned long curr_jiffies,
- unsigned long load,
- int active)
+static void cpu_load_update(struct rq *this_rq,
+ unsigned long curr_jiffies,
+ unsigned long load)
{
unsigned long pending_updates;
@@ -4589,10 +4581,17 @@ static void __cpu_load_update_nohz(struct rq *this_rq,
* In the NOHZ_FULL case, we were non-idle, we should consider
* its weighted load.
*/
- __cpu_load_update(this_rq, load, pending_updates, active);
+ __cpu_load_update(this_rq, load, pending_updates);
}
}
+/* Used instead of source_load when we know the type == 0 */
+static unsigned long weighted_cpuload(const int cpu)
+{
+ return cfs_rq_runnable_load_avg(&cpu_rq(cpu)->cfs);
+}
+
+#ifdef CONFIG_NO_HZ_COMMON
/*
* There is no sane way to deal with nohz on smp when using jiffies because the
* cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
@@ -4618,26 +4617,43 @@ static void cpu_load_update_idle(struct rq *this_rq)
if (weighted_cpuload(cpu_of(this_rq)))
return;
- __cpu_load_update_nohz(this_rq, READ_ONCE(jiffies), 0, 0);
+ cpu_load_update(this_rq, READ_ONCE(jiffies), 0);
}
/*
- * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
+ * Record CPU load on nohz entry so we know the tickless load to account
+ * on nohz exit.
*/
-void cpu_load_update_nohz(int active)
+void cpu_load_update_nohz_start(void)
{
struct rq *this_rq = this_rq();
+
+ /*
+ * This is all lockless but should be fine. If weighted_cpuload changes
+ * concurrently we'll exit nohz. And cpu_load write can race with
+ * cpu_load_update_idle() but both updater would be writing the same.
+ */
+ this_rq->cpu_load[0] = weighted_cpuload(cpu_of(this_rq));
+}
+
+/*
+ * Account the tickless load in the end of a nohz frame.
+ */
+void cpu_load_update_nohz_stop(void)
+{
unsigned long curr_jiffies = READ_ONCE(jiffies);
- unsigned long load = active ? weighted_cpuload(cpu_of(this_rq)) : 0;
+ struct rq *this_rq = this_rq();
+ unsigned long load;
if (curr_jiffies == this_rq->last_load_update_tick)
return;
+ load = weighted_cpuload(cpu_of(this_rq));
raw_spin_lock(&this_rq->lock);
- __cpu_load_update_nohz(this_rq, curr_jiffies, load, active);
+ cpu_load_update(this_rq, curr_jiffies, load);
raw_spin_unlock(&this_rq->lock);
}
-#endif /* CONFIG_NO_HZ */
+#endif /* CONFIG_NO_HZ_COMMON */
/*
* Called from scheduler_tick()
@@ -4645,11 +4661,8 @@ void cpu_load_update_nohz(int active)
void cpu_load_update_active(struct rq *this_rq)
{
unsigned long load = weighted_cpuload(cpu_of(this_rq));
- /*
- * See the mess around cpu_load_update_idle() / cpu_load_update_nohz().
- */
- this_rq->last_load_update_tick = jiffies;
- __cpu_load_update(this_rq, load, 1, 1);
+
+ cpu_load_update(this_rq, READ_ONCE(jiffies), load);
}
/*
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d62eb77..342110f 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -777,6 +777,7 @@ static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
if (!ts->tick_stopped) {
nohz_balance_enter_idle(cpu);
calc_load_enter_idle();
+ cpu_load_update_nohz_start();
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
ts->tick_stopped = 1;
@@ -803,11 +804,11 @@ out:
return tick;
}
-static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now, int active)
+static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
{
/* Update jiffies first */
tick_do_update_jiffies64(now);
- cpu_load_update_nohz(active);
+ cpu_load_update_nohz_stop();
calc_load_exit_idle();
touch_softlockup_watchdog_sched();
@@ -834,7 +835,7 @@ static void tick_nohz_full_update_tick(struct tick_sched *ts)
if (can_stop_full_tick(ts))
tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
else if (ts->tick_stopped)
- tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
+ tick_nohz_restart_sched_tick(ts, ktime_get());
#endif
}
@@ -1025,7 +1026,7 @@ void tick_nohz_idle_exit(void)
tick_nohz_stop_idle(ts, now);
if (ts->tick_stopped) {
- tick_nohz_restart_sched_tick(ts, now, 0);
+ tick_nohz_restart_sched_tick(ts, now);
tick_nohz_account_idle_ticks(ts);
}
--
2.7.0
Powered by blists - more mailing lists