[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20191030133130.GY4097@hirez.programming.kicks-ass.net>
Date: Wed, 30 Oct 2019 14:31:30 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Scott Wood <swood@...hat.com>
Cc: Frederic Weisbecker <frederic@...nel.org>,
Thomas Gleixner <tglx@...utronix.de>,
Ingo Molnar <mingo@...nel.org>,
LKML <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH] timers/nohz: Update nohz load even if tick already
stopped
On Wed, Oct 30, 2019 at 03:48:26AM -0500, Scott Wood wrote:
> On Tue, 2019-10-29 at 11:05 +0100, Peter Zijlstra wrote:
> > @@ -3686,6 +3688,7 @@ static void sched_tick_remote(struct work_struct
> > *work)
> > curr->sched_class->task_tick(rq, curr, 0);
> >
> > out_unlock:
> > + calc_load_nohz_remote(cpu);
> > rq_unlock_irq(rq, &rf);
>
> This gets skipped when the cpu is idle, so it still misses the update.
Oh argh! that's a bit radical of the remote tick. The normal tick runs
just fine on idle CPUs, so lets mirror that.
How's this then?
---
diff --git a/include/linux/sched/nohz.h b/include/linux/sched/nohz.h
index 1abe91ff6e4a..6d67e9a5af6b 100644
--- a/include/linux/sched/nohz.h
+++ b/include/linux/sched/nohz.h
@@ -15,9 +15,11 @@ static inline void nohz_balance_enter_idle(int cpu) { }
#ifdef CONFIG_NO_HZ_COMMON
void calc_load_nohz_start(void);
+void calc_load_nohz_remote(struct rq *rq);
void calc_load_nohz_stop(void);
#else
static inline void calc_load_nohz_start(void) { }
+static inline void calc_load_nohz_remote(struct rq *rq) { }
static inline void calc_load_nohz_stop(void) { }
#endif /* CONFIG_NO_HZ_COMMON */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index eb42b71faab9..d02d1b8f40af 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3660,21 +3660,17 @@ static void sched_tick_remote(struct work_struct *work)
u64 delta;
int os;
- /*
- * Handle the tick only if it appears the remote CPU is running in full
- * dynticks mode. The check is racy by nature, but missing a tick or
- * having one too much is no big deal because the scheduler tick updates
- * statistics and checks timeslices in a time-independent way, regardless
- * of when exactly it is running.
- */
- if (idle_cpu(cpu) || !tick_nohz_tick_stopped_cpu(cpu))
+ if (!tick_nohz_tick_stopped_cpu(cpu))
goto out_requeue;
rq_lock_irq(rq, &rf);
- curr = rq->curr;
- if (is_idle_task(curr) || cpu_is_offline(cpu))
+ /*
+ * We must not call calc_load_nohz_remote() when not in NOHZ mode.
+ */
+ if (cpu_is_offline(cpu) || !tick_nohz_tick_stopped(cpu))
goto out_unlock;
+ curr = rq->curr;
update_rq_clock(rq);
delta = rq_clock_task(rq) - curr->se.exec_start;
@@ -3685,10 +3681,11 @@ static void sched_tick_remote(struct work_struct *work)
WARN_ON_ONCE(delta > (u64)NSEC_PER_SEC * 3);
curr->sched_class->task_tick(rq, curr, 0);
+ calc_load_nohz_remote(rq);
out_unlock:
rq_unlock_irq(rq, &rf);
-
out_requeue:
+
/*
* Run the remote tick once per second (1Hz). This arbitrary
* frequency is large enough to avoid overload but short enough
diff --git a/kernel/sched/loadavg.c b/kernel/sched/loadavg.c
index 28a516575c18..de22da666ac7 100644
--- a/kernel/sched/loadavg.c
+++ b/kernel/sched/loadavg.c
@@ -231,16 +231,11 @@ static inline int calc_load_read_idx(void)
return calc_load_idx & 1;
}
-void calc_load_nohz_start(void)
+static void calc_load_nohz_fold(struct rq *rq)
{
- struct rq *this_rq = this_rq();
long delta;
- /*
- * We're going into NO_HZ mode, if there's any pending delta, fold it
- * into the pending NO_HZ delta.
- */
- delta = calc_load_fold_active(this_rq, 0);
+ delta = calc_load_fold_active(rq, 0);
if (delta) {
int idx = calc_load_write_idx();
@@ -248,6 +243,24 @@ void calc_load_nohz_start(void)
}
}
+void calc_load_nohz_start(void)
+{
+ /*
+ * We're going into NO_HZ mode, if there's any pending delta, fold it
+ * into the pending NO_HZ delta.
+ */
+ calc_load_nohz_fold(this_rq());
+}
+
+/*
+ * Keep track of the load for NOHZ_FULL, must be called between
+ * calc_load_nohz_{start,stop}().
+ */
+void calc_load_nohz_remote(struct rq *rq)
+{
+ calc_load_nohz_fold(rq);
+}
+
void calc_load_nohz_stop(void)
{
struct rq *this_rq = this_rq();
@@ -268,7 +281,7 @@ void calc_load_nohz_stop(void)
this_rq->calc_load_update += LOAD_FREQ;
}
-static long calc_load_nohz_fold(void)
+static long calc_load_nohz_read(void)
{
int idx = calc_load_read_idx();
long delta = 0;
@@ -323,7 +336,7 @@ static void calc_global_nohz(void)
}
#else /* !CONFIG_NO_HZ_COMMON */
-static inline long calc_load_nohz_fold(void) { return 0; }
+static inline long calc_load_nohz_read(void) { return 0; }
static inline void calc_global_nohz(void) { }
#endif /* CONFIG_NO_HZ_COMMON */
@@ -346,7 +359,7 @@ void calc_global_load(unsigned long ticks)
/*
* Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs.
*/
- delta = calc_load_nohz_fold();
+ delta = calc_load_nohz_read();
if (delta)
atomic_long_add(delta, &calc_load_tasks);
Powered by blists - more mailing lists