lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1545256.RXd5dazKu1@aspire.rjw.lan>
Date:   Tue, 06 Mar 2018 10:10:35 +0100
From:   "Rafael J. Wysocki" <rjw@...ysocki.net>
To:     Peter Zijlstra <peterz@...radead.org>,
        Linux PM <linux-pm@...r.kernel.org>
Cc:     Thomas Gleixner <tglx@...utronix.de>,
        Frederic Weisbecker <fweisbec@...il.com>,
        Paul McKenney <paulmck@...ux.vnet.ibm.com>,
        Thomas Ilsche <thomas.ilsche@...dresden.de>,
        Doug Smythies <dsmythies@...us.net>,
        Rik van Riel <riel@...riel.com>,
        Aubrey Li <aubrey.li@...ux.intel.com>,
        Mike Galbraith <mgalbraith@...e.de>,
        LKML <linux-kernel@...r.kernel.org>
Subject: [RFC/RFT][PATCH v2 5/6] sched: idle: Select idle state before stopping the tick

From: Rafael J. Wysocki <rafael.j.wysocki@...el.com>

In order to address the issue with short idle duration predictions
by the idle governor after the tick has been stopped, reorder the
code in cpuidle_idle_call() so that the governor idle state selection
runs before tick_nohz_idle_go_idle() and use the "nohz" hint returned
by cpuidle_select() to tell tick_nohz_idle_go_idle() whether or not
to stop the tick.

This isn't straightforward, because menu_predict() invokes
tick_nohz_get_sleep_length() to get the time to the next timer
event and the number returned by the latter comes from
__tick_nohz_idle_enter().  Fortunately, however, it is possible
to compute that number without actually stopping the tick and with
the help of the existing code.

Namely, notice that tick_nohz_stop_sched_tick() already computes the
next timer event time to reprogram the scheduler tick hrtimer and
that time can be used as a proxy for the actual next timer event
time in the idle duration predicition.

Accordingly, rename the original tick_nohz_stop_sched_tick() to
__tick_nohz_next_event() and add the stop_tick argument indicating
whether or not to stop the tick to it.  If that argument is 'true',
the function will work like the original tick_nohz_stop_sched_tick(),
but otherwise it will just compute the next event time without
stopping the tick.  Next, redefine tick_nohz_stop_sched_tick() as
a wrapper around the new function.

Following that, make tick_nohz_get_sleep_length() call
__tick_nohz_next_event() to compute the next timer event time
and make it use the new last_jiffies_update field in struct
tick_sched to tell __tick_nohz_idle_enter() to skip some code
that has run already.

[After this change the __tick_nohz_next_event() code computing the
 next event time will run twice in a row if the expected idle period
 duration coming from cpuidle_select() is large enough which is sort
 of ugly, but the next set of changes deals with that separately.
 To do that, it uses the value of the last_jiffies_update field in
 struct tick_sched introduced here, among other things.]

Finally, drop the now redundant sleep_length field from struct
tick_sched.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@...el.com>
---

-> v2: Use the "nohz" hint from cpuidle_select() instead of the expected
       idle duration.

---
 kernel/sched/idle.c      |    7 ++---
 kernel/time/tick-sched.c |   64 +++++++++++++++++++++++++++++++++--------------
 kernel/time/tick-sched.h |    3 --
 3 files changed, 50 insertions(+), 24 deletions(-)

Index: linux-pm/kernel/sched/idle.c
===================================================================
--- linux-pm.orig/kernel/sched/idle.c
+++ linux-pm/kernel/sched/idle.c
@@ -188,13 +188,14 @@ static void cpuidle_idle_call(void)
 	} else {
 		bool nohz = true;
 
-		tick_nohz_idle_go_idle(true);
-		rcu_idle_enter();
-
 		/*
 		 * Ask the cpuidle framework to choose a convenient idle state.
 		 */
 		next_state = cpuidle_select(drv, dev, &nohz);
+
+		tick_nohz_idle_go_idle(nohz);
+		rcu_idle_enter();
+
 		entered_state = call_cpuidle(drv, dev, next_state);
 		/*
 		 * Give the governor an opportunity to reflect on the outcome
Index: linux-pm/kernel/time/tick-sched.c
===================================================================
--- linux-pm.orig/kernel/time/tick-sched.c
+++ linux-pm/kernel/time/tick-sched.c
@@ -655,8 +655,8 @@ static inline bool local_timer_softirq_p
 	return local_softirq_pending() & TIMER_SOFTIRQ;
 }
 
-static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts,
-					 ktime_t now, int cpu)
+static ktime_t __tick_nohz_next_event(struct tick_sched *ts, int cpu,
+				      bool stop_tick)
 {
 	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 	u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
@@ -670,6 +670,7 @@ static ktime_t tick_nohz_stop_sched_tick
 		basejiff = jiffies;
 	} while (read_seqretry(&jiffies_lock, seq));
 	ts->last_jiffies = basejiff;
+	ts->last_jiffies_update = basemono;
 
 	/*
 	 * Keep the periodic tick, when RCU, architecture or irq_work
@@ -732,8 +733,10 @@ static ktime_t tick_nohz_stop_sched_tick
 	 */
 	delta = timekeeping_max_deferment();
 	if (cpu == tick_do_timer_cpu) {
-		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
-		ts->do_timer_last = 1;
+		if (stop_tick) {
+			tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+			ts->do_timer_last = 1;
+		}
 	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
 		delta = KTIME_MAX;
 		ts->do_timer_last = 0;
@@ -756,6 +759,12 @@ static ktime_t tick_nohz_stop_sched_tick
 	expires = min_t(u64, expires, next_tick);
 	tick = expires;
 
+	if (!stop_tick) {
+		/* Undo the effect of get_next_timer_interrupt(). */
+		timer_clear_idle();
+		goto out;
+	}
+
 	/* Skip reprogram of event if its not changed */
 	if (ts->tick_stopped && (expires == ts->next_tick)) {
 		/* Sanity check: make sure clockevent is actually programmed */
@@ -804,14 +813,14 @@ static ktime_t tick_nohz_stop_sched_tick
 	else
 		tick_program_event(tick, 1);
 out:
-	/*
-	 * Update the estimated sleep length until the next timer
-	 * (not only the tick).
-	 */
-	ts->sleep_length = ktime_sub(dev->next_event, now);
 	return tick;
 }
 
+static ktime_t tick_nohz_stop_sched_tick(struct tick_sched *ts, int cpu)
+{
+	return __tick_nohz_next_event(ts, cpu, true);
+}
+
 static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 {
 	/* Update jiffies first */
@@ -847,7 +856,7 @@ static void tick_nohz_full_update_tick(s
 		return;
 
 	if (can_stop_full_tick(cpu, ts))
-		tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
+		tick_nohz_stop_sched_tick(ts, cpu);
 	else if (ts->tick_stopped)
 		tick_nohz_restart_sched_tick(ts, ktime_get());
 #endif
@@ -873,10 +882,8 @@ static bool can_stop_idle_tick(int cpu,
 		return false;
 	}
 
-	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) {
-		ts->sleep_length = NSEC_PER_SEC / HZ;
+	if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
 		return false;
-	}
 
 	if (need_resched())
 		return false;
@@ -913,17 +920,22 @@ static bool can_stop_idle_tick(int cpu,
 
 static void __tick_nohz_idle_enter(struct tick_sched *ts, bool stop_tick)
 {
-	ktime_t now, expires;
 	int cpu = smp_processor_id();
 
-	now = tick_nohz_start_idle(ts);
+	if (!ts->last_jiffies_update) {
+		/* tick_nohz_get_sleep_length() has not run. */
+		tick_nohz_start_idle(ts);
+		if (!can_stop_idle_tick(cpu, ts))
+			return;
+	}
 
-	if (can_stop_idle_tick(cpu, ts) && stop_tick) {
+	if (stop_tick) {
 		int was_stopped = ts->tick_stopped;
+		ktime_t expires;
 
 		ts->idle_calls++;
 
-		expires = tick_nohz_stop_sched_tick(ts, now, cpu);
+		expires = tick_nohz_stop_sched_tick(ts, cpu);
 		if (expires > 0LL) {
 			ts->idle_sleeps++;
 			ts->idle_expires = expires;
@@ -934,6 +946,8 @@ static void __tick_nohz_idle_enter(struc
 			nohz_balance_enter_idle(cpu);
 		}
 	}
+
+	ts->last_jiffies_update = 0;
 }
 
 void __tick_nohz_idle_prepare(void)
@@ -1013,15 +1027,27 @@ void tick_nohz_irq_exit(void)
 }
 
 /**
- * tick_nohz_get_sleep_length - return the length of the current sleep
+ * tick_nohz_get_sleep_length - return the expected length of the current sleep
  *
  * Called from power state control code with interrupts disabled
  */
 ktime_t tick_nohz_get_sleep_length(void)
 {
 	struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
+	int cpu = smp_processor_id();
+	ktime_t now, next_event;
 
-	return ts->sleep_length;
+	now = tick_nohz_start_idle(ts);
+
+	if (can_stop_idle_tick(cpu, ts)) {
+		next_event = __tick_nohz_next_event(ts, cpu, false);
+	} else {
+		struct clock_event_device *dev;
+
+		dev = __this_cpu_read(tick_cpu_device.evtdev);
+		next_event = dev->next_event;
+	}
+	return ktime_sub(next_event, now);;
 }
 
 /**
Index: linux-pm/kernel/time/tick-sched.h
===================================================================
--- linux-pm.orig/kernel/time/tick-sched.h
+++ linux-pm/kernel/time/tick-sched.h
@@ -38,7 +38,6 @@ enum tick_nohz_mode {
  * @idle_exittime:	Time when the idle state was left
  * @idle_sleeptime:	Sum of the time slept in idle with sched tick stopped
  * @iowait_sleeptime:	Sum of the time slept in idle with sched tick stopped, with IO outstanding
- * @sleep_length:	Duration of the current idle sleep
  * @do_timer_lst:	CPU was the last one doing do_timer before going idle
  */
 struct tick_sched {
@@ -58,8 +57,8 @@ struct tick_sched {
 	ktime_t				idle_exittime;
 	ktime_t				idle_sleeptime;
 	ktime_t				iowait_sleeptime;
-	ktime_t				sleep_length;
 	unsigned long			last_jiffies;
+	u64				last_jiffies_update;
 	u64				next_timer;
 	ktime_t				idle_expires;
 	int				do_timer_last;

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ