Date:	Wed,  2 Apr 2014 22:17:52 +0200
From:	Denys Vlasenko <dvlasenk@...hat.com>
To:	linux-kernel@...r.kernel.org
Cc:	Denys Vlasenko <dvlasenk@...hat.com>,
	Frederic Weisbecker <fweisbec@...il.com>,
	Hidetoshi Seto <seto.hidetoshi@...fujitsu.com>,
	Fernando Luis Vazquez Cao <fernando_b1@....ntt.co.jp>,
	Tetsuo Handa <penguin-kernel@...ove.SAKURA.ne.jp>,
	Thomas Gleixner <tglx@...utronix.de>,
	Ingo Molnar <mingo@...nel.org>,
	Peter Zijlstra <peterz@...radead.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Arjan van de Ven <arjan@...ux.intel.com>,
	Oleg Nesterov <oleg@...hat.com>
Subject: [PATCH 2/2] nohz: Synchronize sleep time stats with memory barriers

When a call site uses get_cpu_*_time_us() to read a sleeptime
stat, it deduces the total sleeptime by adding the pending time
to the last sleeptime snapshot if the target CPU is idle.

But this only works if idle_sleeptime, idle_entrytime and idle_active
are read and updated in a disciplined order.

This patch changes updaters to modify idle_entrytime,
{idle,iowait}_sleeptime, and idle_active exactly in this order,
with write barriers on SMP to ensure other CPUs see them in this
order too. Readers are changed to read them in the opposite order,
with read barriers. When readers detect a race by seeing a cleared
idle_entrytime, they retry the reads.
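
In outline, the protocol looks like this (a simplified sketch using
the field names from tick-sched.c; the actual code is in the diff
below):

	/* Writer, on idle exit (simplified): */
	delta = ktime_sub(now, ts->idle_entrytime);
	ts->idle_entrytime.tv64 = 0;	/* flags "update in progress" */
	smp_wmb();
	ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
	smp_wmb();
	ts->idle_active = 0;

	/* Reader (simplified; the real code only reads idle_entrytime
	 * when idle_active says the CPU is idle):
	 */
	do {
		active = ACCESS_ONCE(ts->idle_active);
		smp_rmb();
		count = ACCESS_ONCE(ts->idle_sleeptime);
		smp_rmb();
		start = ACCESS_ONCE(ts->idle_entrytime);
	} while (active && start.tv64 == 0);	/* writer raced us: retry */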

The "iowait-ness" of every idle period is decided at the moment it starts:
if this CPU's run-queue had tasks waiting on I/O, then this idle
period's duration will be added to iowait_sleeptime.
This, along with proper SMP syncronization, fixes the bug where iowait
counts could go backwards.
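
Concretely, the iowait-ness is recorded in idle_active at idle entry
(0 = not idle, 1 = idle, 2 = idle with tasks blocked on I/O), as in
tick_nohz_start_idle() in the diff below:

	ts->idle_entrytime = now;
	smp_wmb();
	ts->idle_active = nr_iowait_cpu(smp_processor_id()) ? 2 : 1;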

Signed-off-by: Denys Vlasenko <dvlasenk@...hat.com>
Cc: Frederic Weisbecker <fweisbec@...il.com>
Cc: Hidetoshi Seto <seto.hidetoshi@...fujitsu.com>
Cc: Fernando Luis Vazquez Cao <fernando_b1@....ntt.co.jp>
Cc: Tetsuo Handa <penguin-kernel@...ove.SAKURA.ne.jp>
Cc: Thomas Gleixner <tglx@...utronix.de>
Cc: Ingo Molnar <mingo@...nel.org>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Andrew Morton <akpm@...ux-foundation.org>
Cc: Arjan van de Ven <arjan@...ux.intel.com>
Cc: Oleg Nesterov <oleg@...hat.com>
---
 kernel/time/tick-sched.c | 79 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 62 insertions(+), 17 deletions(-)

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 73ced0c4..ed0c1bd 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -409,10 +409,15 @@ static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
 
 	/* Updates the per cpu time idle statistics counters */
 	delta = ktime_sub(now, ts->idle_entrytime);
-	if (nr_iowait_cpu(smp_processor_id()) > 0)
+	ts->idle_entrytime.tv64 = 0;
+	smp_wmb();
+
+	if (ts->idle_active == 2)
 		ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
 	else
 		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+
+	smp_wmb();
 	ts->idle_active = 0;
 
 	sched_clock_idle_wakeup_event(0);
@@ -423,7 +428,8 @@ static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
 	ktime_t now = ktime_get();
 
 	ts->idle_entrytime = now;
-	ts->idle_active = 1;
+	smp_wmb();
+	ts->idle_active = nr_iowait_cpu(smp_processor_id()) ? 2 : 1;
 	sched_clock_idle_sleep_event();
 	return now;
 }
@@ -444,25 +450,44 @@ static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
  */
 u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
 {
-	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
-	ktime_t now, idle;
+	struct tick_sched *ts;
+	ktime_t now, count;
+	int active;
 
 	if (!tick_nohz_active)
 		return -1;
 
+	ts = &per_cpu(tick_cpu_sched, cpu);
+
 	now = ktime_get();
 	if (last_update_time)
 		*last_update_time = ktime_to_us(now);
 
-	if (ts->idle_active && !nr_iowait_cpu(cpu)) {
-		ktime_t delta = ktime_sub(now, ts->idle_entrytime);
-		idle = ktime_add(ts->idle_sleeptime, delta);
+ again:
+	active = ACCESS_ONCE(ts->idle_active);
+	smp_rmb();
+	count = ACCESS_ONCE(ts->idle_sleeptime);
+	if (active == 1) {
+		ktime_t delta, start;
+
+		smp_rmb();
+		start = ACCESS_ONCE(ts->idle_entrytime);
+		if (start.tv64 == 0)
+			/* Other CPU is updating the count.
+			 * We don't know whether fetched count is valid.
+			 */
+			goto again;
+
+		delta = ktime_sub(now, start);
+		count = ktime_add(count, delta);
 	} else {
-		idle = ts->idle_sleeptime;
+		/* Possible concurrent tick_nohz_stop_idle() already
+		 * cleared idle_active. We fetched count *after*
+		 * we fetched idle_active, so count must be valid.
+		 */
 	}
 
-	return ktime_to_us(idle);
-
+	return ktime_to_us(count);
 }
 EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
 
@@ -482,24 +507,44 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
  */
 u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
 {
-	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
-	ktime_t now, iowait;
+	struct tick_sched *ts;
+	ktime_t now, count;
+	int active;
 
 	if (!tick_nohz_active)
 		return -1;
 
+	ts = &per_cpu(tick_cpu_sched, cpu);
+
 	now = ktime_get();
 	if (last_update_time)
 		*last_update_time = ktime_to_us(now);
 
-	if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
-		ktime_t delta = ktime_sub(now, ts->idle_entrytime);
-		iowait = ktime_add(ts->iowait_sleeptime, delta);
+ again:
+	active = ACCESS_ONCE(ts->idle_active);
+	smp_rmb();
+	count = ACCESS_ONCE(ts->iowait_sleeptime);
+	if (active == 2) {
+		ktime_t delta, start;
+
+		smp_rmb();
+		start = ACCESS_ONCE(ts->idle_entrytime);
+		if (start.tv64 == 0)
+			/* Other CPU is updating the count.
+			 * We don't know whether fetched count is valid.
+			 */
+			goto again;
+
+		delta = ktime_sub(now, start);
+		count = ktime_add(count, delta);
 	} else {
-		iowait = ts->iowait_sleeptime;
+		/* Possible concurrent tick_nohz_stop_idle() already
+		 * cleared idle_active. We fetched count *after*
+		 * we fetched idle_active, so count must be valid.
+		 */
 	}
 
-	return ktime_to_us(iowait);
+	return ktime_to_us(count);
 }
 EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
 
-- 
1.8.1.4
