lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1473166148-8466-1-git-send-email-sgruszka@redhat.com>
Date:   Tue,  6 Sep 2016 14:49:08 +0200
From:   Stanislaw Gruszka <sgruszka@...hat.com>
To:     linux-kernel@...r.kernel.org
Cc:     Giovanni Gherdovich <ggherdovich@...e.cz>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Mel Gorman <mgorman@...hsingularity.net>,
        Mike Galbraith <mgalbraith@...e.de>,
        Paolo Bonzini <pbonzini@...hat.com>,
        Peter Zijlstra <peterz@...radead.org>,
        Rik van Riel <riel@...hat.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Wanpeng Li <wanpeng.li@...mail.com>,
        Ingo Molnar <mingo@...nel.org>,
        Stanislaw Gruszka <sgruszka@...hat.com>
Subject: [PATCH v3] sched/cputime: Protect some other sum_exec_runtime reads on 32 bit cpus

We protect 64bit sum_exec_runtime variable against inconsistency on
32 bit architectures when reading it on thread_group_cputime(), but we
also read it on other places not protected.

Patch changes that, except places where sum_exec_runtime is read for
debuging purpose and where we also read some other u64 tsk->se.X or
tsk->sched_info.X variables without protection like on
kernel/delayacct.c, fs/proc/base.c and kernel/sched/debug.c . But since
those are for debug or for printing statistics purpose and inconsistency
is very unlike, we can ignore that.

Patch fixes places were inconsistency can results of cpuclock
monotonicity breakage or cputimer expire too early or too late.

Signed-off-by: Stanislaw Gruszka <sgruszka@...hat.com>
---
v2 -> v3:
 - don't drop rq->lock
 - don't use read_sum_exec_runtime() where we read other 64bit variables
   without protection for debug/statistics purpose

 include/linux/sched.h          |  9 +++++++++
 kernel/exit.c                  |  2 +-
 kernel/sched/core.c            | 18 ++++++++++++++++++
 kernel/sched/cputime.c         | 22 +---------------------
 kernel/time/posix-cpu-timers.c |  4 ++--
 5 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index eb64fcd..0a7c87e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2508,6 +2508,15 @@ static inline void disable_sched_clock_irqtime(void) {}
 extern unsigned long long
 task_sched_runtime(struct task_struct *task);
 
+#ifdef CONFIG_64BIT
+static inline u64 read_sum_exec_runtime(struct task_struct *t)
+{
+	return t->se.sum_exec_runtime;
+}
+#else
+extern u64 read_sum_exec_runtime(struct task_struct *t);
+#endif
+
 /* sched_exec is called by processes performing an exec */
 #ifdef CONFIG_SMP
 extern void sched_exec(void);
diff --git a/kernel/exit.c b/kernel/exit.c
index 2f974ae..a46f96f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -134,7 +134,7 @@ static void __exit_signal(struct task_struct *tsk)
 	sig->inblock += task_io_get_inblock(tsk);
 	sig->oublock += task_io_get_oublock(tsk);
 	task_io_accounting_add(&sig->ioac, &tsk->ioac);
-	sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
+	sig->sum_sched_runtime += read_sum_exec_runtime(tsk);
 	sig->nr_threads--;
 	__unhash_process(tsk, group_dead);
 	write_sequnlock(&sig->stats_lock);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 556cb07..1bae666 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3034,6 +3034,24 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 	return ns;
 }
 
+#ifndef CONFIG_64BIT
+/*
+ * Protect 64bit read against concurrent write in update_curr().
+ */
+u64 read_sum_exec_runtime(struct task_struct *t)
+{
+	u64 ns;
+	struct rq_flags rf;
+	struct rq *rq;
+
+	rq = task_rq_lock(t, &rf);
+	ns = t->se.sum_exec_runtime;
+	task_rq_unlock(rq, t, &rf);
+
+	return ns;
+}
+#endif
+
 /*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index b93c72d..4d080f2 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -306,26 +306,6 @@ static inline cputime_t account_other_time(cputime_t max)
 	return accounted;
 }
 
-#ifdef CONFIG_64BIT
-static inline u64 read_sum_exec_runtime(struct task_struct *t)
-{
-	return t->se.sum_exec_runtime;
-}
-#else
-static u64 read_sum_exec_runtime(struct task_struct *t)
-{
-	u64 ns;
-	struct rq_flags rf;
-	struct rq *rq;
-
-	rq = task_rq_lock(t, &rf);
-	ns = t->se.sum_exec_runtime;
-	task_rq_unlock(rq, t, &rf);
-
-	return ns;
-}
-#endif
-
 /*
  * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
  * tasks (sum on group iteration) belonging to @tsk's group.
@@ -702,7 +682,7 @@ out:
 void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	struct task_cputime cputime = {
-		.sum_exec_runtime = p->se.sum_exec_runtime,
+		.sum_exec_runtime = read_sum_exec_runtime(p),
 	};
 
 	task_cputime(p, &cputime.utime, &cputime.stime);
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 39008d7..a2d753b 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -848,7 +848,7 @@ static void check_thread_timers(struct task_struct *tsk,
 	tsk_expires->virt_exp = expires_to_cputime(expires);
 
 	tsk_expires->sched_exp = check_timers_list(++timers, firing,
-						   tsk->se.sum_exec_runtime);
+						   read_sum_exec_runtime(tsk));
 
 	/*
 	 * Check for the special case thread timers.
@@ -1115,7 +1115,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 		struct task_cputime task_sample;
 
 		task_cputime(tsk, &task_sample.utime, &task_sample.stime);
-		task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime;
+		task_sample.sum_exec_runtime = read_sum_exec_runtime(tsk);
 		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
 			return 1;
 	}
-- 
1.8.3.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ