2.6.26-stable review patch.  If anyone has any objections, please let us know.

------------------
From: Balbir Singh

commit 49048622eae698e5c4ae61f7e71200f265ccc529 upstream

Spencer reported a problem where utime and stime were going negative despite
the fixes in commit b27f03d4bdc145a09fb7b0c0e004b29f1ee555fa. The suspected
cause is that signal_struct maintains its own utime and stime (accumulated
from exited tasks); these are not updated through the new task_utime()
routine, so sig->utime can go backwards and trigger the same problem
(sig->utime accumulates the raw tsk->utime rather than task_utime()). This
patch fixes the problem.

TODO: using max(task->prev_utime, derived utime) works for now, but a more
generic solution is to implement cputime_max() and use the cputime_gt()
function for comparison (a sketch of such a helper follows the patch below).

Reported-by: spencer@bluehost.com
Signed-off-by: Balbir Singh
Signed-off-by: Peter Zijlstra
Signed-off-by: Ingo Molnar
Signed-off-by: Greg Kroah-Hartman

---
 fs/proc/array.c       |   59 --------------------------------------------------
 include/linux/sched.h |    4 +++
 kernel/exit.c         |    6 ++---
 kernel/sched.c        |   59 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 66 insertions(+), 62 deletions(-)

--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -332,65 +332,6 @@ int proc_pid_status(struct seq_file *m,
 	return 0;
 }
 
-/*
- * Use precise platform statistics if available:
- */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-static cputime_t task_utime(struct task_struct *p)
-{
-	return p->utime;
-}
-
-static cputime_t task_stime(struct task_struct *p)
-{
-	return p->stime;
-}
-#else
-static cputime_t task_utime(struct task_struct *p)
-{
-	clock_t utime = cputime_to_clock_t(p->utime),
-		total = utime + cputime_to_clock_t(p->stime);
-	u64 temp;
-
-	/*
-	 * Use CFS's precise accounting:
-	 */
-	temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
-
-	if (total) {
-		temp *= utime;
-		do_div(temp, total);
-	}
-	utime = (clock_t)temp;
-
-	p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
-	return p->prev_utime;
-}
-
-static cputime_t task_stime(struct task_struct *p)
-{
-	clock_t stime;
-
-	/*
-	 * Use CFS's precise accounting. (we subtract utime from
-	 * the total, to make sure the total observed by userspace
-	 * grows monotonically - apps rely on that):
-	 */
-	stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
-			cputime_to_clock_t(task_utime(p));
-
-	if (stime >= 0)
-		p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
-
-	return p->prev_stime;
-}
-#endif
-
-static cputime_t task_gtime(struct task_struct *p)
-{
-	return p->gtime;
-}
-
 static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 			struct pid *pid, struct task_struct *task, int whole)
 {
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1477,6 +1477,10 @@ static inline void put_task_struct(struc
 		__put_task_struct(t);
 }
 
+extern cputime_t task_utime(struct task_struct *p);
+extern cputime_t task_stime(struct task_struct *p);
+extern cputime_t task_gtime(struct task_struct *p);
+
 /*
  * Per process flags
  */
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -111,9 +111,9 @@ static void __exit_signal(struct task_st
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
		 */
-		sig->utime = cputime_add(sig->utime, tsk->utime);
-		sig->stime = cputime_add(sig->stime, tsk->stime);
-		sig->gtime = cputime_add(sig->gtime, tsk->gtime);
+		sig->utime = cputime_add(sig->utime, task_utime(tsk));
+		sig->stime = cputime_add(sig->stime, task_stime(tsk));
+		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
 		sig->nvcsw += tsk->nvcsw;
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3995,6 +3995,65 @@ void account_steal_time(struct task_stru
 }
 
 /*
+ * Use precise platform statistics if available:
+ */
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+cputime_t task_utime(struct task_struct *p)
+{
+	return p->utime;
+}
+
+cputime_t task_stime(struct task_struct *p)
+{
+	return p->stime;
+}
+#else
+cputime_t task_utime(struct task_struct *p)
+{
+	clock_t utime = cputime_to_clock_t(p->utime),
+		total = utime + cputime_to_clock_t(p->stime);
+	u64 temp;
+
+	/*
+	 * Use CFS's precise accounting:
+	 */
+	temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
+
+	if (total) {
+		temp *= utime;
+		do_div(temp, total);
+	}
+	utime = (clock_t)temp;
+
+	p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
+	return p->prev_utime;
+}
+
+cputime_t task_stime(struct task_struct *p)
+{
+	clock_t stime;
+
+	/*
+	 * Use CFS's precise accounting. (we subtract utime from
+	 * the total, to make sure the total observed by userspace
+	 * grows monotonically - apps rely on that):
+	 */
+	stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
+			cputime_to_clock_t(task_utime(p));
+
+	if (stime >= 0)
+		p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
+
+	return p->prev_stime;
+}
+#endif
+
+inline cputime_t task_gtime(struct task_struct *p)
+{
+	return p->gtime;
+}
+
+/*
  * This function gets called by the timer code, with HZ frequency.
  * We call it with interrupts disabled.
  *

--
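
On the TODO in the changelog: cputime_gt() is an existing generic cputime
comparison (see include/asm-generic/cputime.h), so the cputime_max() helper
the changelog asks for could look like the sketch below. This is only an
illustrative sketch of the suggested follow-up, not part of the applied
patch; cputime_max() itself does not exist yet.

	/*
	 * Sketch of the cputime_max() helper suggested in the changelog's
	 * TODO -- not part of this patch. It relies only on the existing
	 * cputime_gt() comparison, so it stays correct even on
	 * architectures where cputime_t is not an ordinary scalar.
	 */
	static inline cputime_t cputime_max(cputime_t a, cputime_t b)
	{
		return cputime_gt(a, b) ? a : b;
	}

With such a helper, task_utime() could update its cached value as

	p->prev_utime = cputime_max(p->prev_utime, clock_t_to_cputime(utime));

instead of open-coding max(), which assumes cputime_t supports plain
relational operators on every architecture.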