lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1190311039.13070.19.camel@peace.smo.corp.google.com>
Date:	Thu, 20 Sep 2007 10:57:19 -0700
From:	Frank Mayhar <fmayhar@...gle.com>
To:	linux-kernel <linux-kernel@...r.kernel.org>
Subject: [PATCH getrusage: return ru_maxrss

Properly support the ru_maxrss field of the rusage structure returned by
getrusage().  This patch includes documentation both of the getrusage()
implementation in general and of the ru_maxrss implementation
specifically.  This implementation matches that of FreeBSD, which is the
only other OS of which I'm aware that implements this field.

Like a number of other folks, we recently had a need for a non-/proc way
of getting the RSS of a process and happened on getrusage().  Unlike the
rest, though, we fixed the system call to do what we want.  I wrote a
wrong implementation and submitted it while I was sick; this time I took
my time and I think I got it right.

A test program that has been run against a number of systems (of which
only FreeBSD and Linux 2.6 with this patch applied passed) is also
available at
	http://www.exit.com/Archives/Linux/

Signed-off-by:  Frank Mayhar <fmayhar@...gle.com>

---
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3de7901..85735af 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -518,6 +518,7 @@ struct signal_struct {
 	unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
 	unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
 	unsigned long inblock, oublock, cinblock, coublock;
+	unsigned long cmaxrss;
 
 	/*
 	 * Cumulative ns of scheduled CPU time for dead threads in the
@@ -1027,6 +1028,8 @@ #endif
 	struct timespec real_start_time;	/* boot based time */
 /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
 	unsigned long min_flt, maj_flt;
+/* maxrss gets the hiwater_rss in do_exit() */
+	unsigned long maxrss;
 
   	cputime_t it_prof_expires, it_virt_expires;
 	unsigned long long it_sched_expires;
diff --git a/kernel/exit.c b/kernel/exit.c
index 06b24b3..390d834 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -127,6 +127,8 @@ static void __exit_signal(struct task_st
 		sig->inblock += task_io_get_inblock(tsk);
 		sig->oublock += task_io_get_oublock(tsk);
 		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
+		if (tsk->maxrss > sig->cmaxrss)
+			sig->cmaxrss = tsk->maxrss;
 		sig = NULL; /* Marker for below. */
 	}
 
@@ -957,6 +959,14 @@ fastcall NORET_TYPE void do_exit(long co
 	if (tsk->mm) {
 		update_hiwater_rss(tsk->mm);
 		update_hiwater_vm(tsk->mm);
+		BUG_ON(tsk->maxrss != 0);
+		/*
+		 * Store the hiwater_rss in a task field, since when we need
+		 * it in __exit_signal() the mm structure is gone; we can't
+		 * stuff it in the signal structure since then we would be
+		 * racing with wait_task_zombie().
+		 */
+		tsk->maxrss = tsk->mm->hiwater_rss;
 	}
 	group_dead = atomic_dec_and_test(&tsk->signal->live);
 	if (group_dead) {
@@ -1265,6 +1275,17 @@ static int wait_task_zombie(struct task_
 		psig->coublock +=
 			task_io_get_oublock(p) +
 			sig->oublock + sig->coublock;
+		/*
+		 * Save the maximum RSS of this task and all its terminated,
+		 * waited-for children.  Don't accumulate the RSS since, one,
+		 * other operating systems (FreeBSD, AIX) do it this way and,
+		 * two, the cumulative RSS of a long-lived process with lots
+		 * of sequential child process would be meaningless.
+		 */
+		if (sig->cmaxrss > psig->cmaxrss)
+			psig->cmaxrss = sig->cmaxrss;
+		if (p->maxrss > psig->cmaxrss)
+			psig->cmaxrss = p->maxrss;
 		spin_unlock_irq(&p->parent->sighand->siglock);
 	}
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 7332e23..fae76dd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -536,6 +536,7 @@ static int copy_mm(unsigned long clone_f
 
 	tsk->min_flt = tsk->maj_flt = 0;
 	tsk->nvcsw = tsk->nivcsw = 0;
+	tsk->maxrss = 0;
 
 	tsk->mm = NULL;
 	tsk->active_mm = NULL;
@@ -881,6 +882,7 @@ static inline int copy_signal(unsigned l
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
 	sig->sum_sched_runtime = 0;
+	sig->cmaxrss = 0;
 	INIT_LIST_HEAD(&sig->cpu_timers[0]);
 	INIT_LIST_HEAD(&sig->cpu_timers[1]);
 	INIT_LIST_HEAD(&sig->cpu_timers[2]);
diff --git a/kernel/sys.c b/kernel/sys.c
index 1b33b05..e450bab 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2099,6 +2099,7 @@ static void k_getrusage(struct task_stru
 			r->ru_majflt = p->signal->cmaj_flt;
 			r->ru_inblock = p->signal->cinblock;
 			r->ru_oublock = p->signal->coublock;
+			r->ru_maxrss = p->signal->cmaxrss;
 
 			if (who == RUSAGE_CHILDREN)
 				break;
@@ -2124,11 +2125,15 @@ static void k_getrusage(struct task_stru
 				r->ru_oublock += task_io_get_oublock(t);
 				t = next_thread(t);
 			} while (t != p);
+			update_hiwater_rss(p->mm);
+			if (r->ru_maxrss < p->mm->hiwater_rss)
+				r->ru_maxrss = p->mm->hiwater_rss;
 			break;
 
 		default:
 			BUG();
 	}
+	r->ru_maxrss <<= PAGE_SHIFT - 10; /* Convert from pages to kilobytes. */
 
 	unlock_task_sighand(p, &flags);
 	rcu_read_unlock();

-- 
Frank Mayhar <fmayhar@...gle.com>
Google, Inc.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ