[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1356028391-14427-4-git-send-email-fweisbec@gmail.com>
Date: Thu, 20 Dec 2012 19:32:50 +0100
From: Frederic Weisbecker <fweisbec@...il.com>
To: LKML <linux-kernel@...r.kernel.org>
Cc: Frederic Weisbecker <fweisbec@...il.com>,
Alessio Igor Bogani <abogani@...nel.org>,
Andrew Morton <akpm@...ux-foundation.org>,
Avi Kivity <avi@...hat.com>,
Chris Metcalf <cmetcalf@...era.com>,
Christoph Lameter <cl@...ux.com>,
Geoff Levand <geoff@...radead.org>,
Gilad Ben Yossef <gilad@...yossef.com>,
Hakan Akkan <hakanakkan@...il.com>,
Ingo Molnar <mingo@...nel.org>,
"Paul E. McKenney" <paulmck@...ux.vnet.ibm.com>,
Paul Gortmaker <paul.gortmaker@...driver.com>,
Peter Zijlstra <peterz@...radead.org>,
Steven Rostedt <rostedt@...dmis.org>,
Thomas Gleixner <tglx@...utronix.de>
Subject: [PATCH 03/24] cputime: Allow dynamic switch between tick/virtual based cputime accounting
Allow to dynamically switch between tick and virtual based cputime accounting.
This way we can provide a kind of "on-demand" virtual based cputime
accounting. In this mode, the kernel will rely on the user hooks
subsystem to dynamically hook on kernel boundaries.
This is in preparation for beeing able to stop the timer tick further
idle. Doing so will depend on CONFIG_VIRT_CPU_ACCOUNTING which makes
it possible to account the cputime without the tick by hooking on
kernel/user boundaries.
Depending whether the tick is stopped or not, we can switch between
tick and vtime based accounting anytime in order to minimize the
overhead associated to user hooks.
Signed-off-by: Frederic Weisbecker <fweisbec@...il.com>
Cc: Alessio Igor Bogani <abogani@...nel.org>
Cc: Andrew Morton <akpm@...ux-foundation.org>
Cc: Avi Kivity <avi@...hat.com>
Cc: Chris Metcalf <cmetcalf@...era.com>
Cc: Christoph Lameter <cl@...ux.com>
Cc: Geoff Levand <geoff@...radead.org>
Cc: Gilad Ben Yossef <gilad@...yossef.com>
Cc: Hakan Akkan <hakanakkan@...il.com>
Cc: Ingo Molnar <mingo@...nel.org>
Cc: Paul E. McKenney <paulmck@...ux.vnet.ibm.com>
Cc: Paul Gortmaker <paul.gortmaker@...driver.com>
Cc: Peter Zijlstra <peterz@...radead.org>
Cc: Steven Rostedt <rostedt@...dmis.org>
Cc: Thomas Gleixner <tglx@...utronix.de>
---
include/linux/kernel_stat.h | 2 +-
include/linux/sched.h | 4 +-
include/linux/vtime.h | 9 +++++++
init/Kconfig | 6 ++++
kernel/fork.c | 2 +-
kernel/sched/cputime.c | 57 ++++++++++++++++++++++++++++---------------
kernel/time/tick-sched.c | 5 +++-
7 files changed, 60 insertions(+), 25 deletions(-)
diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 66b7078..ed5f6ed 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -127,7 +127,7 @@ extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t)
extern void account_steal_time(cputime_t);
extern void account_idle_time(cputime_t);
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
static inline void account_process_tick(struct task_struct *tsk, int user)
{
vtime_account_user(tsk);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 651b51a..547c1f0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -597,7 +597,7 @@ struct signal_struct {
cputime_t utime, stime, cutime, cstime;
cputime_t gtime;
cputime_t cgtime;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
struct cputime prev_cputime;
#endif
unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
@@ -1357,7 +1357,7 @@ struct task_struct {
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
struct cputime prev_cputime;
#endif
unsigned long nvcsw, nivcsw; /* context switch counts */
diff --git a/include/linux/vtime.h b/include/linux/vtime.h
index 58392aa..e57020d 100644
--- a/include/linux/vtime.h
+++ b/include/linux/vtime.h
@@ -10,11 +10,20 @@ extern void vtime_account_system_irqsafe(struct task_struct *tsk);
extern void vtime_account_idle(struct task_struct *tsk);
extern void vtime_account_user(struct task_struct *tsk);
extern void vtime_account(struct task_struct *tsk);
+
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
+extern bool vtime_accounting(void);
#else
+static inline bool vtime_accounting(void) { return true; }
+#endif
+
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING */
static inline void vtime_task_switch(struct task_struct *prev) { }
static inline void vtime_account_system(struct task_struct *tsk) { }
static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { }
+static inline void vtime_account_user(struct task_struct *tsk) { }
static inline void vtime_account(struct task_struct *tsk) { }
+static inline bool vtime_accounting(void) { return false; }
#endif
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
diff --git a/init/Kconfig b/init/Kconfig
index a64b3e8..9d7000a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -342,6 +342,7 @@ config VIRT_CPU_ACCOUNTING
bool "Deterministic task and CPU time accounting"
depends on HAVE_VIRT_CPU_ACCOUNTING || HAVE_CONTEXT_TRACKING
select VIRT_CPU_ACCOUNTING_GEN if !HAVE_VIRT_CPU_ACCOUNTING
+ select VIRT_CPU_ACCOUNTING_NATIVE if HAVE_VIRT_CPU_ACCOUNTING
default y if PPC64
help
Select this option to enable more accurate task and CPU time
@@ -367,11 +368,16 @@ endchoice
config VIRT_CPU_ACCOUNTING_GEN
select CONTEXT_TRACKING
+ depends on VIRT_CPU_ACCOUNTING && HAVE_CONTEXT_TRACKING
bool
help
Implement a generic virtual based cputime accounting by using
the context tracking subsystem.
+config VIRT_CPU_ACCOUNTING_NATIVE
+ depends on VIRT_CPU_ACCOUNTING && HAVE_VIRT_CPU_ACCOUNTING
+ bool
+
config BSD_PROCESS_ACCT
bool "BSD Process Accounting"
help
diff --git a/kernel/fork.c b/kernel/fork.c
index 3c31e87..a81efb8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1221,7 +1221,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->utime = p->stime = p->gtime = 0;
p->utimescaled = p->stimescaled = 0;
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
p->prev_cputime.utime = p->prev_cputime.stime = 0;
#endif
#if defined(SPLIT_RSS_COUNTING)
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index da0a9e7..e1fcab4 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -317,8 +317,6 @@ out:
rcu_read_unlock();
}
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
-
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
* Account a tick to a process and cpustat
@@ -388,6 +386,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
struct rq *rq) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Account a single tick of cpu time.
* @p: the process that the cpu time gets accounted to
@@ -398,6 +397,11 @@ void account_process_tick(struct task_struct *p, int user_tick)
cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
struct rq *rq = this_rq();
+ if (vtime_accounting()) {
+ vtime_account_user(p);
+ return;
+ }
+
if (sched_clock_irqtime) {
irqtime_account_process_tick(p, user_tick, rq);
return;
@@ -439,29 +443,13 @@ void account_idle_ticks(unsigned long ticks)
account_idle_time(jiffies_to_cputime(ticks));
}
-
#endif
+
/*
* Use precise platform statistics if available:
*/
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
-void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
-{
- *ut = p->utime;
- *st = p->stime;
-}
-
-void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
-{
- struct task_cputime cputime;
-
- thread_group_cputime(p, &cputime);
-
- *ut = cputime.utime;
- *st = cputime.stime;
-}
-
void vtime_account_system_irqsafe(struct task_struct *tsk)
{
unsigned long flags;
@@ -517,8 +505,25 @@ void vtime_account(struct task_struct *tsk)
}
EXPORT_SYMBOL_GPL(vtime_account);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING */
-#else
+#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
+void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+ *ut = p->utime;
+ *st = p->stime;
+}
+
+void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
+{
+ struct task_cputime cputime;
+
+ thread_group_cputime(p, &cputime);
+
+ *ut = cputime.utime;
+ *st = cputime.stime;
+}
+#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifndef nsecs_to_cputime
# define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs)
@@ -548,6 +553,12 @@ static void cputime_adjust(struct task_cputime *curr,
{
cputime_t rtime, utime, total;
+ if (vtime_accounting()) {
+ *ut = curr->utime;
+ *st = curr->stime;
+ return;
+ }
+
utime = curr->utime;
total = utime + curr->stime;
@@ -601,6 +612,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime
thread_group_cputime(p, &cputime);
cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
+#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static DEFINE_PER_CPU(long, last_jiffies) = INITIAL_JIFFIES;
@@ -642,6 +654,11 @@ void vtime_account_idle(struct task_struct *tsk)
account_idle_time(delta_cpu);
}
+bool vtime_accounting(void)
+{
+ return context_tracking_active();
+}
+
static int __cpuinit vtime_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index fb8e5e4..ad0e6fa 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -632,8 +632,11 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
{
-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
unsigned long ticks;
+
+ if (vtime_accounting())
+ return;
/*
* We stopped the tick in idle. Update process times would miss the
* time we slept as update_process_times does only a 1 tick
--
1.7.5.4
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists