lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1386377164-2223-1-git-send-email-andi@firstfloor.org>
Date:	Fri,  6 Dec 2013 16:46:04 -0800
From:	Andi Kleen <andi@...stfloor.org>
To:	linux-kernel@...r.kernel.org
Cc:	x86@...nel.org, Andi Kleen <ak@...ux.intel.com>, lenb@...nel.org,
	tglx@...utronix.de, peterz@...radead.org
Subject: [PATCH] Trace idle entry and exit times

From: Andi Kleen <ak@...ux.intel.com>

This patch creates 4 new trace points: one at the beginning of idle,
one before entering mwait, one after exiting mwait, and one after
finishing the idle code. This is useful to trace down any
additional latencies caused by the ever-growing idle code.

Idle latencies are a common source of performance problems, so it's
important to be able to measure them.

I moved mwait out of line to avoid a nasty include loop.

To use:

trace-cmd record -e power:cpu_idle_end -e power:cpu_idle_start -e power:cpu_idle_mwait_end -e power:cpu_idle_mwait_start
run workload
Ctrl-C
trace-cmd report

Compute per CPU intervals between
power:cpu_idle_start and power:cpu_idle_mwait_start
to get the entry latency

and

power:cpu_idle_mwait_end and power:cpu_idle_end
to get the exit latency (likely more important)

Cc: lenb@...nel.org
Cc: tglx@...utronix.de
Cc: peterz@...radead.org
Signed-off-by: Andi Kleen <ak@...ux.intel.com>
---
 arch/x86/include/asm/processor.h |  7 +------
 arch/x86/kernel/process.c        |  9 +++++++++
 include/trace/events/power.h     | 30 ++++++++++++++++++++++++++++++
 kernel/cpu/idle.c                |  3 +++
 4 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7b034a4..22deb08 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -708,12 +708,7 @@ static inline void __monitor(const void *eax, unsigned long ecx,
 		     :: "a" (eax), "c" (ecx), "d"(edx));
 }
 
-static inline void __mwait(unsigned long eax, unsigned long ecx)
-{
-	/* "mwait %eax, %ecx;" */
-	asm volatile(".byte 0x0f, 0x01, 0xc9;"
-		     :: "a" (eax), "c" (ecx));
-}
+extern void __mwait(unsigned long eax, unsigned long ecx);
 
 static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
 {
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 3fb8d95..9083e34 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -469,3 +469,12 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
 }
 
+void __mwait(unsigned long eax, unsigned long ecx)
+{
+	trace_cpu_idle_mwait_start(0);
+	/* "mwait %eax, %ecx;" */
+	asm volatile(".byte 0x0f, 0x01, 0xc9;"
+		     :: "a" (eax), "c" (ecx));
+	trace_cpu_idle_mwait_end(0);
+}
+EXPORT_SYMBOL(__mwait);
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index cda100d..71261a7 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -35,6 +35,36 @@ DEFINE_EVENT(cpu, cpu_idle,
 	TP_ARGS(state, cpu_id)
 );
 
+/* Dummy argument to work around ftrace's lack of no argument support */
+
+TRACE_EVENT(cpu_idle_start,
+	TP_PROTO(int x),
+	TP_ARGS(x),
+	TP_STRUCT__entry(__array(int, x, 0)),
+	TP_fast_assign((void)(x)),
+	TP_printk("%s", ""))
+
+TRACE_EVENT(cpu_idle_mwait_start,
+	TP_PROTO(int x),
+	TP_ARGS(x),
+	TP_STRUCT__entry(__array(int, x, 0)),
+	TP_fast_assign((void)(x)),
+	TP_printk("%s", ""));
+
+TRACE_EVENT(cpu_idle_mwait_end,
+	TP_PROTO(int x),
+	TP_ARGS(x),
+	TP_STRUCT__entry(__array(int, x, 0)),
+	TP_fast_assign((void)(x)),
+	TP_printk("%s", ""));
+
+TRACE_EVENT(cpu_idle_end,
+	TP_PROTO(int x),
+	TP_ARGS(x),
+	TP_STRUCT__entry(__array(int, x, 0)),
+	TP_fast_assign((void)(x)),
+	TP_printk("%s", ""));
+
 /* This file can get included multiple times, TRACE_HEADER_MULTI_READ at top */
 #ifndef _PWR_EVENT_AVOID_DOUBLE_DEFINING
 #define _PWR_EVENT_AVOID_DOUBLE_DEFINING
diff --git a/kernel/cpu/idle.c b/kernel/cpu/idle.c
index 988573a..1cf1676 100644
--- a/kernel/cpu/idle.c
+++ b/kernel/cpu/idle.c
@@ -6,6 +6,7 @@
 #include <linux/tick.h>
 #include <linux/mm.h>
 #include <linux/stackprotector.h>
+#include <trace/events/power.h>
 
 #include <asm/tlb.h>
 
@@ -68,6 +69,7 @@ void __weak arch_cpu_idle(void)
 static void cpu_idle_loop(void)
 {
 	while (1) {
+		trace_cpu_idle_start(0);
 		tick_nohz_idle_enter();
 
 		while (!need_resched()) {
@@ -114,6 +116,7 @@ static void cpu_idle_loop(void)
 				set_preempt_need_resched();
 		}
 		tick_nohz_idle_exit();
+		trace_cpu_idle_end(0);
 		schedule_preempt_disabled();
 	}
 }
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ