lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <53a6b86a-12a2-0b30-9233-37e0000f4b2f@osadl.org>
Date:	Thu, 28 Jul 2016 20:51:46 +0200
From:	Carsten Emde <C.Emde@...dl.org>
To:	Binoy Jayan <binoy.jayan@...aro.org>,
	Steven Rostedt <srostedt@...hat.com>
Cc:	linux-kernel@...r.kernel.org, Thomas Gleixner <tglx@...utronix.de>
Subject: Re: [PATCH 1/2] tracing: Add latency histograms

Binoy,

> [..]
> This patch provides a recording mechanism to store data of potential
> sources of system latencies. [..]
Thanks a lot! I would be more than happy and certainly will help and test as much
as I can to get this functionality upstream.

Some time ago, I provided a patch to add a context switch time histogram.
Unfortunately, this patch did not make it into the RT patch by some reason.
However, the context switch time is needed to completely elucidate any latency
that is seen from user space. The below patch only applies to the original
version of the histogram patch - not to your version. I am appending it just
for your information. I would be more than happy to convert the patch so it
applies on top of yours. However, I would not do it, if there were no chance
that it ever gets considered. Please let me know.

Thanks,
	-Carsten.


--

From: Carsten Emde <C.Emde@...dl.org>
Date: Fri, 3 Oct 2014 22:14:55 +0100
Subject: Add trace latency histogram to monitor context switch time

The existing two on-line methods to record system latency (timer and
wakeup latency) do not completely reflect the user-space wakeup time,
since the duration of the context switch is not taken into account. This
patch adds two new histograms - one that records the duration of the
context switch and another one that records the sum of the timer delay,
the wakeup latency and the newly available duration of the context
switch. The latter histogram probably is best suitable to determine the
worst-case total preemption latency of a given system.

Signed-off-by: Carsten Emde <C.Emde@...dl.org>

---
 include/linux/sched.h       |    8 
 kernel/trace/Kconfig        |   32 +++
 kernel/trace/latency_hist.c |  427 ++++++++++++++++++++++++++++++++++++++------
 3 files changed, 411 insertions(+), 56 deletions(-)

Index: linux-4.4.9-rt17/include/linux/sched.h
===================================================================
--- linux-4.4.9-rt17.orig/include/linux/sched.h
+++ linux-4.4.9-rt17/include/linux/sched.h
@@ -1828,7 +1828,15 @@ struct task_struct {
 #ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
 	long timer_offset;
 #endif
+#endif /* CONFIG_WAKEUP_LATENCY_HIST */
+#ifdef CONFIG_SWITCHTIME_HIST
+	u64 switchtime_timestamp_hist;
+	struct task_struct *switchtime_prev;
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+	unsigned long wakeup_latency;
 #endif
+#endif /* CONFIG_SWITCHTIME_HIST */
 #endif /* CONFIG_TRACING */
 #ifdef CONFIG_MEMCG
 	struct mem_cgroup *memcg_in_oom;
Index: linux-4.4.9-rt17/kernel/trace/Kconfig
===================================================================
--- linux-4.4.9-rt17.orig/kernel/trace/Kconfig
+++ linux-4.4.9-rt17/kernel/trace/Kconfig
@@ -325,6 +325,38 @@ config MISSED_TIMER_OFFSETS_HIST
 
 	      /sys/kernel/debug/tracing/latency_hist/enable/timerandwakeup
 
+config SWITCHTIME_HIST
+	depends on HIGH_RES_TIMERS
+	depends on HAVE_SYSCALL_TRACEPOINTS
+	depends on WAKEUP_LATENCY_HIST
+	select GENERIC_TRACER
+	select FTRACE_SYSCALLS
+	bool "Context Switch Time Histogram"
+	help
+	  Generate a histogram of the context switch duration per CPU. The
+	  histograms are disabled by default. To enable them, write a non-zero
+	  number to
+
+	      /sys/kernel/debug/tracing/latency_hist/enable/switchtime
+
+	  The histogram data will be located in the debug file system at
+
+	      /sys/kernel/debug/tracing/latency_hist/switchtime
+
+	  If both Scheduling Latency Histogram, Missed Timer Offsets and the
+	  newly provided Context Switch Time Histogram are selected, additional
+	  histogram data will be collected from the sum of the wakeup latency,
+	  the timer latency, if available, and the switch time per CPU. These
+	  histograms are available in the
+
+	      /sys/kernel/debug/tracing/latency_hist/timerwakeupswitch
+
+	  directory. They reflect the apparent interrupt and scheduling latency
+	  and are best suitable to determine the worst-case latency of a given
+	  system. To enable these histograms, write a non-zero number to
+
+	      /sys/kernel/debug/tracing/latency_hist/enable/timerwakeupswitch
+
 config ENABLE_DEFAULT_TRACERS
 	bool "Trace process context switches and events"
 	depends on !GENERIC_TRACER
Index: linux-4.4.9-rt17/kernel/trace/latency_hist.c
===================================================================
--- linux-4.4.9-rt17.orig/kernel/trace/latency_hist.c
+++ linux-4.4.9-rt17/kernel/trace/latency_hist.c
@@ -23,11 +23,15 @@
 #include <linux/sched/rt.h>
 #include <linux/slab.h>
 #include <linux/atomic.h>
+#include <trace/syscall.h>
 #include <asm/div64.h>
+#include <asm/syscall.h>
 
 #include "trace.h"
 #include <trace/events/sched.h>
 
+extern struct tracepoint __tracepoint_sys_exit;
+
 #define NSECS_PER_USECS 1000L
 
 #define CREATE_TRACE_POINTS
@@ -41,6 +45,8 @@ enum {
 	WAKEUP_LATENCY_SHAREDPRIO,
 	MISSED_TIMER_OFFSETS,
 	TIMERANDWAKEUP_LATENCY,
+	SWITCHTIME,
+	TIMERWAKEUPSWITCH_LATENCY,
 	MAX_LATENCY_TYPE,
 };
 
@@ -104,8 +110,10 @@ struct maxlatproc_data {
 	int prio;
 	int current_prio;
 	long latency;
-	long timeroffset;
+	unsigned long timer_offset;
+	long wakeup_latency;
 	cycle_t timestamp;
+	int nr;
 };
 #endif
 
@@ -144,6 +152,19 @@ static DEFINE_PER_CPU(struct maxlatproc_
 static unsigned long missed_timer_offsets_pid;
 #endif
 
+#ifdef CONFIG_SWITCHTIME_HIST
+static DEFINE_PER_CPU(struct hist_data, switchtime);
+static char *switchtime_dir = "switchtime";
+static notrace void probe_syscall_exit(void *v, struct pt_regs *regs, long ret);
+static struct enable_data switchtime_enabled_data = {
+	.latency_type = SWITCHTIME,
+	.enabled = 0,
+};
+static DEFINE_PER_CPU(struct maxlatproc_data, switchtime_maxlatproc);
+static DEFINE_PER_CPU(struct task_struct *, switchtime_task);
+static unsigned long switchtime_pid;
+#endif
+
 #if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
 	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
 static DEFINE_PER_CPU(struct hist_data, timerandwakeup_latency_hist);
@@ -155,13 +176,26 @@ static struct enable_data timerandwakeup
 static DEFINE_PER_CPU(struct maxlatproc_data, timerandwakeup_maxlatproc);
 #endif
 
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) && \
+	defined(CONFIG_SWITCHTIME_HIST)
+static DEFINE_PER_CPU(struct hist_data, timerwakeupswitch_latency_hist);
+static char *timerwakeupswitch_latency_hist_dir = "timerwakeupswitch";
+static struct enable_data timerwakeupswitch_enabled_data = {
+	.latency_type = TIMERWAKEUPSWITCH_LATENCY,
+	.enabled = 0,
+};
+static DEFINE_PER_CPU(struct maxlatproc_data, timerwakeupswitch_maxlatproc);
+#endif
+
 void notrace latency_hist(int latency_type, int cpu, long latency,
-			  long timeroffset, cycle_t stop,
-			  struct task_struct *p)
+			  unsigned long timer_offset, long wakeup_latency,
+			  cycle_t stop, struct task_struct *p, int nr)
 {
 	struct hist_data *my_hist;
 #if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
-	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) || \
+	defined(CONFIG_SWITCHTIME_HIST)
 	struct maxlatproc_data *mp = NULL;
 #endif
 
@@ -201,6 +235,12 @@ void notrace latency_hist(int latency_ty
 		mp = &per_cpu(missed_timer_offsets_maxlatproc, cpu);
 		break;
 #endif
+#ifdef CONFIG_SWITCHTIME_HIST
+	case SWITCHTIME:
+		my_hist = &per_cpu(switchtime, cpu);
+		mp = &per_cpu(switchtime_maxlatproc, cpu);
+		break;
+#endif
 #if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
 	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
 	case TIMERANDWAKEUP_LATENCY:
@@ -208,6 +248,14 @@ void notrace latency_hist(int latency_ty
 		mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
 		break;
 #endif
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) && \
+	defined(CONFIG_SWITCHTIME_HIST)
+	case TIMERWAKEUPSWITCH_LATENCY:
+		my_hist = &per_cpu(timerwakeupswitch_latency_hist, cpu);
+		mp = &per_cpu(timerwakeupswitch_maxlatproc, cpu);
+		break;
+#endif
 
 	default:
 		return;
@@ -229,21 +277,38 @@ void notrace latency_hist(int latency_ty
 	if (unlikely(latency > my_hist->max_lat ||
 	    my_hist->min_lat == LONG_MAX)) {
 #if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
-	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) || \
+	defined(CONFIG_SWITCHTIME_HIST)
 		if (latency_type == WAKEUP_LATENCY ||
 		    latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
 		    latency_type == MISSED_TIMER_OFFSETS ||
-		    latency_type == TIMERANDWAKEUP_LATENCY) {
-			strncpy(mp->comm, p->comm, sizeof(mp->comm));
-			strncpy(mp->current_comm, current->comm,
-			    sizeof(mp->current_comm));
-			mp->pid = task_pid_nr(p);
-			mp->current_pid = task_pid_nr(current);
-			mp->prio = p->prio;
-			mp->current_prio = current->prio;
+		    latency_type == TIMERANDWAKEUP_LATENCY ||
+		    latency_type == SWITCHTIME ||
+		    latency_type == TIMERWAKEUPSWITCH_LATENCY) {
+			if (latency_type == SWITCHTIME ||
+			    latency_type == TIMERWAKEUPSWITCH_LATENCY) {
+				strncpy(mp->current_comm, p->comm,
+				    sizeof(mp->current_comm));
+				strncpy(mp->comm, current->comm,
+				    sizeof(mp->comm));
+				mp->current_pid = task_pid_nr(p);
+				mp->pid = task_pid_nr(current);
+				mp->current_prio = p->prio;
+				mp->prio = current->prio;
+			} else {
+				strncpy(mp->comm, p->comm, sizeof(mp->comm));
+				strncpy(mp->current_comm, current->comm,
+				    sizeof(mp->current_comm));
+				mp->pid = task_pid_nr(p);
+				mp->current_pid = task_pid_nr(current);
+				mp->prio = p->prio;
+				mp->current_prio = current->prio;
+			}
 			mp->latency = latency;
-			mp->timeroffset = timeroffset;
+			mp->timer_offset = timer_offset;
+			mp->wakeup_latency = wakeup_latency;
 			mp->timestamp = stop;
+			mp->nr = nr;
 		}
 #endif
 		my_hist->max_lat = latency;
@@ -366,8 +431,9 @@ static void clear_maxlatprocdata(struct
 {
 	mp->comm[0] = mp->current_comm[0] = '\0';
 	mp->prio = mp->current_prio = mp->pid = mp->current_pid =
-	    mp->latency = mp->timeroffset = -1;
+	    mp->latency = mp->timer_offset = mp->wakeup_latency = -1;
 	mp->timestamp = 0;
+	mp->nr = 0;
 }
 #endif
 
@@ -426,6 +492,12 @@ latency_hist_reset(struct file *file, co
 			mp = &per_cpu(wakeup_maxlatproc_sharedprio, cpu);
 			break;
 #endif
+#ifdef CONFIG_SWITCHTIME_HIST
+		case SWITCHTIME:
+			hist = &per_cpu(switchtime, cpu);
+			mp = &per_cpu(switchtime_maxlatproc, cpu);
+			break;
+#endif
 #ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
 		case MISSED_TIMER_OFFSETS:
 			hist = &per_cpu(missed_timer_offsets, cpu);
@@ -439,15 +511,26 @@ latency_hist_reset(struct file *file, co
 			mp = &per_cpu(timerandwakeup_maxlatproc, cpu);
 			break;
 #endif
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) && \
+	defined(CONFIG_SWITCHTIME_HIST)
+		case TIMERWAKEUPSWITCH_LATENCY:
+			hist = &per_cpu(timerwakeupswitch_latency_hist, cpu);
+			mp = &per_cpu(timerwakeupswitch_maxlatproc, cpu);
+			break;
+#endif
 		}
 
 		hist_reset(hist);
 #if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
-	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) || \
+	defined(CONFIG_SWITCHTIME_HIST)
 		if (latency_type == WAKEUP_LATENCY ||
 		    latency_type == WAKEUP_LATENCY_SHAREDPRIO ||
 		    latency_type == MISSED_TIMER_OFFSETS ||
-		    latency_type == TIMERANDWAKEUP_LATENCY)
+		    latency_type == TIMERANDWAKEUP_LATENCY ||
+		    latency_type == SWITCHTIME ||
+		    latency_type == TIMERWAKEUPSWITCH_LATENCY)
 			clear_maxlatprocdata(mp);
 #endif
 	}
@@ -456,7 +539,8 @@ latency_hist_reset(struct file *file, co
 }
 
 #if defined(CONFIG_WAKEUP_LATENCY_HIST) || \
-	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) || \
+	defined(CONFIG_SWITCHTIME_HIST)
 static ssize_t
 show_pid(struct file *file, char __user *ubuf, size_t cnt, loff_t *ppos)
 {
@@ -499,7 +583,7 @@ show_maxlatproc(struct file *file, char
 {
 	int r;
 	struct maxlatproc_data *mp = file->private_data;
-	int strmaxlen = (TASK_COMM_LEN * 2) + (8 * 8);
+	int strmaxlen = (TASK_COMM_LEN * 2) + 32 + (8 * 8);
 	unsigned long long t;
 	unsigned long usecs, secs;
 	char *buf;
@@ -518,10 +602,18 @@ show_maxlatproc(struct file *file, char
 	usecs = do_div(t, USEC_PER_SEC);
 	secs = (unsigned long) t;
 	r = snprintf(buf, strmaxlen,
-	    "%d %d %ld (%ld) %s <- %d %d %s %lu.%06lu\n", mp->pid,
-	    MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timeroffset, mp->comm,
-	    mp->current_pid, MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
+	    "%d %d %ld (%ld,%ld) %s <- %d %d %s %lu.%06lu", mp->pid,
+	    MAX_RT_PRIO-1 - mp->prio, mp->latency, mp->timer_offset,
+	    mp->wakeup_latency, mp->comm, mp->current_pid,
+	    MAX_RT_PRIO-1 - mp->current_prio, mp->current_comm,
 	    secs, usecs);
+#ifdef CONFIG_SWITCHTIME_HIST
+	if (mp->nr >= 0 && mp->nr < NR_syscalls)
+		r += snprintf(buf + r, strmaxlen - r, " %pf",
+		    sys_call_table[mp->nr]);
+#endif
+	r += snprintf(buf + r, strmaxlen - r, "\n");
+
 	r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
 	kfree(buf);
 	return r;
@@ -569,9 +661,7 @@ do_enable(struct file *file, const char
 			ret = register_trace_preemptirqsoff_hist(
 			    probe_preemptirqsoff_hist, NULL);
 			if (ret) {
-				pr_info("wakeup trace: Couldn't assign "
-				    "probe_preemptirqsoff_hist "
-				    "to trace_preemptirqsoff_hist\n");
+				pr_info("Couldn't register preemptirqsoff_hist probe\n");
 				return ret;
 			}
 			break;
@@ -581,17 +671,13 @@ do_enable(struct file *file, const char
 			ret = register_trace_sched_wakeup(
 			    probe_wakeup_latency_hist_start, NULL);
 			if (ret) {
-				pr_info("wakeup trace: Couldn't assign "
-				    "probe_wakeup_latency_hist_start "
-				    "to trace_sched_wakeup\n");
+				pr_info("Couldn't register sched_wakeup probe\n");
 				return ret;
 			}
 			ret = register_trace_sched_wakeup_new(
 			    probe_wakeup_latency_hist_start, NULL);
 			if (ret) {
-				pr_info("wakeup trace: Couldn't assign "
-				    "probe_wakeup_latency_hist_start "
-				    "to trace_sched_wakeup_new\n");
+				pr_info("Couldn't register sched_wakeup_new probe\n");
 				unregister_trace_sched_wakeup(
 				    probe_wakeup_latency_hist_start, NULL);
 				return ret;
@@ -599,9 +685,7 @@ do_enable(struct file *file, const char
 			ret = register_trace_sched_switch(
 			    probe_wakeup_latency_hist_stop, NULL);
 			if (ret) {
-				pr_info("wakeup trace: Couldn't assign "
-				    "probe_wakeup_latency_hist_stop "
-				    "to trace_sched_switch\n");
+				pr_info("Couldn't register sched_switch probe\n");
 				unregister_trace_sched_wakeup(
 				    probe_wakeup_latency_hist_start, NULL);
 				unregister_trace_sched_wakeup_new(
@@ -611,9 +695,7 @@ do_enable(struct file *file, const char
 			ret = register_trace_sched_migrate_task(
 			    probe_sched_migrate_task, NULL);
 			if (ret) {
-				pr_info("wakeup trace: Couldn't assign "
-				    "probe_sched_migrate_task "
-				    "to trace_sched_migrate_task\n");
+				pr_info("Couldn't register sched_migrate_task probe\n");
 				unregister_trace_sched_wakeup(
 				    probe_wakeup_latency_hist_start, NULL);
 				unregister_trace_sched_wakeup_new(
@@ -629,9 +711,19 @@ do_enable(struct file *file, const char
 			ret = register_trace_hrtimer_interrupt(
 			    probe_hrtimer_interrupt, NULL);
 			if (ret) {
-				pr_info("wakeup trace: Couldn't assign "
-				    "probe_hrtimer_interrupt "
-				    "to trace_hrtimer_interrupt\n");
+				pr_info("Couldn't register hrtimer IRQ probe\n");
+				return ret;
+			}
+			break;
+#endif
+#ifdef CONFIG_SWITCHTIME_HIST
+		case SWITCHTIME:
+			if (!wakeup_latency_enabled_data.enabled)
+				return -EINVAL;
+			ret = tracepoint_probe_register(&__tracepoint_sys_exit,
+			    probe_syscall_exit, NULL);
+			if (ret) {
+				pr_info("Couldn't register sys_exit probe\n");
 				return ret;
 			}
 			break;
@@ -644,6 +736,16 @@ do_enable(struct file *file, const char
 				return -EINVAL;
 			break;
 #endif
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) && \
+	defined(CONFIG_SWITCHTIME_HIST)
+		case TIMERWAKEUPSWITCH_LATENCY:
+			if (!wakeup_latency_enabled_data.enabled ||
+			    !missed_timer_offsets_enabled_data.enabled ||
+			    !switchtime_enabled_data.enabled)
+				return -EINVAL;
+			break;
+#endif
 		default:
 			break;
 		}
@@ -695,6 +797,12 @@ do_enable(struct file *file, const char
 #ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
 			timerandwakeup_enabled_data.enabled = 0;
 #endif
+#ifdef CONFIG_SWITCHTIME_HIST
+			switchtime_enabled_data.enabled = 0;
+#endif
+#ifdef CONFIG_TIMERWAKEUPSWITCH_LATENCY_HIST
+			timerwakeupswitch_enabled_data.enabled = 0;
+#endif
 			break;
 #endif
 #ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
@@ -704,6 +812,25 @@ do_enable(struct file *file, const char
 #ifdef CONFIG_WAKEUP_LATENCY_HIST
 			timerandwakeup_enabled_data.enabled = 0;
 #endif
+#ifdef CONFIG_TIMERWAKEUPSWITCH_LATENCY_HIST
+			timerwakeupswitch_enabled_data.enabled = 0;
+#endif
+			break;
+#endif
+#ifdef CONFIG_SWITCHTIME_HIST
+		case SWITCHTIME:
+			{
+				int cpu;
+
+				tracepoint_probe_unregister(&__tracepoint_sys_exit,
+				    probe_syscall_exit, NULL);
+
+				for_each_online_cpu(cpu)
+					per_cpu(switchtime_task, cpu) = NULL;
+			}
+#ifdef CONFIG_TIMERWAKEUPSWITCH_LATENCY_HIST
+			timerwakeupswitch_enabled_data.enabled = 0;
+#endif
 			break;
 #endif
 		default:
@@ -797,7 +924,7 @@ static notrace void probe_preemptirqsoff
 				    NSECS_PER_USECS;
 
 				latency_hist(IRQSOFF_LATENCY, cpu, latency, 0,
-				    stop, NULL);
+				    0, stop, NULL, -1);
 			}
 			per_cpu(hist_irqsoff_counting, cpu) = 0;
 		}
@@ -815,7 +942,7 @@ static notrace void probe_preemptirqsoff
 				    NSECS_PER_USECS;
 
 				latency_hist(PREEMPTOFF_LATENCY, cpu, latency,
-				    0, stop, NULL);
+				    0, 0, stop, NULL, -1);
 			}
 			per_cpu(hist_preemptoff_counting, cpu) = 0;
 		}
@@ -834,7 +961,7 @@ static notrace void probe_preemptirqsoff
 				    NSECS_PER_USECS;
 
 				latency_hist(PREEMPTIRQSOFF_LATENCY, cpu,
-				    latency, 0, stop, NULL);
+				    latency, 0, 0, stop, NULL, -1);
 			}
 			per_cpu(hist_preemptirqsoff_counting, cpu) = 0;
 		}
@@ -845,6 +972,9 @@ static notrace void probe_preemptirqsoff
 
 #ifdef CONFIG_WAKEUP_LATENCY_HIST
 static DEFINE_RAW_SPINLOCK(wakeup_lock);
+#ifdef CONFIG_SWITCHTIME_HIST
+static DEFINE_RAW_SPINLOCK(switchtime_lock);
+#endif
 static notrace void probe_sched_migrate_task(void *v, struct task_struct *task,
 	int cpu)
 {
@@ -855,7 +985,6 @@ static notrace void probe_sched_migrate_
 		struct task_struct *cpu_wakeup_task;
 
 		raw_spin_lock_irqsave(&wakeup_lock, flags);
-
 		cpu_wakeup_task = per_cpu(wakeup_task, old_cpu);
 		if (task == cpu_wakeup_task) {
 			put_task_struct(cpu_wakeup_task);
@@ -863,8 +992,24 @@ static notrace void probe_sched_migrate_
 			cpu_wakeup_task = per_cpu(wakeup_task, cpu) = task;
 			get_task_struct(cpu_wakeup_task);
 		}
-
 		raw_spin_unlock_irqrestore(&wakeup_lock, flags);
+
+#ifdef CONFIG_SWITCHTIME_HIST
+		{
+			struct task_struct *cpu_switchtime_task;
+
+			raw_spin_lock_irqsave(&switchtime_lock, flags);
+			cpu_switchtime_task = per_cpu(switchtime_task, old_cpu);
+			if (task == cpu_switchtime_task) {
+				put_task_struct(cpu_switchtime_task);
+				per_cpu(switchtime_task, old_cpu) = NULL;
+				cpu_switchtime_task =
+				    per_cpu(switchtime_task, cpu) = task;
+				get_task_struct(cpu_switchtime_task);
+			}
+			raw_spin_unlock_irqrestore(&switchtime_lock, flags);
+		}
+#endif
 	}
 }
 
@@ -876,8 +1021,23 @@ static notrace void probe_wakeup_latency
 	int cpu = task_cpu(p);
 	struct task_struct *cpu_wakeup_task;
 
-	raw_spin_lock_irqsave(&wakeup_lock, flags);
+#ifdef CONFIG_SWITCHTIME_HIST
+	struct task_struct *cpu_switchtime_task;
+
+	if (!switchtime_pid) {
+		raw_spin_lock_irqsave(&switchtime_lock, flags);
+		cpu_switchtime_task = per_cpu(switchtime_task, cpu);
+		if (cpu_switchtime_task &&
+		    p->prio < cpu_switchtime_task->prio) {
+			cpu_switchtime_task->switchtime_timestamp_hist = 0;
+			put_task_struct(cpu_switchtime_task);
+			per_cpu(switchtime_task, cpu) = NULL;
+		}
+		raw_spin_unlock_irqrestore(&switchtime_lock, flags);
+	}
+#endif
 
+	raw_spin_lock_irqsave(&wakeup_lock, flags);
 	cpu_wakeup_task = per_cpu(wakeup_task, cpu);
 
 	if (wakeup_pid) {
@@ -914,6 +1074,7 @@ static notrace void probe_wakeup_latency
 	long latency;
 	cycle_t stop;
 	struct task_struct *cpu_wakeup_task;
+	int hit = 0;
 
 	raw_spin_lock_irqsave(&wakeup_lock, flags);
 
@@ -949,28 +1110,52 @@ static notrace void probe_wakeup_latency
 	    NSECS_PER_USECS;
 
 	if (per_cpu(wakeup_sharedprio, cpu)) {
-		latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, stop,
-		    next);
+		latency_hist(WAKEUP_LATENCY_SHAREDPRIO, cpu, latency, 0, 0,
+		    stop, next, -1);
 		per_cpu(wakeup_sharedprio, cpu) = 0;
 	} else {
-		latency_hist(WAKEUP_LATENCY, cpu, latency, 0, stop, next);
+		latency_hist(WAKEUP_LATENCY, cpu, latency, 0, 0, stop, next,
+		    -1);
 #ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
 		if (timerandwakeup_enabled_data.enabled) {
 			latency_hist(TIMERANDWAKEUP_LATENCY, cpu,
-			    next->timer_offset + latency, next->timer_offset,
-			    stop, next);
+			    next->timer_offset + latency, next->timer_offset, 0,
+			    stop, next, -1);
 		}
+		hit = 1;
 #endif
 	}
 
 out_reset:
-#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
-	next->timer_offset = 0;
-#endif
 	put_task_struct(cpu_wakeup_task);
 	per_cpu(wakeup_task, cpu) = NULL;
 out:
 	raw_spin_unlock_irqrestore(&wakeup_lock, flags);
+
+#ifdef CONFIG_SWITCHTIME_HIST
+	if (hit && switchtime_enabled_data.enabled &&
+	    (switchtime_pid == 0 || task_pid_nr(next) == switchtime_pid)) {
+		unsigned long flags;
+		struct task_struct *cpu_switchtime_task;
+
+		raw_spin_lock_irqsave(&switchtime_lock, flags);
+
+		cpu_switchtime_task = per_cpu(switchtime_task, cpu) =
+		    next;
+		get_task_struct(cpu_switchtime_task);
+
+		raw_spin_unlock_irqrestore(&switchtime_lock, flags);
+
+		next->switchtime_timestamp_hist = stop;
+		next->wakeup_latency = latency;
+		next->switchtime_prev = current;
+	}
+#ifdef CONFIG_MISSED_TIMER_OFFSETS_HIST
+	else
+	  next->timer_offset = 0;
+#endif
+
+#endif
 }
 #endif
 
@@ -992,10 +1177,28 @@ static notrace void probe_hrtimer_interr
 				return;
 		}
 
+#ifdef CONFIG_SWITCHTIME_HIST
+		if (!switchtime_pid) {
+			unsigned long flags;
+			struct task_struct *cpu_switchtime_task;
+
+			raw_spin_lock_irqsave(&switchtime_lock, flags);
+			cpu_switchtime_task = per_cpu(switchtime_task, cpu);
+			if (cpu_switchtime_task &&
+			    task->prio < cpu_switchtime_task->prio) {
+				cpu_switchtime_task->switchtime_timestamp_hist =
+				    0;
+				put_task_struct(cpu_switchtime_task);
+				per_cpu(switchtime_task, cpu) = NULL;
+			}
+			raw_spin_unlock_irqrestore(&switchtime_lock, flags);
+		}
+#endif
+
 		now = ftrace_now(cpu);
 		latency = (long) div_s64(-latency_ns, NSECS_PER_USECS);
-		latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, latency, now,
-		    task);
+		latency_hist(MISSED_TIMER_OFFSETS, cpu, latency, 0, 0, now,
+		    task, -1);
 #ifdef CONFIG_WAKEUP_LATENCY_HIST
 		task->timer_offset = latency;
 #endif
@@ -1003,6 +1206,66 @@ static notrace void probe_hrtimer_interr
 }
 #endif
 
+#ifdef CONFIG_SWITCHTIME_HIST
+static notrace void probe_syscall_exit(void *v, struct pt_regs *regs, long ret)
+{
+	unsigned long flags;
+	int cpu = task_cpu(current);
+	struct task_struct *cpu_switchtime_task;
+
+	raw_spin_lock_irqsave(&switchtime_lock, flags);
+
+	cpu_switchtime_task = per_cpu(switchtime_task, cpu);
+	if (cpu_switchtime_task == NULL)
+		goto out;
+
+	if (current == cpu_switchtime_task) {
+		unsigned long latency;
+		cycle_t stop;
+		int syscall_nr;
+
+		if (switchtime_pid) {
+			if (likely(switchtime_pid !=
+			    task_pid_nr(current)))
+				goto out;
+		}
+
+		/*
+		 * The task we are waiting for exited from sytem call.
+		 * Calculate latency since start of context switch and store it
+		 * in histogram.
+		 */
+		stop = ftrace_now(raw_smp_processor_id());
+
+		latency = ((long) (stop - current->switchtime_timestamp_hist)) /
+		    NSECS_PER_USECS;
+
+		syscall_nr = syscall_get_nr(current, regs);
+
+		current->switchtime_timestamp_hist = 0;
+
+		latency_hist(SWITCHTIME, cpu, latency, 0, 0, stop,
+		    current->switchtime_prev, syscall_nr);
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
+		if (timerwakeupswitch_enabled_data.enabled) {
+			latency_hist(TIMERWAKEUPSWITCH_LATENCY, cpu,
+			    current->timer_offset + current->wakeup_latency +
+			    latency, current->timer_offset,
+			    current->wakeup_latency, stop,
+			    current->switchtime_prev, syscall_nr);
+		}
+		current->timer_offset = current->wakeup_latency = 0;
+#endif
+	}
+	put_task_struct(cpu_switchtime_task);
+	per_cpu(switchtime_task, cpu) = NULL;
+
+out:
+	raw_spin_unlock_irqrestore(&switchtime_lock, flags);
+}
+#endif
+
 static __init int latency_hist_init(void)
 {
 	struct dentry *latency_hist_root = NULL;
@@ -1147,6 +1410,32 @@ static __init int latency_hist_init(void
 	    &enable_fops);
 #endif
 
+#ifdef CONFIG_SWITCHTIME_HIST
+	dentry = debugfs_create_dir(switchtime_dir,
+	    latency_hist_root);
+	for_each_possible_cpu(i) {
+		sprintf(name, cpufmt, i);
+		entry = debugfs_create_file(name, 0444, dentry,
+		    &per_cpu(switchtime, i), &latency_hist_fops);
+		my_hist = &per_cpu(switchtime, i);
+		atomic_set(&my_hist->hist_mode, 1);
+		my_hist->min_lat = LONG_MAX;
+
+		sprintf(name, cpufmt_maxlatproc, i);
+		mp = &per_cpu(switchtime_maxlatproc, i);
+		entry = debugfs_create_file(name, 0444, dentry, mp,
+		    &maxlatproc_fops);
+		clear_maxlatprocdata(mp);
+	}
+	entry = debugfs_create_file("pid", 0644, dentry,
+	    (void *)&switchtime_pid, &pid_fops);
+	entry = debugfs_create_file("reset", 0644, dentry,
+	    (void *)SWITCHTIME, &latency_hist_reset_fops);
+	entry = debugfs_create_file("switchtime", 0644,
+	    enable_root, (void *)&switchtime_enabled_data,
+	    &enable_fops);
+#endif
+
 #if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
 	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST)
 	dentry = debugfs_create_dir(timerandwakeup_latency_hist_dir,
@@ -1172,6 +1461,32 @@ static __init int latency_hist_init(void
 	    enable_root, (void *)&timerandwakeup_enabled_data,
 	    &enable_fops);
 #endif
+#if defined(CONFIG_WAKEUP_LATENCY_HIST) && \
+	defined(CONFIG_MISSED_TIMER_OFFSETS_HIST) && \
+	defined(CONFIG_SWITCHTIME_HIST)
+	dentry = debugfs_create_dir(timerwakeupswitch_latency_hist_dir,
+	    latency_hist_root);
+	for_each_possible_cpu(i) {
+		sprintf(name, cpufmt, i);
+		entry = debugfs_create_file(name, 0444, dentry,
+		    &per_cpu(timerwakeupswitch_latency_hist, i),
+		    &latency_hist_fops);
+		my_hist = &per_cpu(timerwakeupswitch_latency_hist, i);
+		atomic_set(&my_hist->hist_mode, 1);
+		my_hist->min_lat = LONG_MAX;
+
+		sprintf(name, cpufmt_maxlatproc, i);
+		mp = &per_cpu(timerwakeupswitch_maxlatproc, i);
+		entry = debugfs_create_file(name, 0444, dentry, mp,
+		    &maxlatproc_fops);
+		clear_maxlatprocdata(mp);
+	}
+	entry = debugfs_create_file("reset", 0644, dentry,
+	    (void *)TIMERWAKEUPSWITCH_LATENCY, &latency_hist_reset_fops);
+	entry = debugfs_create_file("timerwakeupswitch", 0644,
+	    enable_root, (void *)&timerwakeupswitch_enabled_data,
+	    &enable_fops);
+#endif
 	return 0;
 }
 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ