This patch adds context switch tracing, of the format of: _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / swapper-0 1d..3 137us+: 0:140:R --> 2912:120 sshd-2912 1d..3 216us+: 2912:120:S --> 0:140 swapper-0 1d..3 261us+: 0:140:R --> 2912:120 bash-2920 0d..3 267us+: 2920:120:S --> 0:140 sshd-2912 1d..3 330us!: 2912:120:S --> 0:140 swapper-0 1d..3 2389us+: 0:140:R --> 2847:120 yum-upda-2847 1d..3 2411us!: 2847:120:S --> 0:140 swapper-0 0d..3 11089us+: 0:140:R --> 3139:120 gdm-bina-3139 0d..3 11113us!: 3139:120:S --> 0:140 swapper-0 1d..3 102328us+: 0:140:R --> 2847:120 yum-upda-2847 1d..3 102348us!: 2847:120:S --> 0:140 "sched_switch" is added to /debugfs/tracing/available_tracers Signed-off-by: Steven Rostedt Cc: Mathieu Desnoyers --- lib/tracing/Kconfig | 9 ++ lib/tracing/Makefile | 1 lib/tracing/trace_sched_switch.c | 166 +++++++++++++++++++++++++++++++++++++++ lib/tracing/tracer.c | 43 ++++++++++ lib/tracing/tracer.h | 23 +++++ 5 files changed, 241 insertions(+), 1 deletion(-) Index: linux-mcount.git/lib/tracing/Kconfig =================================================================== --- linux-mcount.git.orig/lib/tracing/Kconfig 2008-01-30 15:18:18.000000000 -0500 +++ linux-mcount.git/lib/tracing/Kconfig 2008-01-30 15:32:33.000000000 -0500 @@ -23,3 +23,12 @@ config FUNCTION_TRACER insert a call to an architecture specific __mcount routine, that the debugging mechanism using this facility will hook by providing a set of inline routines. + +config CONTEXT_SWITCH_TRACER + bool "Trace process context switches" + depends on DEBUG_KERNEL + select TRACING + help + This tracer hooks into the context switch and records + all switching of tasks. + Index: linux-mcount.git/lib/tracing/Makefile =================================================================== --- linux-mcount.git.orig/lib/tracing/Makefile 2008-01-30 15:18:18.000000000 -0500 +++ linux-mcount.git/lib/tracing/Makefile 2008-01-30 15:32:34.000000000 -0500 @@ -1,6 +1,7 @@ obj-$(CONFIG_MCOUNT) += libmcount.o obj-$(CONFIG_TRACING) += tracer.o +obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o obj-$(CONFIG_FUNCTION_TRACER) += trace_function.o libmcount-y := mcount.o Index: linux-mcount.git/lib/tracing/trace_sched_switch.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-mcount.git/lib/tracing/trace_sched_switch.c 2008-01-30 15:32:34.000000000 -0500 @@ -0,0 +1,166 @@ +/* + * trace context switch + * + * Copyright (C) 2007 Steven Rostedt + * + */ +#include +#include +#include +#include +#include +#include +#include + +#include "tracer.h" + +static struct tracing_trace *tracer_trace; +static int trace_enabled __read_mostly; +static atomic_t sched_ref; +int tracing_sched_switch_enabled __read_mostly; + +static notrace void sched_switch_callback(const struct marker *mdata, + void *private_data, + const char *format, ...) +{ + struct tracing_trace **p = mdata->private; + struct tracing_trace *tr = *p; + struct tracing_trace_cpu *data; + struct task_struct *prev; + struct task_struct *next; + unsigned long flags; + long disabled; + va_list ap; + int cpu; + + if (!trace_enabled) + return; + + va_start(ap, format); + prev = va_arg(ap, typeof(prev)); + next = va_arg(ap, typeof(next)); + va_end(ap); + + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + disabled = atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) + tracing_sched_switch_trace(tr, data, prev, next, flags); + + atomic_dec(&data->disabled); + raw_local_irq_restore(flags); +} + +static notrace void sched_switch_reset(struct tracing_trace *tr) +{ + int cpu; + + tr->time_start = now(); + + for_each_online_cpu(cpu) + tracing_reset(tr->data[cpu]); +} + +static notrace void start_sched_trace(struct tracing_trace *tr) +{ + sched_switch_reset(tr); + trace_enabled = 1; + tracing_start_sched_switch(); +} + +static notrace void stop_sched_trace(struct tracing_trace *tr) +{ + tracing_stop_sched_switch(); + trace_enabled = 0; +} + +static notrace void sched_switch_trace_init(struct tracing_trace *tr) +{ + tracer_trace = tr; + + if (tr->ctrl) + start_sched_trace(tr); +} + +static notrace void sched_switch_trace_reset(struct tracing_trace *tr) +{ + if (tr->ctrl) + stop_sched_trace(tr); +} + +static void sched_switch_trace_ctrl_update(struct tracing_trace *tr) +{ + /* When starting a new trace, reset the buffers */ + if (tr->ctrl) + start_sched_trace(tr); + else + stop_sched_trace(tr); +} + +static struct trace_types_struct sched_switch_trace __read_mostly = +{ + .name = "sched_switch", + .init = sched_switch_trace_init, + .reset = sched_switch_trace_reset, + .ctrl_update = sched_switch_trace_ctrl_update, +}; + +static int tracing_sched_arm(void) +{ + int ret; + + ret = marker_arm("kernel_sched_schedule"); + if (ret) + pr_info("sched trace: Couldn't arm probe switch_to\n"); + + return ret; +} + +void tracing_start_sched_switch(void) +{ + long ref; + + ref = atomic_inc_return(&sched_ref); + if (tracing_sched_switch_enabled && ref == 1) + tracing_sched_arm(); +} + +void tracing_stop_sched_switch(void) +{ + long ref; + + ref = atomic_dec_and_test(&sched_ref); + if (tracing_sched_switch_enabled && ref) + marker_disarm("kernel_sched_schedule"); +} + +__init static int init_sched_switch_trace(void) +{ + int ret; + + ret = register_trace(&sched_switch_trace); + if (ret) + return ret; + + ret = marker_probe_register("kernel_sched_schedule", + "prev %p next %p", + sched_switch_callback, + &tracer_trace); + if (ret) { + pr_info("sched trace: Couldn't add marker" + " probe to switch_to\n"); + goto out; + } + + if (atomic_read(&sched_ref)) + ret = tracing_sched_arm(); + + tracing_sched_switch_enabled = 1; + + out: + return ret; +} + +device_initcall(init_sched_switch_trace); Index: linux-mcount.git/lib/tracing/tracer.c =================================================================== --- linux-mcount.git.orig/lib/tracing/tracer.c 2008-01-30 15:18:18.000000000 -0500 +++ linux-mcount.git/lib/tracing/tracer.c 2008-01-30 15:32:34.000000000 -0500 @@ -52,6 +52,7 @@ enum trace_type { __TRACE_FIRST_TYPE = 0, TRACE_FN, + TRACE_CTX, __TRACE_LAST_TYPE }; @@ -230,6 +231,24 @@ notrace void tracing_function_trace(stru entry->fn.parent_ip = parent_ip; } +notrace void tracing_sched_switch_trace(struct tracing_trace *tr, + struct tracing_trace_cpu *data, + struct task_struct *prev, + struct task_struct *next, + unsigned long flags) +{ + struct tracing_entry *entry; + + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, flags); + entry->type = TRACE_CTX; + entry->ctx.prev_pid = prev->pid; + entry->ctx.prev_prio = prev->prio; + entry->ctx.prev_state = prev->state; + entry->ctx.next_pid = next->pid; + entry->ctx.next_prio = next->prio; +} + enum trace_iterator { TRACE_ITER_SYM_ONLY = 1, TRACE_ITER_VERBOSE = 2, @@ -548,6 +567,8 @@ lat_print_timestamp(struct seq_file *m, seq_puts(m, " : "); } +static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; + static void notrace print_lat_fmt(struct seq_file *m, struct tracing_iterator *iter, unsigned int trace_idx, int cpu) @@ -558,6 +579,7 @@ print_lat_fmt(struct seq_file *m, struct unsigned long rel_usecs; int sym_only = !!(trace_flags & TRACE_ITER_SYM_ONLY); int verbose = !!(trace_flags & TRACE_ITER_VERBOSE); + int S; if (!next_entry) next_entry = entry; @@ -584,6 +606,16 @@ print_lat_fmt(struct seq_file *m, struct seq_print_ip_sym(m, entry->fn.parent_ip, sym_only); seq_puts(m, ")\n"); break; + case TRACE_CTX: + S = entry->ctx.prev_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.prev_state] : 'X'; + seq_printf(m, " %d:%d:%c --> %d:%d\n", + entry->ctx.prev_pid, + entry->ctx.prev_prio, + S, + entry->ctx.next_pid, + entry->ctx.next_prio); + break; } } @@ -595,6 +627,7 @@ static void notrace print_trace_fmt(stru unsigned long secs; int sym_only = !!(trace_flags & TRACE_ITER_SYM_ONLY); unsigned long long t; + int S; t = cycles_to_usecs(entry->t); usec_rem = do_div(t, 1000000ULL); @@ -613,6 +646,16 @@ static void notrace print_trace_fmt(stru sym_only); } break; + case TRACE_CTX: + S = entry->ctx.prev_state < sizeof(state_to_char) ? + state_to_char[entry->ctx.prev_state] : 'X'; + seq_printf(m, " %d:%d:%c ==> %d:%d\n", + entry->ctx.prev_pid, + entry->ctx.prev_prio, + S, + entry->ctx.next_pid, + entry->ctx.next_prio); + break; } seq_printf(m, "\n"); } Index: linux-mcount.git/lib/tracing/tracer.h =================================================================== --- linux-mcount.git.orig/lib/tracing/tracer.h 2008-01-30 15:18:18.000000000 -0500 +++ linux-mcount.git/lib/tracing/tracer.h 2008-01-30 15:32:34.000000000 -0500 @@ -10,6 +10,14 @@ struct tracing_function { unsigned long parent_ip; }; +struct tracing_sched_switch { + unsigned int prev_pid; + unsigned char prev_prio; + unsigned char prev_state; + unsigned int next_pid; + unsigned char next_prio; +}; + struct tracing_entry { char type; char cpu; /* who will want to trace more than 256 CPUS? */ @@ -18,7 +26,10 @@ struct tracing_entry { int pid; cycle_t t; char comm[TASK_COMM_LEN]; - struct tracing_function fn; + union { + struct tracing_function fn; + struct tracing_sched_switch ctx; + }; }; struct tracing_trace_cpu { @@ -82,11 +93,21 @@ void tracing_function_trace(struct traci unsigned long ip, unsigned long parent_ip, unsigned long flags); +void tracing_sched_switch_trace(struct tracing_trace *tr, + struct tracing_trace_cpu *data, + struct task_struct *prev, + struct task_struct *next, + unsigned long flags); + void tracing_start_function_trace(void); void tracing_stop_function_trace(void); int register_trace(struct trace_types_struct *type); void unregister_trace(struct trace_types_struct *type); +void tracing_start_sched_switch(void); +void tracing_stop_sched_switch(void); + +extern int tracing_sched_switch_enabled; static inline notrace cycle_t now(void) { -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/