This is a simple trace that uses the mcount infrastructure. It is designed to be fast and small, and easy to use. It is useful to record things that happen over a very short period of time, and not to analyze the system in general. An interface is added to the debugfs /debugfs/tracing/ This patch adds the following files: available_tracers list of available tracers. Currently only "function" is available. current_tracer The trace that is currently active. Empty on start up. To switch to a tracer simply echo one of the tracers that are listed in available_tracers: echo function > /debugfs/tracing/current_tracer To disable the tracer: echo disable > /debugfs/tracing/current_tracer trace_ctrl echoing "1" into this file starts the mcount function tracing (if sysctl kernel.mcount_enabled=1) echoing "0" turns it off. latency_trace This file is readonly and holds the result of the trace. trace This file outputs a easier to read version of the trace. iter_ctrl Controls the way the output of traces look. So far there's two controls: echoing in "symonly" will only show the kallsyms variables without the addresses (if kallsyms was configured) echoing in "verbose" will change the output to show a lot more data, but not very easy to understand by humans. echoing in "nosymonly" turns off symonly. echoing in "noverbose" turns off verbose. The output of the function_trace file is as follows "echo noverbose > /debugfs/tracing/iter_ctrl" preemption latency trace v1.1.5 on 2.6.24-rc7-tst -------------------------------------------------------------------- latency: 0 us, #419428/4361791, CPU#1 | (M:desktop VP:0, KP:0, SP:0 HP:0 #P:4) ----------------- | task: -0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- _------=> CPU# / _-----=> irqs-off | / _----=> need-resched || / _---=> hardirq/softirq ||| / _--=> preempt-depth |||| / ||||| delay cmd pid ||||| time | caller \ / ||||| \ | / swapper-0 0d.h. 1595128us+: set_normalized_timespec+0x8/0x2d (ktime_get_ts+0x4a/0x4e ) swapper-0 0d.h. 1595131us+: _spin_lock+0x8/0x18 (hrtimer_interrupt+0x6e/0x1b0 ) Or with verbose turned on: "echo verbose > /debugfs/tracing/iter_ctrl" preemption latency trace v1.1.5 on 2.6.24-rc7-tst -------------------------------------------------------------------- latency: 0 us, #419428/4361791, CPU#1 | (M:desktop VP:0, KP:0, SP:0 HP:0 #P:4) ----------------- | task: -0 (uid:0 nice:0 policy:0 rt_prio:0) ----------------- swapper 0 0 9 00000000 00000000 [f3675f41] 1595.128ms (+0.003ms): set_normalized_timespec+0x8/0x2d (ktime_get_ts+0x4a/0x4e ) swapper 0 0 9 00000000 00000001 [f3675f45] 1595.131ms (+0.003ms): _spin_lock+0x8/0x18 (hrtimer_interrupt+0x6e/0x1b0 ) swapper 0 0 9 00000000 00000002 [f3675f48] 1595.135ms (+0.003ms): _spin_lock+0x8/0x18 (hrtimer_interrupt+0x6e/0x1b0 ) The "trace" file is not affected by the verbose mode, but is by the symonly. echo "nosymonly" > /debugfs/tracing/iter_ctrl tracer: [ 81.479967] CPU 0: bash:3154 register_mcount_function+0x5f/0x66 <-- _spin_unlock_irqrestore+0xe/0x5a [ 81.479967] CPU 0: bash:3154 _spin_unlock_irqrestore+0x3e/0x5a <-- sub_preempt_count+0xc/0x7a [ 81.479968] CPU 0: bash:3154 sub_preempt_count+0x30/0x7a <-- in_lock_functions+0x9/0x24 [ 81.479968] CPU 0: bash:3154 vfs_write+0x11d/0x155 <-- dnotify_parent+0x12/0x78 [ 81.479968] CPU 0: bash:3154 dnotify_parent+0x2d/0x78 <-- _spin_lock+0xe/0x70 [ 81.479969] CPU 0: bash:3154 _spin_lock+0x1b/0x70 <-- add_preempt_count+0xe/0x77 [ 81.479969] CPU 0: bash:3154 add_preempt_count+0x3e/0x77 <-- in_lock_functions+0x9/0x24 echo "symonly" > /debugfs/tracing/iter_ctrl tracer: [ 81.479913] CPU 0: bash:3154 register_mcount_function+0x5f/0x66 <-- _spin_unlock_irqrestore+0xe/0x5a [ 81.479913] CPU 0: bash:3154 _spin_unlock_irqrestore+0x3e/0x5a <-- sub_preempt_count+0xc/0x7a [ 81.479913] CPU 0: bash:3154 sub_preempt_count+0x30/0x7a <-- in_lock_functions+0x9/0x24 [ 81.479914] CPU 0: bash:3154 vfs_write+0x11d/0x155 <-- dnotify_parent+0x12/0x78 [ 81.479914] CPU 0: bash:3154 dnotify_parent+0x2d/0x78 <-- _spin_lock+0xe/0x70 [ 81.479914] CPU 0: bash:3154 _spin_lock+0x1b/0x70 <-- add_preempt_count+0xe/0x77 [ 81.479914] CPU 0: bash:3154 add_preempt_count+0x3e/0x77 <-- in_lock_functions+0x9/0x24 Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- lib/Makefile | 1 lib/tracing/Kconfig | 15 lib/tracing/Makefile | 3 lib/tracing/trace_function.c | 72 ++ lib/tracing/tracer.c | 1160 +++++++++++++++++++++++++++++++++++++++++++ lib/tracing/tracer.h | 96 +++ 6 files changed, 1347 insertions(+) Index: linux-mcount.git/lib/tracing/Kconfig =================================================================== --- linux-mcount.git.orig/lib/tracing/Kconfig 2008-01-29 17:26:18.000000000 -0500 +++ linux-mcount.git/lib/tracing/Kconfig 2008-01-29 18:06:25.000000000 -0500 @@ -8,3 +8,18 @@ config HAVE_MCOUNT config MCOUNT bool select FRAME_POINTER + +config TRACING + bool + select DEBUG_FS + +config FUNCTION_TRACER + bool "Profiler instrumentation based tracer" + depends on DEBUG_KERNEL && HAVE_MCOUNT + select MCOUNT + select TRACING + help + Use profiler instrumentation, adding -pg to CFLAGS. This will + insert a call to an architecture specific __mcount routine, + that the debugging mechanism using this facility will hook by + providing a set of inline routines. Index: linux-mcount.git/lib/tracing/Makefile =================================================================== --- linux-mcount.git.orig/lib/tracing/Makefile 2008-01-29 17:26:18.000000000 -0500 +++ linux-mcount.git/lib/tracing/Makefile 2008-01-29 18:06:25.000000000 -0500 @@ -1,3 +1,6 @@ obj-$(CONFIG_MCOUNT) += libmcount.o +obj-$(CONFIG_TRACING) += tracer.o +obj-$(CONFIG_FUNCTION_TRACER) += trace_function.o + libmcount-y := mcount.o Index: linux-mcount.git/lib/tracing/tracer.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-mcount.git/lib/tracing/tracer.c 2008-01-29 18:07:38.000000000 -0500 @@ -0,0 +1,1160 @@ +/* + * ring buffer based mcount tracer + * + * Copyright (C) 2007 Steven Rostedt + * + * Originally taken from the RT patch by: + * Arnaldo Carvalho de Melo + * + * Based on code from the latency_tracer, that is: + * Copyright (C) 2004-2006 Ingo Molnar + * Copyright (C) 2004 William Lee Irwin III + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "tracer.h" + +static struct tracing_trace tracer_trace __read_mostly; +static DEFINE_PER_CPU(struct tracing_trace_cpu, tracer_trace_cpu); +static int trace_enabled __read_mostly; +static unsigned long trace_nr_entries = (65536UL); + +static struct trace_types_struct *trace_types __read_mostly; +static struct trace_types_struct *current_trace __read_mostly; +static int max_tracer_type_len; + +static DEFINE_MUTEX(trace_types_lock); + +static int __init set_nr_entries(char *str) +{ + if (!str) + return 0; + trace_nr_entries = simple_strtoul(str, &str, 0); + return 1; +} +__setup("trace_entries=", set_nr_entries); + +enum trace_type { + __TRACE_FIRST_TYPE = 0, + + TRACE_FN, + + __TRACE_LAST_TYPE +}; + +enum trace_flag_type { + TRACE_FLAG_IRQS_OFF = 0x01, + TRACE_FLAG_NEED_RESCHED = 0x02, + TRACE_FLAG_HARDIRQ = 0x04, + TRACE_FLAG_SOFTIRQ = 0x08, +}; + +int register_trace(struct trace_types_struct *type) +{ + struct trace_types_struct *t; + int len; + int ret = 0; + + if (!type->name) { + pr_info("Tracer must have a name\n"); + return -1; + } + + mutex_lock(&trace_types_lock); + for (t = trace_types; t; t = t->next) { + if (strcmp(type->name, t->name) == 0) { + /* already found */ + pr_info("Trace %s already registered\n", + type->name); + ret = -1; + goto out; + } + } + + type->next = trace_types; + trace_types = type; + len = strlen(type->name); + if (len > max_tracer_type_len) + max_tracer_type_len = len; + out: + mutex_unlock(&trace_types_lock); + + return ret; +} + +void unregister_trace(struct trace_types_struct *type) +{ + struct trace_types_struct **t; + int len; + + mutex_lock(&trace_types_lock); + for (t = &trace_types; *t; t = &(*t)->next) { + if (*t == type) + goto found; + } + pr_info("Trace %s not registered\n", type->name); + goto out; + + found: + *t = (*t)->next; + if (strlen(type->name) != max_tracer_type_len) + goto out; + + max_tracer_type_len = 0; + for (t = &trace_types; *t; t = &(*t)->next) { + len = strlen((*t)->name); + if (len > max_tracer_type_len) + max_tracer_type_len = len; + } + out: + mutex_unlock(&trace_types_lock); +} + +void notrace tracing_reset(struct tracing_trace_cpu *data) +{ + data->trace_idx = 0; + atomic_set(&data->underrun, 0); +} + +#ifdef CONFIG_MCOUNT +static void notrace function_trace_call(unsigned long ip, + unsigned long parent_ip) +{ + struct tracing_trace *tr = &tracer_trace; + struct tracing_trace_cpu *data; + unsigned long flags; + int cpu; + + if (unlikely(!trace_enabled)) + return; + + raw_local_irq_save(flags); + cpu = raw_smp_processor_id(); + data = tr->data[cpu]; + atomic_inc(&data->disabled); + + if (likely(atomic_read(&data->disabled) == 1)) + tracing_function_trace(tr, data, ip, parent_ip, flags); + + atomic_dec(&data->disabled); + raw_local_irq_restore(flags); +} + +static struct mcount_ops trace_ops __read_mostly = +{ + .func = function_trace_call, +}; +#endif + +void tracing_start_function_trace(void) +{ + register_mcount_function(&trace_ops); +} + +void tracing_stop_function_trace(void) +{ + unregister_mcount_function(&trace_ops); +} + +static inline notrace struct tracing_entry * +tracing_get_trace_entry(struct tracing_trace *tr, + struct tracing_trace_cpu *data) +{ + unsigned long idx, idx_next; + struct tracing_entry *entry; + + idx = data->trace_idx; + idx_next = idx + 1; + + if (unlikely(idx_next >= tr->entries)) { + atomic_inc(&data->underrun); + idx_next = 0; + } + + data->trace_idx = idx_next; + + if (unlikely(idx_next != 0 && atomic_read(&data->underrun))) + atomic_inc(&data->underrun); + + entry = data->trace + idx * TRACING_ENTRY_SIZE; + + return entry; +} + +static inline notrace void +tracing_generic_entry_update(struct tracing_entry *entry, + unsigned long flags) +{ + struct task_struct *tsk = current; + unsigned long pc; + + pc = preempt_count(); + + entry->preempt_count = pc & 0xff; + entry->pid = tsk->pid; + entry->t = now(); + entry->flags = (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | + ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | + ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | + (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); + memcpy(entry->comm, tsk->comm, TASK_COMM_LEN); +} + +notrace void tracing_function_trace(struct tracing_trace *tr, + struct tracing_trace_cpu *data, + unsigned long ip, + unsigned long parent_ip, + unsigned long flags) +{ + struct tracing_entry *entry; + + entry = tracing_get_trace_entry(tr, data); + tracing_generic_entry_update(entry, flags); + entry->type = TRACE_FN; + entry->fn.ip = ip; + entry->fn.parent_ip = parent_ip; +} + +enum trace_iterator { + TRACE_ITER_SYM_ONLY = 1, + TRACE_ITER_VERBOSE = 2, +}; + +/* These must match the bit postions above */ +static const char *trace_options[] = { + "symonly", + "verbose", + NULL +}; + +static unsigned trace_flags; + +enum trace_file_type { + TRACE_FILE_LAT_FMT = 1, +}; + +static struct tracing_entry *tracing_entry_idx(struct tracing_trace *tr, + unsigned long idx, + int cpu) +{ + struct tracing_entry *array = tr->data[cpu]->trace; + unsigned long underrun; + + if (idx >= tr->entries) + return NULL; + + underrun = atomic_read(&tr->data[cpu]->underrun); + if (underrun) + idx = ((underrun - 1) + idx) % tr->entries; + else if (idx >= tr->data[cpu]->trace_idx) + return NULL; + + return &array[idx]; +} + +static struct notrace tracing_entry * +find_next_entry(struct tracing_iterator *iter, int *ent_cpu) +{ + struct tracing_trace *tr = iter->tr; + struct tracing_entry *ent, *next = NULL; + int next_cpu = -1; + int cpu; + + for_each_possible_cpu(cpu) { + if (!tr->data[cpu]->trace) + continue; + ent = tracing_entry_idx(tr, iter->next_idx[cpu], cpu); + if (ent && (!next || next->t > ent->t)) { + next = ent; + next_cpu = cpu; + } + } + + if (ent_cpu) + *ent_cpu = next_cpu; + + return next; +} + +static void *find_next_entry_inc(struct tracing_iterator *iter) +{ + struct tracing_entry *next; + int next_cpu = -1; + + next = find_next_entry(iter, &next_cpu); + + if (next) { + iter->next_idx[next_cpu]++; + iter->idx++; + } + iter->ent = next; + iter->cpu = next_cpu; + + return next ? iter : NULL; +} + +static void notrace * +s_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct tracing_iterator *iter = m->private; + void *ent; + void *last_ent = iter->ent; + int i = (int)*pos; + + (*pos)++; + + /* can't go backwards */ + if (iter->idx > i) + return NULL; + + if (iter->idx < 0) + ent = find_next_entry_inc(iter); + else + ent = iter; + + while (ent && iter->idx < i) + ent = find_next_entry_inc(iter); + + iter->pos = *pos; + + if (last_ent && !ent) + seq_puts(m, "\n\nvim:ft=help\n"); + + return ent; +} + +static void *s_start(struct seq_file *m, loff_t *pos) +{ + struct tracing_iterator *iter = m->private; + void *p = NULL; + loff_t l = 0; + int i; + + mutex_lock(&trace_types_lock); + + if (!current_trace || current_trace != iter->trace) + return NULL; + + /* let the tracer grab locks here if needed */ + if (current_trace->start) + current_trace->start(iter); + + if (*pos != iter->pos) { + iter->ent = NULL; + iter->cpu = 0; + iter->idx = -1; + + for (i = 0; i < NR_CPUS; i++) + iter->next_idx[i] = 0; + + for (p = iter; p && l < *pos; p = s_next(m, p, &l)) + ; + + } else { + l = *pos; + p = s_next(m, p, &l); + } + + return p; +} + +static void s_stop(struct seq_file *m, void *p) +{ + struct tracing_iterator *iter = m->private; + + /* let the tracer release locks here if needed */ + if (current_trace && current_trace == iter->trace && iter->trace->stop) + iter->trace->stop(iter); + + mutex_unlock(&trace_types_lock); +} + +#ifdef CONFIG_KALLSYMS +static void seq_print_symbol(struct seq_file *m, + const char *fmt, unsigned long address) +{ + char buffer[KSYM_SYMBOL_LEN]; + + sprint_symbol(buffer, address); + seq_printf(m, fmt, buffer); +} +#else +# define seq_print_symbol(m, fmt, address) do { } while (0) +#endif + +#ifndef CONFIG_64BIT +# define IP_FMT "%08lx" +#else +# define IP_FMT "%016lx" +#endif + +static void notrace seq_print_ip_sym(struct seq_file *m, + unsigned long ip, int sym_only) +{ + if (!ip) { + seq_printf(m, "0"); + return; + } + + seq_print_symbol(m, "%s", ip); + if (!sym_only) + seq_printf(m, " <" IP_FMT ">", ip); +} + +static void notrace print_help_header(struct seq_file *m) +{ + seq_puts(m, " _------=> CPU# \n"); + seq_puts(m, " / _-----=> irqs-off \n"); + seq_puts(m, " | / _----=> need-resched \n"); + seq_puts(m, " || / _---=> hardirq/softirq \n"); + seq_puts(m, " ||| / _--=> preempt-depth \n"); + seq_puts(m, " |||| / \n"); + seq_puts(m, " ||||| delay \n"); + seq_puts(m, " cmd pid ||||| time | caller \n"); + seq_puts(m, " \\ / ||||| \\ | / \n"); +} + +static void notrace print_trace_header(struct seq_file *m, + struct tracing_iterator *iter) +{ + struct tracing_trace *tr = iter->tr; + struct tracing_trace_cpu *data = tr->data[tr->cpu]; + struct trace_types_struct *type = current_trace; + unsigned long underruns = 0; + unsigned long underrun; + unsigned long entries = 0; + int sym_only = !!(trace_flags & TRACE_ITER_SYM_ONLY); + int cpu; + const char *name = "preemption"; + + if (type) + name = type->name; + + for_each_possible_cpu(cpu) { + if (tr->data[cpu]->trace) { + underrun = atomic_read(&tr->data[cpu]->underrun); + if (underrun) { + underruns += underrun; + entries += tr->entries; + } else + entries += tr->data[cpu]->trace_idx; + } + } + + seq_printf(m, "%s latency trace v1.1.5 on %s\n", + name, UTS_RELEASE); + seq_puts(m, "-----------------------------------" + "---------------------------------\n"); + seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |" + " (M:%s VP:%d, KP:%d, SP:%d HP:%d", + cycles_to_usecs(data->saved_latency), + entries, + (entries + underruns), + tr->cpu, +#if defined(CONFIG_PREEMPT_NONE) + "server", +#elif defined(CONFIG_PREEMPT_VOLUNTARY) + "desktop", +#elif defined(CONFIG_PREEMPT_DESKTOP) + "preempt", +#else + "unknown", +#endif + /* These are reserved for later use */ + 0, 0, 0, 0); +#ifdef CONFIG_SMP + seq_printf(m, " #P:%d)\n", num_online_cpus()); +#else + seq_puts(m, ")\n"); +#endif + seq_puts(m, " -----------------\n"); + seq_printf(m, " | task: %.16s-%d " + "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", + data->comm, data->pid, data->uid, data->nice, + data->policy, data->rt_priority); + seq_puts(m, " -----------------\n"); + + if (data->critical_start) { + seq_puts(m, " => started at: "); + seq_print_ip_sym(m, data->critical_start, sym_only); + seq_puts(m, "\n => ended at: "); + seq_print_ip_sym(m, data->critical_end, sym_only); + seq_puts(m, "\n"); + } + + seq_puts(m, "\n"); +} + + +static void notrace +lat_print_generic(struct seq_file *m, struct tracing_entry *entry, int cpu) +{ + int hardirq, softirq; + + seq_printf(m, "%8.8s-%-5d ", entry->comm, entry->pid); + seq_printf(m, "%d", cpu); + seq_printf(m, "%c%c", + (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : '.', + ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 'N' : '.')); + + hardirq = entry->flags & TRACE_FLAG_HARDIRQ; + softirq = entry->flags & TRACE_FLAG_SOFTIRQ; + if (hardirq && softirq) + seq_putc(m, 'H'); + else { + if (hardirq) + seq_putc(m, 'h'); + else { + if (softirq) + seq_putc(m, 's'); + else + seq_putc(m, '.'); + } + } + + if (entry->preempt_count) + seq_printf(m, "%x", entry->preempt_count); + else + seq_puts(m, "."); +} + +unsigned long preempt_mark_thresh = 100; + +static void notrace +lat_print_timestamp(struct seq_file *m, unsigned long long abs_usecs, + unsigned long rel_usecs) +{ + seq_printf(m, " %4lldus", abs_usecs); + if (rel_usecs > preempt_mark_thresh) + seq_puts(m, "!: "); + else if (rel_usecs > 1) + seq_puts(m, "+: "); + else + seq_puts(m, " : "); +} + +static void notrace +print_lat_fmt(struct seq_file *m, struct tracing_iterator *iter, + unsigned int trace_idx, int cpu) +{ + struct tracing_entry *entry = iter->ent; + struct tracing_entry *next_entry = find_next_entry(iter, NULL); + unsigned long abs_usecs; + unsigned long rel_usecs; + int sym_only = !!(trace_flags & TRACE_ITER_SYM_ONLY); + int verbose = !!(trace_flags & TRACE_ITER_VERBOSE); + + if (!next_entry) + next_entry = entry; + rel_usecs = cycles_to_usecs(next_entry->t - entry->t); + abs_usecs = cycles_to_usecs(entry->t - iter->tr->time_start); + + if (verbose) { + seq_printf(m, "%16s %5d %d %d %08x %08x [%08lx]" + " %ld.%03ldms (+%ld.%03ldms): ", + entry->comm, + entry->pid, cpu, entry->flags, + entry->preempt_count, trace_idx, + cycles_to_usecs(entry->t), + abs_usecs/1000, + abs_usecs % 1000, rel_usecs/1000, rel_usecs % 1000); + } else { + lat_print_generic(m, entry, cpu); + lat_print_timestamp(m, abs_usecs, rel_usecs); + } + switch (entry->type) { + case TRACE_FN: + seq_print_ip_sym(m, entry->fn.ip, sym_only); + seq_puts(m, " ("); + seq_print_ip_sym(m, entry->fn.parent_ip, sym_only); + seq_puts(m, ")\n"); + break; + } +} + +static void notrace print_trace_fmt(struct seq_file *m, + struct tracing_iterator *iter) +{ + struct tracing_entry *entry = iter->ent; + unsigned long usec_rem; + unsigned long secs; + int sym_only = !!(trace_flags & TRACE_ITER_SYM_ONLY); + unsigned long long t; + + t = cycles_to_usecs(entry->t); + usec_rem = do_div(t, 1000000ULL); + secs = (unsigned long)t; + + seq_printf(m, "[%5lu.%06lu] ", secs, usec_rem); + seq_printf(m, "CPU %d: ", iter->cpu); + seq_printf(m, "%s:%d ", entry->comm, + entry->pid); + switch (entry->type) { + case TRACE_FN: + seq_print_ip_sym(m, entry->fn.ip, sym_only); + if (entry->fn.parent_ip) { + seq_printf(m, " <-- "); + seq_print_ip_sym(m, entry->fn.parent_ip, + sym_only); + } + break; + } + seq_printf(m, "\n"); +} + +static int trace_empty(struct tracing_iterator *iter) +{ + struct tracing_trace_cpu *data; + int cpu; + + for_each_possible_cpu(cpu) { + data = iter->tr->data[cpu]; + + if (data->trace && + (data->trace_idx || + atomic_read(&data->underrun))) + return 0; + } + return 1; +} + +static int s_show(struct seq_file *m, void *v) +{ + struct tracing_iterator *iter = v; + + if (iter->ent == NULL) { + if (iter->iter_flags & TRACE_FILE_LAT_FMT) { + /* print nothing if the buffers are empty */ + if (trace_empty(iter)) + return 0; + print_trace_header(m, iter); + if (!(trace_flags & TRACE_ITER_VERBOSE)) + print_help_header(m); + } else + seq_printf(m, "tracer:\n"); + } else { + if (iter->iter_flags & TRACE_FILE_LAT_FMT) + print_lat_fmt(m, iter, iter->idx, iter->cpu); + else + print_trace_fmt(m, iter); + } + + return 0; +} + +static struct seq_operations tracer_seq_ops = { + .start = s_start, + .next = s_next, + .stop = s_stop, + .show = s_show, +}; + +static struct tracing_iterator notrace * +__tracing_open(struct inode *inode, struct file *file, int *ret) +{ + struct tracing_iterator *iter; + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) { + *ret = -ENOMEM; + goto out; + } + + iter->tr = inode->i_private; + iter->trace = current_trace; + iter->pos = -1; + + /* TODO stop tracer */ + *ret = seq_open(file, &tracer_seq_ops); + if (!*ret) { + struct seq_file *m = file->private_data; + m->private = iter; + + /* stop the trace while dumping */ + if (iter->tr->ctrl) + trace_enabled = 0; + + if (iter->trace && iter->trace->open) + iter->trace->open(iter); + } else { + kfree(iter); + iter = NULL; + } + + out: + return iter; +} + +int tracing_open_generic(struct inode *inode, struct file *filp) +{ + filp->private_data = inode->i_private; + return 0; +} + +int tracing_release(struct inode *inode, struct file *file) +{ + struct seq_file *m = (struct seq_file *)file->private_data; + struct tracing_iterator *iter = m->private; + + if (iter->trace && iter->trace->close) + iter->trace->close(iter); + + /* reenable tracing if it was previously enabled */ + if (iter->tr->ctrl) + trace_enabled = 1; + + seq_release(inode, file); + kfree(iter); + return 0; +} + +static int tracing_open(struct inode *inode, struct file *file) +{ + int ret; + + __tracing_open(inode, file, &ret); + + return ret; +} + +static int tracing_lt_open(struct inode *inode, struct file *file) +{ + struct tracing_iterator *iter; + int ret; + + iter = __tracing_open(inode, file, &ret); + + if (!ret) + iter->iter_flags |= TRACE_FILE_LAT_FMT; + + return ret; +} + + +static void notrace * +t_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct trace_types_struct *t = m->private; + + (*pos)++; + + if (t) + t = t->next; + + m->private = t; + + return t; +} + +static void *t_start(struct seq_file *m, loff_t *pos) +{ + struct trace_types_struct *t = m->private; + loff_t l = 0; + + mutex_lock(&trace_types_lock); + for (; t && l < *pos; t = t_next(m, t, &l)) + ; + + return t; +} + +static void t_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&trace_types_lock); +} + +static int t_show(struct seq_file *m, void *v) +{ + struct trace_types_struct *t = v; + + if (!t) + return 0; + + seq_printf(m, "%s", t->name); + if (t->next) + seq_putc(m, ' '); + else + seq_putc(m, '\n'); + + return 0; +} + +static struct seq_operations show_traces_seq_ops = { + .start = t_start, + .next = t_next, + .stop = t_stop, + .show = t_show, +}; + +static int show_traces_open(struct inode *inode, struct file *file) +{ + int ret; + + ret = seq_open(file, &show_traces_seq_ops); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = trace_types; + } + + return ret; +} + +static struct file_operations tracing_fops = { + .open = tracing_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tracing_release, +}; + +static struct file_operations tracing_lt_fops = { + .open = tracing_lt_open, + .read = seq_read, + .llseek = seq_lseek, + .release = tracing_release, +}; + +static struct file_operations show_traces_fops = { + .open = show_traces_open, + .read = seq_read, + .release = seq_release, +}; + +static ssize_t tracing_iter_ctrl_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char *buf; + int r = 0; + int len = 0; + int i; + + /* calulate max size */ + for (i = 0; trace_options[i]; i++) { + len += strlen(trace_options[i]); + len += 3; /* "no" and space */ + } + + /* +2 for \n and \0 */ + buf = kmalloc(len + 2, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (i = 0; trace_options[i]; i++) { + if (trace_flags & (1 << i)) + r += sprintf(buf + r, "%s ", trace_options[i]); + else + r += sprintf(buf + r, "no%s ", trace_options[i]); + } + + r += sprintf(buf + r, "\n"); + WARN_ON(r >= len + 2); + + r = simple_read_from_buffer(ubuf, cnt, ppos, + buf, r); + + kfree(buf); + + return r; +} + +static ssize_t tracing_iter_ctrl_write(struct file *filp, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[64]; + char *cmp = buf; + int neg = 0; + int i; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + if (strncmp(buf, "no", 2) == 0) { + neg = 1; + cmp += 2; + } + + for (i = 0; trace_options[i]; i++) { + int len = strlen(trace_options[i]); + + if (strncmp(cmp, trace_options[i], len) == 0) { + if (neg) + trace_flags &= ~(1 << i); + else + trace_flags |= (1 << i); + break; + } + } + + filp->f_pos += cnt; + + return cnt; +} + +static struct file_operations tracing_iter_fops = { + .open = tracing_open_generic, + .read = tracing_iter_ctrl_read, + .write = tracing_iter_ctrl_write, +}; + +static ssize_t tracing_ctrl_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct tracing_trace *tr = filp->private_data; + char buf[64]; + int r; + + r = sprintf(buf, "%ld\n", tr->ctrl); + return simple_read_from_buffer(ubuf, cnt, ppos, + buf, r); +} + +static ssize_t tracing_ctrl_write(struct file *filp, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct tracing_trace *tr = filp->private_data; + long val; + char buf[64]; + + if (cnt > 63) + cnt = 63; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + val = simple_strtoul(buf, NULL, 10); + + val = !!val; + + if (tr->ctrl ^ val) { + if (val) + trace_enabled = 1; + else + trace_enabled = 0; + + tr->ctrl = val; + + mutex_lock(&trace_types_lock); + if (current_trace->ctrl_update) + current_trace->ctrl_update(tr); + mutex_unlock(&trace_types_lock); + } + + filp->f_pos += cnt; + + return cnt; +} + +static ssize_t tracing_set_trace_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[max_tracer_type_len+2]; + int r; + + mutex_lock(&trace_types_lock); + if (current_trace) + r = sprintf(buf, "%s\n", current_trace->name); + else + r = sprintf(buf, "\n"); + mutex_unlock(&trace_types_lock); + + return simple_read_from_buffer(ubuf, cnt, ppos, + buf, r); +} + +static ssize_t tracing_set_trace_write(struct file *filp, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct tracing_trace *tr = &tracer_trace; + struct trace_types_struct *t; + char buf[max_tracer_type_len+1]; + int i; + + if (cnt > max_tracer_type_len) + cnt = max_tracer_type_len; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + /* strip ending whitespace. */ + for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) + buf[i] = 0; + + mutex_lock(&trace_types_lock); + for (t = trace_types; t; t = t->next) { + if (strcmp(t->name, buf) == 0) + break; + } + if (!t || t == current_trace) + goto out; + + if (current_trace && current_trace->reset) + current_trace->reset(tr); + + current_trace = t; + if (t->init) + t->init(tr); + + out: + mutex_unlock(&trace_types_lock); + + filp->f_pos += cnt; + + return cnt; +} + +static struct file_operations tracing_ctrl_fops = { + .open = tracing_open_generic, + .read = tracing_ctrl_read, + .write = tracing_ctrl_write, +}; + +static struct file_operations set_tracer_fops = { + .open = tracing_open_generic, + .read = tracing_set_trace_read, + .write = tracing_set_trace_write, +}; + +static struct dentry *d_tracer; + +struct dentry *tracing_init_dentry(void) +{ + static int once; + + if (d_tracer) + return d_tracer; + + d_tracer = debugfs_create_dir("tracing", NULL); + + if (!d_tracer && !once) { + once = 1; + pr_warning("Could not create debugfs directory 'tracing'\n"); + return NULL; + } + + return d_tracer; +} + +static __init void tracer_init_debugfs(void) +{ + struct dentry *d_tracer; + struct dentry *entry; + + d_tracer = tracing_init_dentry(); + + entry = debugfs_create_file("trace_ctrl", 0644, d_tracer, + &tracer_trace, &tracing_ctrl_fops); + if (!entry) + pr_warning("Could not create debugfs 'trace_ctrl' entry\n"); + + entry = debugfs_create_file("iter_ctrl", 0644, d_tracer, + NULL, &tracing_iter_fops); + if (!entry) + pr_warning("Could not create debugfs 'iter_ctrl' entry\n"); + + entry = debugfs_create_file("latency_trace", 0444, d_tracer, + &tracer_trace, &tracing_lt_fops); + if (!entry) + pr_warning("Could not create debugfs 'latency_trace' entry\n"); + + entry = debugfs_create_file("trace", 0444, d_tracer, + &tracer_trace, &tracing_fops); + if (!entry) + pr_warning("Could not create debugfs 'trace' entry\n"); + + entry = debugfs_create_file("available_tracers", 0444, d_tracer, + &tracer_trace, &show_traces_fops); + if (!entry) + pr_warning("Could not create debugfs 'trace' entry\n"); + + entry = debugfs_create_file("current_tracer", 0444, d_tracer, + &tracer_trace, &set_tracer_fops); + if (!entry) + pr_warning("Could not create debugfs 'trace' entry\n"); +} + +/* dummy trace to disable tracing */ +static struct trace_types_struct disable_trace __read_mostly = +{ + .name = "disable", +}; + +static inline notrace int page_order(const unsigned long size) +{ + const unsigned long nr_pages = DIV_ROUND_UP(size, PAGE_SIZE); + return ilog2(roundup_pow_of_two(nr_pages)); +} + +__init static int tracer_alloc_buffers(void) +{ + const int order = page_order(trace_nr_entries * TRACING_ENTRY_SIZE); + const unsigned long size = (1UL << order) << PAGE_SHIFT; + struct tracing_entry *array; + int i; + + for_each_possible_cpu(i) { + tracer_trace.data[i] = &per_cpu(tracer_trace_cpu, i); + array = (struct tracing_entry *) + __get_free_pages(GFP_KERNEL, order); + if (array == NULL) { + printk(KERN_ERR "tracer: failed to allocate" + " %ld bytes for trace buffer!\n", size); + goto free_buffers; + } + tracer_trace.data[i]->trace = array; + } + + /* + * Since we allocate by orders of pages, we may be able to + * round up a bit. + */ + tracer_trace.entries = size / TRACING_ENTRY_SIZE; + + pr_info("tracer: %ld bytes allocated for %ld", + size, trace_nr_entries); + pr_info(" entries of %ld bytes\n", (long)TRACING_ENTRY_SIZE); + pr_info(" actual entries %ld\n", tracer_trace.entries); + + tracer_init_debugfs(); + + register_trace(&disable_trace); + + return 0; + + free_buffers: + for (i-- ; i >= 0; i--) { + struct tracing_trace_cpu *data = tracer_trace.data[i]; + + if (data && data->trace) { + free_pages((unsigned long)data->trace, order); + data->trace = NULL; + } + } + return -ENOMEM; +} + +device_initcall(tracer_alloc_buffers); Index: linux-mcount.git/lib/tracing/tracer.h =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-mcount.git/lib/tracing/tracer.h 2008-01-29 18:06:25.000000000 -0500 @@ -0,0 +1,96 @@ +#ifndef _LINUX_MCOUNT_TRACER_H +#define _LINUX_MCOUNT_TRACER_H + +#include +#include +#include + +struct tracing_function { + unsigned long ip; + unsigned long parent_ip; +}; + +struct tracing_entry { + char type; + char cpu; /* who will want to trace more than 256 CPUS? */ + char flags; + char preempt_count; /* assumes PREEMPT_MASK is 8 bits or less */ + int pid; + cycle_t t; + char comm[TASK_COMM_LEN]; + struct tracing_function fn; +}; + +struct tracing_trace_cpu { + void *trace; + unsigned long trace_idx; + atomic_t disabled; + atomic_t underrun; + unsigned long saved_latency; + unsigned long critical_start; + unsigned long critical_end; + unsigned long critical_sequence; + unsigned long nice; + unsigned long policy; + unsigned long rt_priority; + cycle_t preempt_timestamp; + pid_t pid; + uid_t uid; + char comm[TASK_COMM_LEN]; +}; + +struct tracing_iterator; + +struct tracing_trace { + unsigned long entries; + long ctrl; + int cpu; + cycle_t time_start; + struct tracing_trace_cpu *data[NR_CPUS]; +}; + +struct trace_types_struct { + const char *name; + void (*init)(struct tracing_trace *tr); + void (*reset)(struct tracing_trace *tr); + void (*open)(struct tracing_iterator *iter); + void (*close)(struct tracing_iterator *iter); + void (*start)(struct tracing_iterator *iter); + void (*stop)(struct tracing_iterator *iter); + void (*ctrl_update)(struct tracing_trace *tr); + struct trace_types_struct *next; +}; + +struct tracing_iterator { + struct tracing_trace *tr; + struct trace_types_struct *trace; + struct tracing_entry *ent; + unsigned long iter_flags; + loff_t pos; + unsigned long next_idx[NR_CPUS]; + int cpu; + int idx; +}; + +#define TRACING_ENTRY_SIZE sizeof(struct tracing_entry) + +void notrace tracing_reset(struct tracing_trace_cpu *data); +int tracing_open_generic(struct inode *inode, struct file *filp); +struct dentry *tracing_init_dentry(void); +void tracing_function_trace(struct tracing_trace *tr, + struct tracing_trace_cpu *data, + unsigned long ip, + unsigned long parent_ip, + unsigned long flags); + +void tracing_start_function_trace(void); +void tracing_stop_function_trace(void); +int register_trace(struct trace_types_struct *type); +void unregister_trace(struct trace_types_struct *type); + +static inline notrace cycle_t now(void) +{ + return get_monotonic_cycles(); +} + +#endif /* _LINUX_MCOUNT_TRACER_H */ Index: linux-mcount.git/lib/Makefile =================================================================== --- linux-mcount.git.orig/lib/Makefile 2008-01-29 17:26:18.000000000 -0500 +++ linux-mcount.git/lib/Makefile 2008-01-29 17:26:43.000000000 -0500 @@ -68,6 +68,7 @@ obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o obj-$(CONFIG_MCOUNT) += tracing/ +obj-$(CONFIG_TRACING) += tracing/ lib-$(CONFIG_GENERIC_BUG) += bug.o Index: linux-mcount.git/lib/tracing/trace_function.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-mcount.git/lib/tracing/trace_function.c 2008-01-29 18:06:24.000000000 -0500 @@ -0,0 +1,72 @@ +/* + * ring buffer based mcount tracer + * + * Copyright (C) 2007 Steven Rostedt + * + * Based on code from the latency_tracer, that is: + * + * Copyright (C) 2004-2006 Ingo Molnar + * Copyright (C) 2004 William Lee Irwin III + */ +#include +#include +#include +#include + +#include "tracer.h" + +static notrace void function_reset(struct tracing_trace *tr) +{ + int cpu; + + tr->time_start = now(); + + for_each_online_cpu(cpu) + tracing_reset(tr->data[cpu]); +} + +static notrace void start_function_trace(struct tracing_trace *tr) +{ + function_reset(tr); + tracing_start_function_trace(); +} + +static notrace void stop_function_trace(struct tracing_trace *tr) +{ + tracing_stop_function_trace(); +} + +static notrace void function_trace_init(struct tracing_trace *tr) +{ + if (tr->ctrl) + start_function_trace(tr); +} + +static notrace void function_trace_reset(struct tracing_trace *tr) +{ + if (tr->ctrl) + stop_function_trace(tr); +} + +static notrace void function_trace_ctrl_update(struct tracing_trace *tr) +{ + if (tr->ctrl) + start_function_trace(tr); + else + stop_function_trace(tr); +} + +static struct trace_types_struct function_trace __read_mostly = +{ + .name = "function", + .init = function_trace_init, + .reset = function_trace_reset, + .ctrl_update = function_trace_ctrl_update, +}; + +static __init int init_function_trace(void) +{ + return register_trace(&function_trace); +} + +device_initcall(init_function_trace); -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/