[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1248331306.13472.25.camel@marge.simson.net>
Date: Thu, 23 Jul 2009 08:41:46 +0200
From: Mike Galbraith <efault@....de>
To: Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc: LKML <linux-kernel@...r.kernel.org>, Ingo Molnar <mingo@...e.hu>
Subject: Re: [patch] perf_counter tools: resurrect perf top annotation in a
simple interactive form
Good morning,
On Wed, 2009-07-22 at 20:36 +0200, Mike Galbraith wrote:
> On Wed, 2009-07-22 at 18:46 +0200, Mike Galbraith wrote:
> > On Wed, 2009-07-22 at 16:49 +0200, Peter Zijlstra wrote:
> > > ok, so I tried this, but the screen updates overwrite this input
> > > screen before I can read it ;-)
> >
> > That's not an input screen, it's a legend shown if you poke an unknown
> > key. Poking a known key prompts for input.
>
> Hm, that is lame though, so updated patch below. If an unknown key is
> poked, display known keys, and wait for the user to poke one that _is_
> known. If you know the keys, you don't have to look at annoying legend,
> if you forget, tap space or whatever.
I leered at it again this morning, and changed it a wee bit more.
Hitting an unknown key displays keys and current settings, and prompts
for input. Hitting space allows you to cancel entry.
Better? Anything else fugly?
I've also now added a hotkey [w] to switch between weighted profile
display when multiple counters are available, or the current counter
selected via [E]vent. I'll post that if this one is acceptable.
(I noticed that inherit patchlet swam upstream, so I guess I'll need to
rebase and resubmit once tip catches up?)
perf_counter tools: resurrect perf top annotation in a simple interactive form.
This patch allows the user to select any symbol in kernel space for source level
annotation on the fly, switch between event counters and alter display variables.
When symbol details are being displayed, stopping annotation reverts to normal.
known keys:
[d] display refresh delay. (1)
[e] display entries (lines). (15)
[E] annotate event counter. (cycles)
[f] profile display filter (count). (1)
[F] annotate display filter (percent). (5%)
[qQ] quit.
[s] annotate symbol. (NULL)
[S] stop annotation.
[z] toggle count zeroing. (0)
Sample:
------------------------------------------------------------------------------
PerfTop: 16719 irqs/sec kernel:78.7% [cache-misses/cache-references/instructions/cycles], (all, 4 CPUs)
------------------------------------------------------------------------------
Showing cache-misses for e1000_clean_rx_irq
Events Pcnt (>=3%)
0 0.0% /* adjust length to remove Ethernet CRC */
0 0.0% if (!(adapter->flags2 & FLAG2_CRC_STRIPPING))
0 0.0% length -= 4;
436 5.0% f039: 41 f6 84 24 5c 29 00 testb $0x1,0x295c(%r12)
0 0.0% f089: 8b 4d 84 mov -0x7c(%rbp),%ecx
0 0.0% f08c: 48 83 ef 02 sub $0x2,%rdi
0 0.0% f090: 48 83 ee 02 sub $0x2,%rsi
811 9.3% f094: f3 a4 rep movsb %ds:(%rsi),%es:(%rdi)
0 0.0%
0 0.0% while (rx_desc->status & E1000_RXD_STAT_DD) {
0 0.0% f114: 41 f6 47 0c 01 testb $0x1,0xc(%r15)
7226 82.6% f119: 0f 85 24 fe ff ff jne ef43 <e1000_clean_rx_irq+0x84>
Available events:
0 cache-misses
1 cache-references
2 instructions
3 cycles
Enter details event counter: 2
------------------------------------------------------------------------------
PerfTop: 15035 irqs/sec kernel:79.0% [cache-misses/cache-references/instructions/cycles], (all, 4 CPUs)
------------------------------------------------------------------------------
Showing instructions for e1000_clean_rx_irq
Events Pcnt (>=3%)
0 0.0% int *work_done, int work_to_do)
0 0.0% {
175 0.9% eebf: 55 push %rbp
1898 9.8% eec0: 48 89 e5 mov %rsp,%rbp
0 0.0%
0 0.0% i = rx_ring->next_to_clean;
140 0.7% ef0a: 0f b7 41 1a movzwl 0x1a(%rcx),%eax
670 3.4% ef0e: 89 45 ac mov %eax,-0x54(%rbp)
0 0.0% {
0 0.0% memcpy(skb->data + offset, from, len);
91 0.5% f07b: 49 8b b6 e8 00 00 00 mov 0xe8(%r14),%rsi
1153 5.9% f082: 48 8b b8 e8 00 00 00 mov 0xe8(%rax),%rdi
42 0.2% f089: 8b 4d 84 mov -0x7c(%rbp),%ecx
14 0.1% f08c: 48 83 ef 02 sub $0x2,%rdi
0 0.0% f090: 48 83 ee 02 sub $0x2,%rsi
1618 8.3% f094: f3 a4 rep movsb %ds:(%rsi),%es:(%rdi)
0 0.0%
0 0.0% /* return some buffers to hardware, one at a time is too slow */
0 0.0% if (cleaned_count >= E1000_RX_BUFFER_WRITE) {
867 4.5% f0e7: 83 7d b0 0f cmpl $0xf,-0x50(%rbp)
0 0.0%
0 0.0% while (rx_desc->status & E1000_RXD_STAT_DD) {
37 0.2% f114: 41 f6 47 0c 01 testb $0x1,0xc(%r15)
4047 20.8% f119: 0f 85 24 fe ff ff jne ef43 <e1000_clean_rx_irq+0x84>
Signed-off-by: Mike Galbraith <efault@....de>
Cc: Ingo Molnar <mingo@...e.hu>
Cc: Peter Zijlstra <a.p.zijlstra@...llo.nl>
LKML-Reference: <new-submission>
---
tools/perf/builtin-top.c | 504 +++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 465 insertions(+), 39 deletions(-)
Index: linux-2.6/tools/perf/builtin-top.c
===================================================================
--- linux-2.6.orig/tools/perf/builtin-top.c
+++ linux-2.6/tools/perf/builtin-top.c
@@ -31,6 +31,8 @@
#include <fcntl.h>
#include <stdio.h>
+#include <termios.h>
+#include <unistd.h>
#include <errno.h>
#include <time.h>
@@ -54,7 +56,7 @@ static int system_wide = 0;
static int default_interval = 100000;
-static u64 count_filter = 5;
+static int count_filter = 5;
static int print_entries = 15;
static int target_pid = -1;
@@ -68,15 +70,27 @@ static int freq = 0;
static int verbose = 0;
static char *vmlinux = NULL;
-static char *sym_filter;
-static unsigned long filter_start;
-static unsigned long filter_end;
-
static int delay_secs = 2;
static int zero;
static int dump_symtab;
/*
+ * Source
+ */
+
+struct source_line {
+ u64 eip;
+ unsigned long count[MAX_COUNTERS];
+ char *line;
+ struct source_line *next;
+};
+
+static char *sym_filter = NULL;
+struct sym_entry *sym_filter_entry = NULL;
+static int sym_pcnt_filter = 5;
+static int sym_counter = 0;
+
+/*
* Symbols
*/
@@ -90,9 +104,237 @@ struct sym_entry {
unsigned long snap_count;
double weight;
int skip;
+ struct source_line *source;
+ struct source_line *lines;
+ struct source_line **lines_tail;
+ pthread_mutex_t source_lock;
};
-struct sym_entry *sym_filter_entry;
+/*
+ * Source functions
+ */
+
+static void parse_source(struct sym_entry *syme)
+{
+ struct symbol *sym;
+ struct module *module;
+ struct section *section = NULL;
+ FILE *file;
+ char command[PATH_MAX*2], *path = vmlinux;
+ u64 start, end, len;
+
+ if (!syme)
+ return;
+
+ if (syme->lines) {
+ pthread_mutex_lock(&syme->source_lock);
+ goto out_assign;
+ }
+
+ sym = (struct symbol *)(syme + 1);
+ module = sym->module;
+
+ if (module)
+ path = module->path;
+ if (!path)
+ return;
+
+ start = sym->obj_start;
+ if (!start)
+ start = sym->start;
+
+ if (module) {
+ section = module->sections->find_section(module->sections, ".text");
+ if (section)
+ start -= section->vma;
+ }
+
+ end = start + sym->end - sym->start + 1;
+ len = sym->end - sym->start;
+
+ sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path);
+
+ file = popen(command, "r");
+ if (!file)
+ return;
+
+ pthread_mutex_lock(&syme->source_lock);
+ syme->lines_tail = &syme->lines;
+ while (!feof(file)) {
+ struct source_line *src;
+ size_t dummy = 0;
+ char *c;
+
+ src = malloc(sizeof(struct source_line));
+ assert(src != NULL);
+ memset(src, 0, sizeof(struct source_line));
+
+ if (getline(&src->line, &dummy, file) < 0)
+ break;
+ if (!src->line)
+ break;
+
+ c = strchr(src->line, '\n');
+ if (c)
+ *c = 0;
+
+ src->next = NULL;
+ *syme->lines_tail = src;
+ syme->lines_tail = &src->next;
+
+ if (strlen(src->line)>8 && src->line[8] == ':') {
+ src->eip = strtoull(src->line, NULL, 16);
+ if (section)
+ src->eip += section->vma;
+ }
+ if (strlen(src->line)>8 && src->line[16] == ':') {
+ src->eip = strtoull(src->line, NULL, 16);
+ if (section)
+ src->eip += section->vma;
+ }
+ }
+ pclose(file);
+out_assign:
+ sym_filter_entry = syme;
+ pthread_mutex_unlock(&syme->source_lock);
+}
+
+static void __zero_source_counters(struct sym_entry *syme)
+{
+ int i;
+ struct source_line *line;
+
+ line = syme->lines;
+ while (line) {
+ for (i = 0; i < nr_counters; i++)
+ line->count[i] = 0;
+ line = line->next;
+ }
+}
+
+static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
+{
+ struct source_line *line;
+
+ if (syme != sym_filter_entry)
+ return;
+
+ if (pthread_mutex_trylock(&syme->source_lock))
+ return;
+
+ if (!syme->source)
+ goto out_unlock;
+
+ for (line = syme->lines; line; line = line->next) {
+ if (line->eip == ip) {
+ line->count[counter]++;
+ break;
+ }
+ if (line->eip > ip)
+ break;
+ }
+out_unlock:
+ pthread_mutex_unlock(&syme->source_lock);
+}
+
+static void lookup_sym_source(struct sym_entry *syme)
+{
+ struct symbol *symbol = (struct symbol *)(syme + 1);
+ struct source_line *line;
+ char pattern[PATH_MAX];
+ char *idx;
+
+ sprintf(pattern, "<%s>:", symbol->name);
+
+ if (symbol->module) {
+ idx = strstr(pattern, "\t");
+ if (idx)
+ *idx = 0;
+ }
+
+ pthread_mutex_lock(&syme->source_lock);
+ for (line = syme->lines; line; line = line->next) {
+ if (strstr(line->line, pattern)) {
+ syme->source = line;
+ break;
+ }
+ }
+ pthread_mutex_unlock(&syme->source_lock);
+}
+
+static void show_lines(struct source_line *queue, int count, int total)
+{
+ int i;
+ struct source_line *line;
+
+ line = queue;
+ for (i = 0; i < count; i++) {
+ float pcnt = 100.0*(float)line->count[sym_counter]/(float)total;
+
+ printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line);
+ line = line->next;
+ }
+}
+
+#define TRACE_COUNT 3
+
+static void show_details(struct sym_entry *syme)
+{
+ struct symbol *symbol;
+ struct source_line *line;
+ struct source_line *line_queue = NULL;
+ int displayed = 0;
+ int line_queue_count = 0, total = 0, more = 0;
+
+ if (!syme)
+ return;
+
+ if (!syme->source)
+ lookup_sym_source(syme);
+
+ if (!syme->source)
+ return;
+
+ symbol = (struct symbol *)(syme + 1);
+ printf("Showing %s for %s\n", event_name(sym_counter), symbol->name);
+ printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
+
+ pthread_mutex_lock(&syme->source_lock);
+ line = syme->source;
+ while (line) {
+ total += line->count[sym_counter];
+ line = line->next;
+ }
+
+ line = syme->source;
+ while (line) {
+ float pcnt = 0.0;
+
+ if (!line_queue_count)
+ line_queue = line;
+ line_queue_count++;
+
+ if (line->count[sym_counter])
+ pcnt = 100.0 * line->count[sym_counter] / (float)total;
+ if (pcnt >= (float)sym_pcnt_filter) {
+ if (displayed <= print_entries)
+ show_lines(line_queue, line_queue_count, total);
+ else more++;
+ displayed += line_queue_count;
+ line_queue_count = 0;
+ line_queue = NULL;
+ } else if (line_queue_count > TRACE_COUNT) {
+ line_queue = line_queue->next;
+ line_queue_count--;
+ }
+
+ line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
+ line = line->next;
+ }
+ pthread_mutex_unlock(&syme->source_lock);
+ if (more)
+ printf("%d lines not displayed, maybe increase display entries [e]\n", more);
+}
struct dso *kernel_dso;
@@ -227,6 +469,11 @@ static void print_sym_table(void)
printf("------------------------------------------------------------------------------\n\n");
+ if (sym_filter_entry) {
+ show_details(sym_filter_entry);
+ return;
+ }
+
if (nr_counters == 1)
printf(" samples pcnt");
else
@@ -241,7 +488,7 @@ static void print_sym_table(void)
struct symbol *sym = (struct symbol *)(syme + 1);
double pcnt;
- if (++printed > print_entries || syme->snap_count < count_filter)
+ if (++printed > print_entries || (int)syme->snap_count < count_filter)
continue;
pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
@@ -260,19 +507,218 @@ static void print_sym_table(void)
}
}
+static void prompt_integer(int *target, const char *msg)
+{
+ char *buf = malloc(0), *p;
+ size_t dummy = 0;
+ int tmp;
+
+ fprintf(stdout, "\n%s: ", msg);
+ if (getline(&buf, &dummy, stdin) < 0)
+ return;
+
+ p = strchr(buf, '\n');
+ if (p)
+ *p = 0;
+
+ p = buf;
+ while(*p) {
+ if (!isdigit(*p))
+ goto out_free;
+ p++;
+ }
+ tmp = strtoul(buf, NULL, 10);
+ *target = tmp;
+out_free:
+ free(buf);
+}
+
+static void prompt_percent(int *target, const char *msg)
+{
+ int tmp = 0;
+
+ prompt_integer(&tmp, msg);
+ if (tmp >= 0 && tmp <= 100)
+ *target = tmp;
+}
+
+static void prompt_symbol(struct sym_entry **target, const char *msg)
+{
+ char *buf = malloc(0), *p;
+ struct sym_entry *syme = *target, *n, *found = NULL;
+ size_t dummy = 0;
+
+ /* zero counters of active symbol */
+ if (syme) {
+ pthread_mutex_lock(&syme->source_lock);
+ __zero_source_counters(syme);
+ *target = NULL;
+ pthread_mutex_unlock(&syme->source_lock);
+ }
+
+ fprintf(stdout, "\n%s: ", msg);
+ if (getline(&buf, &dummy, stdin) < 0)
+ goto out_free;
+
+ p = strchr(buf, '\n');
+ if (p)
+ *p = 0;
+
+ pthread_mutex_lock(&active_symbols_lock);
+ syme = list_entry(active_symbols.next, struct sym_entry, node);
+ pthread_mutex_unlock(&active_symbols_lock);
+
+ list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
+ struct symbol *sym = (struct symbol *)(syme + 1);
+
+ if (!strcmp(buf, sym->name)) {
+ found = syme;
+ break;
+ }
+ }
+
+ if (!found) {
+ fprintf(stderr, "Sorry, %s is not active.\n", sym_filter);
+ sleep(1);
+ return;
+ } else
+ parse_source(found);
+
+out_free:
+ free(buf);
+}
+
+static void print_known_keys(void)
+{
+ char *name = NULL;
+
+ if (sym_filter_entry) {
+ struct symbol *sym = (struct symbol *)(sym_filter_entry+1);
+ name = sym->name;
+ }
+
+ fprintf(stdout, "\nknown keys:\n");
+ fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs);
+ fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries);
+ fprintf(stdout, "\t[E] annotate event counter. \t(%s)\n", event_name(sym_counter));
+ fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter);
+ fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
+ fprintf(stdout, "\t[qQ] quit.\n");
+ fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
+ fprintf(stdout, "\t[S] stop annotation.\n");
+ fprintf(stdout, "\t[z] toggle count zeroing. \t(%d)\n", zero ? 1 : 0);
+}
+
+static void handle_keypress(int c)
+{
+ int once = 0;
+repeat:
+ switch (c) {
+ case 'd':
+ prompt_integer(&delay_secs, "Enter display delay");
+ break;
+ case 'e':
+ prompt_integer(&print_entries, "Enter display entries (lines)");
+ break;
+ case 'E':
+ if (nr_counters > 1) {
+ int i;
+
+ fprintf(stderr, "\nAvailable events:");
+ for (i = 0; i < nr_counters; i++)
+ fprintf(stderr, "\n\t%d %s", i, event_name(i));
+
+ prompt_integer(&sym_counter, "Enter details event counter");
+
+ if (sym_counter >= nr_counters) {
+ fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0));
+ sym_counter = 0;
+ sleep(1);
+ }
+ } else sym_counter = 0;
+ break;
+ case 'f':
+ prompt_integer(&count_filter, "Enter display event count filter");
+ break;
+ case 'F':
+ prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
+ break;
+ case 'q':
+ case 'Q':
+ printf("exiting.\n");
+ exit(0);
+ case 's':
+ prompt_symbol(&sym_filter_entry, "Enter details symbol");
+ break;
+ case 'S':
+ if (!sym_filter_entry)
+ break;
+ else {
+ struct sym_entry *syme = sym_filter_entry;
+
+ pthread_mutex_lock(&syme->source_lock);
+ sym_filter_entry = NULL;
+ __zero_source_counters(syme);
+ pthread_mutex_unlock(&syme->source_lock);
+ }
+ break;
+ case 'z':
+ zero = ~zero;
+ break;
+ default: {
+ struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
+ struct termios tc, save;
+
+ if (!once) {
+ print_known_keys();
+ fprintf(stdout, "\nEnter selection, or <SPACE> to continue: ");
+ fflush(stdout);
+ once++;
+ }
+
+ tcgetattr(0, &save);
+ tc = save;
+ tc.c_lflag &= ~(ICANON | ECHO);
+ tc.c_cc[VMIN] = 0;
+ tc.c_cc[VTIME] = 0;
+ tcsetattr(0, TCSANOW, &tc);
+
+ poll(&stdin_poll, 1, -1);
+ c = getc(stdin);
+
+ tcsetattr(0, TCSAFLUSH, &save);
+ if (c != ' ')
+ goto repeat;
+ }
+ }
+}
+
static void *display_thread(void *arg __used)
{
struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
- int delay_msecs = delay_secs * 1000;
+ struct termios tc, save;
+ int delay_msecs, c;
- printf("PerfTop refresh period: %d seconds\n", delay_secs);
+ tcgetattr(0, &save);
+ tc = save;
+ tc.c_lflag &= ~(ICANON | ECHO);
+ tc.c_cc[VMIN] = 0;
+ tc.c_cc[VTIME] = 0;
+repeat:
+ delay_msecs = delay_secs * 1000;
+ tcsetattr(0, TCSANOW, &tc);
+ /* trash return*/
+ getc(stdin);
do {
print_sym_table();
} while (!poll(&stdin_poll, 1, delay_msecs) == 1);
- printf("key pressed - exiting.\n");
- exit(0);
+ c = getc(stdin);
+ tcsetattr(0, TCSAFLUSH, &save);
+
+ handle_keypress(c);
+ goto repeat;
return NULL;
}
@@ -291,7 +737,6 @@ static const char *skip_symbols[] = {
static int symbol_filter(struct dso *self, struct symbol *sym)
{
- static int filter_match;
struct sym_entry *syme;
const char *name = sym->name;
int i;
@@ -313,6 +758,10 @@ static int symbol_filter(struct dso *sel
return 1;
syme = dso__sym_priv(self, sym);
+ pthread_mutex_init(&syme->source_lock, NULL);
+ if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter))
+ sym_filter_entry = syme;
+
for (i = 0; skip_symbols[i]; i++) {
if (!strcmp(skip_symbols[i], name)) {
syme->skip = 1;
@@ -320,29 +769,6 @@ static int symbol_filter(struct dso *sel
}
}
- if (filter_match == 1) {
- filter_end = sym->start;
- filter_match = -1;
- if (filter_end - filter_start > 10000) {
- fprintf(stderr,
- "hm, too large filter symbol <%s> - skipping.\n",
- sym_filter);
- fprintf(stderr, "symbol filter start: %016lx\n",
- filter_start);
- fprintf(stderr, " end: %016lx\n",
- filter_end);
- filter_end = filter_start = 0;
- sym_filter = NULL;
- sleep(1);
- }
- }
-
- if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) {
- filter_match = 1;
- filter_start = sym->start;
- }
-
-
return 0;
}
@@ -378,8 +804,6 @@ out_delete_dso:
return -1;
}
-#define TRACE_COUNT 3
-
/*
* Binary search in the histogram table and record the hit:
*/
@@ -392,6 +816,7 @@ static void record_ip(u64 ip, int counte
if (!syme->skip) {
syme->count[counter]++;
+ record_precise_ip(syme, counter, ip);
pthread_mutex_lock(&active_symbols_lock);
if (list_empty(&syme->node) || !syme->node.next)
__list_insert_active_sym(syme);
@@ -685,8 +1110,8 @@ static const struct option options[] = {
"only display functions with more events than this"),
OPT_BOOLEAN('g', "group", &group,
"put the counters into a counter group"),
- OPT_STRING('s', "sym-filter", &sym_filter, "pattern",
- "only display symbols matchig this pattern"),
+ OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
+ "symbol to annotate - requires -k option"),
OPT_BOOLEAN('z', "zero", &zero,
"zero history across updates"),
OPT_INTEGER('F', "freq", &freq,
@@ -729,6 +1154,7 @@ int cmd_top(int argc, const char **argv,
delay_secs = 1;
parse_symbols();
+ parse_source(sym_filter_entry);
/*
* Fill in the ones not specifically initialized via -c:
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists