lists.openwall.net | lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC | |
Open Source and information security mailing list archives
| ||
|
Date: Sat, 07 Feb 2009 12:47:16 +0100 From: Mike Galbraith <efault@....de> To: Ingo Molnar <mingo@...e.hu> Cc: Peter Zijlstra <a.p.zijlstra@...llo.nl>, Arjan van de Ven <arjan@...radead.org>, Thomas Gleixner <tglx@...utronix.de>, Paul Mackerras <paulus@...ba.org>, LKML <linux-kernel@...r.kernel.org> Subject: kerneltop "enhancements" Greetings, Dunno if you're interested in any of this... While tinkering, trying to figure out why I receive more events when using -C 0 than when profiling all CPUs, I found that it's because of blocking reads jamming up the loop. I also noticed that when using poll, my event count when not using -C N would rise up to the expected level, but it maxed out at around 25k events/sec for the whole box. That turned out to be caused by this check: if (!event_array[i][counter].revents) continue; I guess I need to look into that a bit more, but without it, and using poll to stop the loop only once we've read all we can get with non-blocking reads, things improved a bunch. However, when only one CPU was busy, I ended up with default_idle screwing things up. Below is what I did about all this. Now, with netperf TCP_RR running on one CPU... 
------------------------------------------------------------------------------ KernelTop: 23686 irqs/sec kernel:95.8% [NMI, 100000 CPU cycles], (all, 4 CPUs) ------------------------------------------------------------------------------ events RIP kernel function ______ ______ ________________ _______________ 6972.00 - ffffffff803f1c66 : tcp_ack 6290.00 - ffffffff8047bb7f : __schedule 4733.00 - ffffffff803ec4df : tcp_sendmsg 4437.00 - ffffffff803f5f96 : tcp_transmit_skb 4080.00 - ffffffff8047df48 : _spin_lock_irqsave 3685.00 - ffffffff8020a6a8 : __switch_to 3502.00 - ffffffff8047e0ac : _spin_lock_bh 3477.00 - ffffffff803eb543 : tcp_recvmsg 3227.00 - ffffffff803fbcbf : tcp_v4_rcv 3091.00 - ffffffff803f3df5 : tcp_rcv_established 3052.00 - ffffffff8020be10 : system_call 2376.00 - ffffffff8047dfd2 : _spin_lock 2153.00 - ffffffff803c2630 : net_rx_action 2129.00 - ffffffff803e7526 : __inet_lookup_established 2071.00 - ffffffff803c3b28 : netif_receive_skb 2055.00 - ffffffff8023ce9f : local_bh_enable 2043.00 - ffffffff80211c88 : native_sched_clock 2010.00 - ffffffff803e069a : ip_rcv 1910.00 - ffffffff8047e01e : _spin_unlock_irqrestore ..or on all CPUS.. 
------------------------------------------------------------------------------ KernelTop: 93783 irqs/sec kernel:96.0% [NMI, 100000 CPU cycles], (all, 4 CPUs) ------------------------------------------------------------------------------ events RIP kernel function ______ ______ ________________ _______________ 30798.00 - ffffffff803f1c66 : tcp_ack 26258.00 - ffffffff803ec4df : tcp_sendmsg 20500.00 - ffffffff8047bb7f : __schedule 19242.00 - ffffffff803f5f96 : tcp_transmit_skb 14895.00 - ffffffff803eb543 : tcp_recvmsg 13926.00 - ffffffff8047e0ac : _spin_lock_bh 13015.00 - ffffffff803f3df5 : tcp_rcv_established 12699.00 - ffffffff803e4945 : ip_queue_xmit 12213.00 - ffffffff8047df48 : _spin_lock_irqsave 11770.00 - ffffffff803fbcbf : tcp_v4_rcv 11585.00 - ffffffff8020a6a8 : __switch_to 11391.00 - ffffffff8020be10 : system_call 9522.00 - ffffffff803e4fdc : ip_finish_output 9247.00 - ffffffff803f83a8 : tcp_write_xmit 8764.00 - ffffffff803c851a : dst_release 8597.00 - ffffffff8023ce9f : local_bh_enable 8301.00 - ffffffff8047dfd2 : _spin_lock 8261.00 - ffffffff803c2630 : net_rx_action 8224.00 - ffffffff803c3b28 : netif_receive_skb BTW, how does one convince getopt_long() that an option really really doesn't require an argument? no_argument works for the long version, but it insists that an argument is required for the short version regardless. 
--- kerneltop.c.org 2009-02-05 09:54:23.000000000 +0100 +++ kerneltop.c 2009-02-07 11:53:59.000000000 +0100 @@ -199,6 +199,7 @@ static unsigned long filter_start; static unsigned long filter_end; static int delay_secs = 2; +static int zero; static int dump_symtab; struct source_line { @@ -232,7 +233,8 @@ static void display_help(void) " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" " -x path --vmlinux=<path> # the vmlinux binary, for -s use:\n" " -s symbol --symbol=<symbol> # function to be showed annotated one-shot\n" - " -D 1 --dump_symtab=1 # dump symbol table to stderr on startup\n" + " -z 1 --zero # zero counts after display\n" + " -D 1 --dump_symtab # dump symbol table to stderr on startup\n" "\n"); exit(0); @@ -257,10 +259,11 @@ static void process_options(int argc, ch {"pid", required_argument, NULL, 'p'}, {"vmlinux", required_argument, NULL, 'x'}, {"symbol", required_argument, NULL, 's'}, + {"zero", no_argument, NULL, 'z'}, {"dump_symtab", required_argument, NULL, 'D'}, {NULL, 0, NULL, 0 } }; - int c = getopt_long(argc, argv, "c:C:d:e:f:g:n:p:s:x:D:", + int c = getopt_long(argc, argv, "c:C:d:e:f:g:n:p:s:x:z:D:", long_options, &option_index); if (c == -1) break; @@ -284,6 +287,7 @@ static void process_options(int argc, ch case 'n': nmi = atoi(optarg); break; case 'p': tid = atoi(optarg); break; case 's': sym_filter = strdup(optarg); break; + case 'z': zero = 1; break; case 'x': vmlinux = strdup(optarg); break; case 'D': dump_symtab = atoi(optarg); break; default: error = 1; break; @@ -314,6 +318,7 @@ struct sym_entry { unsigned long long addr; char *sym; unsigned long count[MAX_COUNTERS]; + int skip; GList *source; }; @@ -357,8 +362,6 @@ static long events; static long userspace_events; static const char CONSOLE_CLEAR[] = ".[H.[2J"; -#define USE_POLL 0 - static struct sym_entry tmp[MAX_SYMS]; static void print_sym_table(void) @@ -445,7 +448,7 @@ static void print_sym_table(void) * Add decay to the counts: */ for (count = 0; count < 
nr_counters; count++) - sym_table[i].count[count] = sym_table[i].count[count] * 7 / 8; + sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8; } if (sym_filter_entry) @@ -489,18 +492,13 @@ static int read_symbol(FILE *in, struct sym = str; - if (!strcmp(sym, "_text")) { - min_ip = s->addr; + /* Filter out known duplicates and non-text symbols. */ + if (!strcmp(sym, "_text")) return 1; - } - if (!min_ip && !strcmp(sym, "_stext")) { - min_ip = s->addr; + if (!min_ip && !strcmp(sym, "_stext")) return 1; - } if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext")) return 1; - - /* Filter out known duplicates and non-text symbols. */ if (stype != 'T' && stype != 't') return 1; if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14)) @@ -512,6 +510,13 @@ static int read_symbol(FILE *in, struct assert(s->sym); strcpy((char *)s->sym, str); + s->skip = 0; + + /* Tag events to be skipped. */ + if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym)) + s->skip = 1; + if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym)) + s->skip = 1; if (filter_match == 1) { filter_end = s->addr; @@ -783,18 +788,14 @@ static void record_ip(uint64_t ip, int c idx = left_idx; - sym_table[idx].count[counter]++; + if (!sym_table[idx].skip) + sym_table[idx].count[counter]++; + else events--; } -static const int event_threshold = 10000; - static void process_event(uint64_t ip, int counter) { events++; - if (time(NULL) >= last_refresh + delay_secs) { - print_sym_table(); - events = userspace_events = 0; - } if (ip < min_ip || ip > max_ip) { userspace_events++; @@ -813,9 +814,7 @@ int main(int argc, char *argv[]) unsigned int cpu; uint64_t ip; ssize_t res; -#if USE_POLL int ret; -#endif process_options(argc, argv); @@ -840,6 +839,7 @@ int main(int argc, char *argv[]) hw_event.nmi = nmi; fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd); + fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); if (fd[i][counter] < 0) { 
printf("kerneltop error: syscall returned with %d (%s)\n", fd[i][counter], strerror(-fd[i][counter])); @@ -868,23 +868,27 @@ int main(int argc, char *argv[]) last_refresh = time(NULL); while (1) { -#if USE_POLL - ret = poll(event_array, nr_cpus, 1000); -#endif + int hits = events; for (i = 0; i < nr_cpus; i++) { for (counter = 0; counter < nr_counters; counter++) { -#if USE_POLL - if (!event_array[i][counter].revents) - continue; -#endif - res = read(fd[i][counter], (char *) &ip, sizeof(ip)); - assert(res == sizeof(ip)); + if (res > 0) { + assert(res == sizeof(ip)); - process_event(ip, counter); + process_event(ip, counter); + } } } + + if (time(NULL) >= last_refresh + delay_secs) { + print_sym_table(); + events = userspace_events = 0; + } + + if (hits == events) + ret = poll(event_array, nr_cpus, 1000); + hits = events; } return 0; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@...r.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists