/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
#include "builtin.h"

#include "perf.h"

#include "util/symbol.h"
#include "util/color.h"
#include "util/thread.h"
#include "util/util.h"
#include <linux/rbtree.h>
#include "util/parse-options.h"
#include "util/parse-events.h"

#include "util/debug.h"

#include <assert.h>
#include <fcntl.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <termios.h>
#include <unistd.h>

#include <errno.h>
#include <time.h>
#include <sched.h>
#include <pthread.h>

#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <sys/uio.h>
#include <sys/mman.h>

#include <linux/unistd.h>
#include <linux/types.h>

56 | static int fd[MAX_NR_CPUS][MAX_COUNTERS]; | |
57 | ||
58 | static int system_wide = 0; | |
59 | ||
60 | static int default_interval = 0; | |
61 | ||
62 | static int count_filter = 5; | |
63 | static int print_entries = 15; | |
64 | ||
65 | static int target_pid = -1; | |
66 | static int inherit = 0; | |
67 | static int profile_cpu = -1; | |
68 | static int nr_cpus = 0; | |
69 | static unsigned int realtime_prio = 0; | |
70 | static int group = 0; | |
71 | static unsigned int page_size; | |
72 | static unsigned int mmap_pages = 16; | |
73 | static int freq = 1000; /* 1 KHz */ | |
74 | ||
75 | static int delay_secs = 2; | |
76 | static int zero = 0; | |
77 | static int dump_symtab = 0; | |
78 | ||
79 | /* | |
80 | * Source | |
81 | */ | |
82 | ||
83 | struct source_line { | |
84 | u64 eip; | |
85 | unsigned long count[MAX_COUNTERS]; | |
86 | char *line; | |
87 | struct source_line *next; | |
88 | }; | |
89 | ||
/* Symbol name compared against in symbol_filter(); NULL when unset. */
static char			*sym_filter;
/* Symbol currently selected for annotation, or NULL. */
struct sym_entry		*sym_filter_entry;
/* Minimum percentage a source line needs to be shown by show_details(). */
static int			sym_pcnt_filter	= 5;
/* Index of the counter used for the annotation and unweighted views. */
static int			sym_counter;
/* Non-zero selects the weighted ordering; toggled with ~ by the [w] key. */
static int			display_weighted = -1;

96 | /* | |
97 | * Symbols | |
98 | */ | |
99 | ||
100 | struct sym_entry { | |
101 | struct rb_node rb_node; | |
102 | struct list_head node; | |
103 | unsigned long count[MAX_COUNTERS]; | |
104 | unsigned long snap_count; | |
105 | double weight; | |
106 | int skip; | |
107 | struct map *map; | |
108 | struct source_line *source; | |
109 | struct source_line *lines; | |
110 | struct source_line **lines_tail; | |
111 | pthread_mutex_t source_lock; | |
112 | }; | |
113 | ||
114 | /* | |
115 | * Source functions | |
116 | */ | |
117 | ||
118 | static void parse_source(struct sym_entry *syme) | |
119 | { | |
120 | struct symbol *sym; | |
121 | struct map *map; | |
122 | FILE *file; | |
123 | char command[PATH_MAX*2]; | |
124 | const char *path; | |
125 | u64 len; | |
126 | ||
127 | if (!syme) | |
128 | return; | |
129 | ||
130 | if (syme->lines) { | |
131 | pthread_mutex_lock(&syme->source_lock); | |
132 | goto out_assign; | |
133 | } | |
134 | ||
135 | sym = (struct symbol *)(syme + 1); | |
136 | map = syme->map; | |
137 | path = map->dso->long_name; | |
138 | ||
139 | len = sym->end - sym->start; | |
140 | ||
141 | sprintf(command, | |
142 | "objdump --start-address=0x%016Lx " | |
143 | "--stop-address=0x%016Lx -dS %s", | |
144 | sym->start, sym->end, path); | |
145 | ||
146 | file = popen(command, "r"); | |
147 | if (!file) | |
148 | return; | |
149 | ||
150 | pthread_mutex_lock(&syme->source_lock); | |
151 | syme->lines_tail = &syme->lines; | |
152 | while (!feof(file)) { | |
153 | struct source_line *src; | |
154 | size_t dummy = 0; | |
155 | char *c; | |
156 | ||
157 | src = malloc(sizeof(struct source_line)); | |
158 | assert(src != NULL); | |
159 | memset(src, 0, sizeof(struct source_line)); | |
160 | ||
161 | if (getline(&src->line, &dummy, file) < 0) | |
162 | break; | |
163 | if (!src->line) | |
164 | break; | |
165 | ||
166 | c = strchr(src->line, '\n'); | |
167 | if (c) | |
168 | *c = 0; | |
169 | ||
170 | src->next = NULL; | |
171 | *syme->lines_tail = src; | |
172 | syme->lines_tail = &src->next; | |
173 | ||
174 | if (strlen(src->line)>8 && src->line[8] == ':') { | |
175 | src->eip = strtoull(src->line, NULL, 16); | |
176 | src->eip += map->start; | |
177 | } | |
178 | if (strlen(src->line)>8 && src->line[16] == ':') { | |
179 | src->eip = strtoull(src->line, NULL, 16); | |
180 | src->eip += map->start; | |
181 | } | |
182 | } | |
183 | pclose(file); | |
184 | out_assign: | |
185 | sym_filter_entry = syme; | |
186 | pthread_mutex_unlock(&syme->source_lock); | |
187 | } | |
188 | ||
189 | static void __zero_source_counters(struct sym_entry *syme) | |
190 | { | |
191 | int i; | |
192 | struct source_line *line; | |
193 | ||
194 | line = syme->lines; | |
195 | while (line) { | |
196 | for (i = 0; i < nr_counters; i++) | |
197 | line->count[i] = 0; | |
198 | line = line->next; | |
199 | } | |
200 | } | |
201 | ||
202 | static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip) | |
203 | { | |
204 | struct source_line *line; | |
205 | ||
206 | if (syme != sym_filter_entry) | |
207 | return; | |
208 | ||
209 | if (pthread_mutex_trylock(&syme->source_lock)) | |
210 | return; | |
211 | ||
212 | if (!syme->source) | |
213 | goto out_unlock; | |
214 | ||
215 | for (line = syme->lines; line; line = line->next) { | |
216 | if (line->eip == ip) { | |
217 | line->count[counter]++; | |
218 | break; | |
219 | } | |
220 | if (line->eip > ip) | |
221 | break; | |
222 | } | |
223 | out_unlock: | |
224 | pthread_mutex_unlock(&syme->source_lock); | |
225 | } | |
226 | ||
227 | static void lookup_sym_source(struct sym_entry *syme) | |
228 | { | |
229 | struct symbol *symbol = (struct symbol *)(syme + 1); | |
230 | struct source_line *line; | |
231 | char pattern[PATH_MAX]; | |
232 | ||
233 | sprintf(pattern, "<%s>:", symbol->name); | |
234 | ||
235 | pthread_mutex_lock(&syme->source_lock); | |
236 | for (line = syme->lines; line; line = line->next) { | |
237 | if (strstr(line->line, pattern)) { | |
238 | syme->source = line; | |
239 | break; | |
240 | } | |
241 | } | |
242 | pthread_mutex_unlock(&syme->source_lock); | |
243 | } | |
244 | ||
245 | static void show_lines(struct source_line *queue, int count, int total) | |
246 | { | |
247 | int i; | |
248 | struct source_line *line; | |
249 | ||
250 | line = queue; | |
251 | for (i = 0; i < count; i++) { | |
252 | float pcnt = 100.0*(float)line->count[sym_counter]/(float)total; | |
253 | ||
254 | printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line); | |
255 | line = line->next; | |
256 | } | |
257 | } | |
258 | ||
259 | #define TRACE_COUNT 3 | |
260 | ||
261 | static void show_details(struct sym_entry *syme) | |
262 | { | |
263 | struct symbol *symbol; | |
264 | struct source_line *line; | |
265 | struct source_line *line_queue = NULL; | |
266 | int displayed = 0; | |
267 | int line_queue_count = 0, total = 0, more = 0; | |
268 | ||
269 | if (!syme) | |
270 | return; | |
271 | ||
272 | if (!syme->source) | |
273 | lookup_sym_source(syme); | |
274 | ||
275 | if (!syme->source) | |
276 | return; | |
277 | ||
278 | symbol = (struct symbol *)(syme + 1); | |
279 | printf("Showing %s for %s\n", event_name(sym_counter), symbol->name); | |
280 | printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter); | |
281 | ||
282 | pthread_mutex_lock(&syme->source_lock); | |
283 | line = syme->source; | |
284 | while (line) { | |
285 | total += line->count[sym_counter]; | |
286 | line = line->next; | |
287 | } | |
288 | ||
289 | line = syme->source; | |
290 | while (line) { | |
291 | float pcnt = 0.0; | |
292 | ||
293 | if (!line_queue_count) | |
294 | line_queue = line; | |
295 | line_queue_count++; | |
296 | ||
297 | if (line->count[sym_counter]) | |
298 | pcnt = 100.0 * line->count[sym_counter] / (float)total; | |
299 | if (pcnt >= (float)sym_pcnt_filter) { | |
300 | if (displayed <= print_entries) | |
301 | show_lines(line_queue, line_queue_count, total); | |
302 | else more++; | |
303 | displayed += line_queue_count; | |
304 | line_queue_count = 0; | |
305 | line_queue = NULL; | |
306 | } else if (line_queue_count > TRACE_COUNT) { | |
307 | line_queue = line_queue->next; | |
308 | line_queue_count--; | |
309 | } | |
310 | ||
311 | line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8; | |
312 | line = line->next; | |
313 | } | |
314 | pthread_mutex_unlock(&syme->source_lock); | |
315 | if (more) | |
316 | printf("%d lines not displayed, maybe increase display entries [e]\n", more); | |
317 | } | |
318 | ||
/*
 * Symbols are put on this list by record_ip() and taken off again by
 * print_sym_table() once their snapshot count has dropped to zero.
 */
static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER;
static LIST_HEAD(active_symbols);

326 | /* | |
327 | * Ordering weight: count-1 * count-2 * ... / count-n | |
328 | */ | |
329 | static double sym_weight(const struct sym_entry *sym) | |
330 | { | |
331 | double weight = sym->snap_count; | |
332 | int counter; | |
333 | ||
334 | if (!display_weighted) | |
335 | return weight; | |
336 | ||
337 | for (counter = 1; counter < nr_counters-1; counter++) | |
338 | weight *= sym->count[counter]; | |
339 | ||
340 | weight /= (sym->count[counter] + 1); | |
341 | ||
342 | return weight; | |
343 | } | |
344 | ||
/* Sample totals since the last refresh; reset by print_sym_table(). */
static long samples, userspace_samples;
/* Escape sequence written before each refresh of the table. */
static const char CONSOLE_CLEAR[] = "\e[H\e[2J";

349 | static void __list_insert_active_sym(struct sym_entry *syme) | |
350 | { | |
351 | list_add(&syme->node, &active_symbols); | |
352 | } | |
353 | ||
354 | static void list_remove_active_sym(struct sym_entry *syme) | |
355 | { | |
356 | pthread_mutex_lock(&active_symbols_lock); | |
357 | list_del_init(&syme->node); | |
358 | pthread_mutex_unlock(&active_symbols_lock); | |
359 | } | |
360 | ||
361 | static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) | |
362 | { | |
363 | struct rb_node **p = &tree->rb_node; | |
364 | struct rb_node *parent = NULL; | |
365 | struct sym_entry *iter; | |
366 | ||
367 | while (*p != NULL) { | |
368 | parent = *p; | |
369 | iter = rb_entry(parent, struct sym_entry, rb_node); | |
370 | ||
371 | if (se->weight > iter->weight) | |
372 | p = &(*p)->rb_left; | |
373 | else | |
374 | p = &(*p)->rb_right; | |
375 | } | |
376 | ||
377 | rb_link_node(&se->rb_node, parent, p); | |
378 | rb_insert_color(&se->rb_node, tree); | |
379 | } | |
380 | ||
381 | static void print_sym_table(void) | |
382 | { | |
383 | int printed = 0, j; | |
384 | int counter, snap = !display_weighted ? sym_counter : 0; | |
385 | float samples_per_sec = samples/delay_secs; | |
386 | float ksamples_per_sec = (samples-userspace_samples)/delay_secs; | |
387 | float sum_ksamples = 0.0; | |
388 | struct sym_entry *syme, *n; | |
389 | struct rb_root tmp = RB_ROOT; | |
390 | struct rb_node *nd; | |
391 | ||
392 | samples = userspace_samples = 0; | |
393 | ||
394 | /* Sort the active symbols */ | |
395 | pthread_mutex_lock(&active_symbols_lock); | |
396 | syme = list_entry(active_symbols.next, struct sym_entry, node); | |
397 | pthread_mutex_unlock(&active_symbols_lock); | |
398 | ||
399 | list_for_each_entry_safe_from(syme, n, &active_symbols, node) { | |
400 | syme->snap_count = syme->count[snap]; | |
401 | if (syme->snap_count != 0) { | |
402 | syme->weight = sym_weight(syme); | |
403 | rb_insert_active_sym(&tmp, syme); | |
404 | sum_ksamples += syme->snap_count; | |
405 | ||
406 | for (j = 0; j < nr_counters; j++) | |
407 | syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8; | |
408 | } else | |
409 | list_remove_active_sym(syme); | |
410 | } | |
411 | ||
412 | puts(CONSOLE_CLEAR); | |
413 | ||
414 | printf( | |
415 | "------------------------------------------------------------------------------\n"); | |
416 | printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", | |
417 | samples_per_sec, | |
418 | 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); | |
419 | ||
420 | if (nr_counters == 1 || !display_weighted) { | |
421 | printf("%Ld", (u64)attrs[0].sample_period); | |
422 | if (freq) | |
423 | printf("Hz "); | |
424 | else | |
425 | printf(" "); | |
426 | } | |
427 | ||
428 | if (!display_weighted) | |
429 | printf("%s", event_name(sym_counter)); | |
430 | else for (counter = 0; counter < nr_counters; counter++) { | |
431 | if (counter) | |
432 | printf("/"); | |
433 | ||
434 | printf("%s", event_name(counter)); | |
435 | } | |
436 | ||
437 | printf( "], "); | |
438 | ||
439 | if (target_pid != -1) | |
440 | printf(" (target_pid: %d", target_pid); | |
441 | else | |
442 | printf(" (all"); | |
443 | ||
444 | if (profile_cpu != -1) | |
445 | printf(", cpu: %d)\n", profile_cpu); | |
446 | else { | |
447 | if (target_pid != -1) | |
448 | printf(")\n"); | |
449 | else | |
450 | printf(", %d CPUs)\n", nr_cpus); | |
451 | } | |
452 | ||
453 | printf("------------------------------------------------------------------------------\n\n"); | |
454 | ||
455 | if (sym_filter_entry) { | |
456 | show_details(sym_filter_entry); | |
457 | return; | |
458 | } | |
459 | ||
460 | if (nr_counters == 1) | |
461 | printf(" samples pcnt"); | |
462 | else | |
463 | printf(" weight samples pcnt"); | |
464 | ||
465 | if (verbose) | |
466 | printf(" RIP "); | |
467 | printf(" kernel function\n"); | |
468 | printf(" %s _______ _____", | |
469 | nr_counters == 1 ? " " : "______"); | |
470 | if (verbose) | |
471 | printf(" ________________"); | |
472 | printf(" _______________\n\n"); | |
473 | ||
474 | for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { | |
475 | struct symbol *sym; | |
476 | double pcnt; | |
477 | ||
478 | syme = rb_entry(nd, struct sym_entry, rb_node); | |
479 | sym = (struct symbol *)(syme + 1); | |
480 | ||
481 | if (++printed > print_entries || (int)syme->snap_count < count_filter) | |
482 | continue; | |
483 | ||
484 | pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / | |
485 | sum_ksamples)); | |
486 | ||
487 | if (nr_counters == 1 || !display_weighted) | |
488 | printf("%20.2f - ", syme->weight); | |
489 | else | |
490 | printf("%9.1f %10ld - ", syme->weight, syme->snap_count); | |
491 | ||
492 | percent_color_fprintf(stdout, "%4.1f%%", pcnt); | |
493 | if (verbose) | |
494 | printf(" - %016llx", sym->start); | |
495 | printf(" : %s", sym->name); | |
496 | if (syme->map->dso->name[0] == '[') | |
497 | printf(" \t%s", syme->map->dso->name); | |
498 | printf("\n"); | |
499 | } | |
500 | } | |
501 | ||
/*
 * Print "\n<msg>: " and read one line from stdin. If the line is a
 * non-empty run of decimal digits that fits in an int, store its value
 * in *target; in every other case *target is left unchanged.
 *
 * The line is read into a fixed buffer, so nothing is allocated and
 * nothing can leak. A line longer than the buffer is discarded up to
 * its newline so the remainder is not mistaken for later input.
 */
static void prompt_integer(int *target, const char *msg)
{
	char buf[64];
	char *p;
	unsigned long val;

	fprintf(stdout, "\n%s: ", msg);
	if (!fgets(buf, sizeof(buf), stdin))
		return;

	p = strchr(buf, '\n');
	if (p) {
		*p = 0;
	} else if (strlen(buf) == sizeof(buf) - 1) {
		int ch;

		/* Overlong input: swallow the rest of the line and reject. */
		while ((ch = getc(stdin)) != EOF && ch != '\n')
			;
		return;
	}

	/* An empty answer means "keep the current value". */
	if (!buf[0])
		return;

	for (p = buf; *p; p++) {
		if (!isdigit((unsigned char)*p))
			return;
	}

	errno = 0;
	val = strtoul(buf, NULL, 10);
	/* ~0U >> 1 is the largest value an int can hold. */
	if (errno || val > (unsigned long)(~0U >> 1))
		return;

	*target = (int)val;
}

/*
 * Prompt for an integer with prompt_integer() and store it in *target
 * only when it lies in the range 0..100.
 */
static void prompt_percent(int *target, const char *msg)
{
	int value = 0;

	prompt_integer(&value, msg);

	if (value < 0 || value > 100)
		return;

	*target = value;
}

537 | static void prompt_symbol(struct sym_entry **target, const char *msg) | |
538 | { | |
539 | char *buf = malloc(0), *p; | |
540 | struct sym_entry *syme = *target, *n, *found = NULL; | |
541 | size_t dummy = 0; | |
542 | ||
543 | /* zero counters of active symbol */ | |
544 | if (syme) { | |
545 | pthread_mutex_lock(&syme->source_lock); | |
546 | __zero_source_counters(syme); | |
547 | *target = NULL; | |
548 | pthread_mutex_unlock(&syme->source_lock); | |
549 | } | |
550 | ||
551 | fprintf(stdout, "\n%s: ", msg); | |
552 | if (getline(&buf, &dummy, stdin) < 0) | |
553 | goto out_free; | |
554 | ||
555 | p = strchr(buf, '\n'); | |
556 | if (p) | |
557 | *p = 0; | |
558 | ||
559 | pthread_mutex_lock(&active_symbols_lock); | |
560 | syme = list_entry(active_symbols.next, struct sym_entry, node); | |
561 | pthread_mutex_unlock(&active_symbols_lock); | |
562 | ||
563 | list_for_each_entry_safe_from(syme, n, &active_symbols, node) { | |
564 | struct symbol *sym = (struct symbol *)(syme + 1); | |
565 | ||
566 | if (!strcmp(buf, sym->name)) { | |
567 | found = syme; | |
568 | break; | |
569 | } | |
570 | } | |
571 | ||
572 | if (!found) { | |
573 | fprintf(stderr, "Sorry, %s is not active.\n", sym_filter); | |
574 | sleep(1); | |
575 | return; | |
576 | } else | |
577 | parse_source(found); | |
578 | ||
579 | out_free: | |
580 | free(buf); | |
581 | } | |
582 | ||
583 | static void print_mapped_keys(void) | |
584 | { | |
585 | char *name = NULL; | |
586 | ||
587 | if (sym_filter_entry) { | |
588 | struct symbol *sym = (struct symbol *)(sym_filter_entry+1); | |
589 | name = sym->name; | |
590 | } | |
591 | ||
592 | fprintf(stdout, "\nMapped keys:\n"); | |
593 | fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs); | |
594 | fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries); | |
595 | ||
596 | if (nr_counters > 1) | |
597 | fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter)); | |
598 | ||
599 | fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter); | |
600 | ||
601 | if (vmlinux_name) { | |
602 | fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter); | |
603 | fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL"); | |
604 | fprintf(stdout, "\t[S] stop annotation.\n"); | |
605 | } | |
606 | ||
607 | if (nr_counters > 1) | |
608 | fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0); | |
609 | ||
610 | fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0); | |
611 | fprintf(stdout, "\t[qQ] quit.\n"); | |
612 | } | |
613 | ||
614 | static int key_mapped(int c) | |
615 | { | |
616 | switch (c) { | |
617 | case 'd': | |
618 | case 'e': | |
619 | case 'f': | |
620 | case 'z': | |
621 | case 'q': | |
622 | case 'Q': | |
623 | return 1; | |
624 | case 'E': | |
625 | case 'w': | |
626 | return nr_counters > 1 ? 1 : 0; | |
627 | case 'F': | |
628 | case 's': | |
629 | case 'S': | |
630 | return vmlinux_name ? 1 : 0; | |
631 | default: | |
632 | break; | |
633 | } | |
634 | ||
635 | return 0; | |
636 | } | |
637 | ||
638 | static void handle_keypress(int c) | |
639 | { | |
640 | if (!key_mapped(c)) { | |
641 | struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; | |
642 | struct termios tc, save; | |
643 | ||
644 | print_mapped_keys(); | |
645 | fprintf(stdout, "\nEnter selection, or unmapped key to continue: "); | |
646 | fflush(stdout); | |
647 | ||
648 | tcgetattr(0, &save); | |
649 | tc = save; | |
650 | tc.c_lflag &= ~(ICANON | ECHO); | |
651 | tc.c_cc[VMIN] = 0; | |
652 | tc.c_cc[VTIME] = 0; | |
653 | tcsetattr(0, TCSANOW, &tc); | |
654 | ||
655 | poll(&stdin_poll, 1, -1); | |
656 | c = getc(stdin); | |
657 | ||
658 | tcsetattr(0, TCSAFLUSH, &save); | |
659 | if (!key_mapped(c)) | |
660 | return; | |
661 | } | |
662 | ||
663 | switch (c) { | |
664 | case 'd': | |
665 | prompt_integer(&delay_secs, "Enter display delay"); | |
666 | break; | |
667 | case 'e': | |
668 | prompt_integer(&print_entries, "Enter display entries (lines)"); | |
669 | break; | |
670 | case 'E': | |
671 | if (nr_counters > 1) { | |
672 | int i; | |
673 | ||
674 | fprintf(stderr, "\nAvailable events:"); | |
675 | for (i = 0; i < nr_counters; i++) | |
676 | fprintf(stderr, "\n\t%d %s", i, event_name(i)); | |
677 | ||
678 | prompt_integer(&sym_counter, "Enter details event counter"); | |
679 | ||
680 | if (sym_counter >= nr_counters) { | |
681 | fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0)); | |
682 | sym_counter = 0; | |
683 | sleep(1); | |
684 | } | |
685 | } else sym_counter = 0; | |
686 | break; | |
687 | case 'f': | |
688 | prompt_integer(&count_filter, "Enter display event count filter"); | |
689 | break; | |
690 | case 'F': | |
691 | prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)"); | |
692 | break; | |
693 | case 'q': | |
694 | case 'Q': | |
695 | printf("exiting.\n"); | |
696 | exit(0); | |
697 | case 's': | |
698 | prompt_symbol(&sym_filter_entry, "Enter details symbol"); | |
699 | break; | |
700 | case 'S': | |
701 | if (!sym_filter_entry) | |
702 | break; | |
703 | else { | |
704 | struct sym_entry *syme = sym_filter_entry; | |
705 | ||
706 | pthread_mutex_lock(&syme->source_lock); | |
707 | sym_filter_entry = NULL; | |
708 | __zero_source_counters(syme); | |
709 | pthread_mutex_unlock(&syme->source_lock); | |
710 | } | |
711 | break; | |
712 | case 'w': | |
713 | display_weighted = ~display_weighted; | |
714 | break; | |
715 | case 'z': | |
716 | zero = ~zero; | |
717 | break; | |
718 | default: | |
719 | break; | |
720 | } | |
721 | } | |
722 | ||
723 | static void *display_thread(void *arg __used) | |
724 | { | |
725 | struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; | |
726 | struct termios tc, save; | |
727 | int delay_msecs, c; | |
728 | ||
729 | tcgetattr(0, &save); | |
730 | tc = save; | |
731 | tc.c_lflag &= ~(ICANON | ECHO); | |
732 | tc.c_cc[VMIN] = 0; | |
733 | tc.c_cc[VTIME] = 0; | |
734 | ||
735 | repeat: | |
736 | delay_msecs = delay_secs * 1000; | |
737 | tcsetattr(0, TCSANOW, &tc); | |
738 | /* trash return*/ | |
739 | getc(stdin); | |
740 | ||
741 | do { | |
742 | print_sym_table(); | |
743 | } while (!poll(&stdin_poll, 1, delay_msecs) == 1); | |
744 | ||
745 | c = getc(stdin); | |
746 | tcsetattr(0, TCSAFLUSH, &save); | |
747 | ||
748 | handle_keypress(c); | |
749 | goto repeat; | |
750 | ||
751 | return NULL; | |
752 | } | |
753 | ||
/*
 * Names of symbols whose samples are skipped: symbol_filter() sets
 * ->skip on a symbol with one of these names and record_ip() then
 * ignores hits on it. The list is NULL terminated.
 */
static const char *skip_symbols[] = {
	"default_idle",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL,
};

768 | static int symbol_filter(struct map *map, struct symbol *sym) | |
769 | { | |
770 | struct sym_entry *syme; | |
771 | const char *name = sym->name; | |
772 | int i; | |
773 | ||
774 | /* | |
775 | * ppc64 uses function descriptors and appends a '.' to the | |
776 | * start of every instruction address. Remove it. | |
777 | */ | |
778 | if (name[0] == '.') | |
779 | name++; | |
780 | ||
781 | if (!strcmp(name, "_text") || | |
782 | !strcmp(name, "_etext") || | |
783 | !strcmp(name, "_sinittext") || | |
784 | !strncmp("init_module", name, 11) || | |
785 | !strncmp("cleanup_module", name, 14) || | |
786 | strstr(name, "_text_start") || | |
787 | strstr(name, "_text_end")) | |
788 | return 1; | |
789 | ||
790 | syme = dso__sym_priv(map->dso, sym); | |
791 | syme->map = map; | |
792 | pthread_mutex_init(&syme->source_lock, NULL); | |
793 | if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter)) | |
794 | sym_filter_entry = syme; | |
795 | ||
796 | for (i = 0; skip_symbols[i]; i++) { | |
797 | if (!strcmp(skip_symbols[i], name)) { | |
798 | syme->skip = 1; | |
799 | break; | |
800 | } | |
801 | } | |
802 | ||
803 | return 0; | |
804 | } | |
805 | ||
806 | static int parse_symbols(void) | |
807 | { | |
808 | if (dsos__load_kernel(vmlinux_name, sizeof(struct sym_entry), | |
809 | symbol_filter, verbose, 1) <= 0) | |
810 | return -1; | |
811 | ||
812 | if (dump_symtab) | |
813 | dsos__fprintf(stderr); | |
814 | ||
815 | return 0; | |
816 | } | |
817 | ||
818 | /* | |
819 | * Binary search in the histogram table and record the hit: | |
820 | */ | |
821 | static void record_ip(u64 ip, int counter) | |
822 | { | |
823 | struct map *map; | |
824 | struct symbol *sym = kernel_maps__find_symbol(ip, &map); | |
825 | ||
826 | if (sym != NULL) { | |
827 | struct sym_entry *syme = dso__sym_priv(map->dso, sym); | |
828 | ||
829 | if (!syme->skip) { | |
830 | syme->count[counter]++; | |
831 | record_precise_ip(syme, counter, ip); | |
832 | pthread_mutex_lock(&active_symbols_lock); | |
833 | if (list_empty(&syme->node) || !syme->node.next) | |
834 | __list_insert_active_sym(syme); | |
835 | pthread_mutex_unlock(&active_symbols_lock); | |
836 | return; | |
837 | } | |
838 | } | |
839 | ||
840 | samples--; | |
841 | } | |
842 | ||
843 | static void process_event(u64 ip, int counter, int user) | |
844 | { | |
845 | samples++; | |
846 | ||
847 | if (user) { | |
848 | userspace_samples++; | |
849 | return; | |
850 | } | |
851 | ||
852 | record_ip(ip, counter); | |
853 | } | |
854 | ||
/* Read-side state of one counter's mmap()ed buffer. */
struct mmap_data {
	int			counter;	/* counter index given to process_event() */
	void			*base;		/* start of the mapping */
	int			mask;		/* data area size in bytes minus one */
	unsigned int		prev;		/* offset up to which data was consumed */
};

862 | static unsigned int mmap_read_head(struct mmap_data *md) | |
863 | { | |
864 | struct perf_event_mmap_page *pc = md->base; | |
865 | int head; | |
866 | ||
867 | head = pc->data_head; | |
868 | rmb(); | |
869 | ||
870 | return head; | |
871 | } | |
872 | ||
/* Times of the previous and the current call to mmap_read_counter(). */
struct timeval last_read;
struct timeval this_read;

875 | static void mmap_read_counter(struct mmap_data *md) | |
876 | { | |
877 | unsigned int head = mmap_read_head(md); | |
878 | unsigned int old = md->prev; | |
879 | unsigned char *data = md->base + page_size; | |
880 | int diff; | |
881 | ||
882 | gettimeofday(&this_read, NULL); | |
883 | ||
884 | /* | |
885 | * If we're further behind than half the buffer, there's a chance | |
886 | * the writer will bite our tail and mess up the samples under us. | |
887 | * | |
888 | * If we somehow ended up ahead of the head, we got messed up. | |
889 | * | |
890 | * In either case, truncate and restart at head. | |
891 | */ | |
892 | diff = head - old; | |
893 | if (diff > md->mask / 2 || diff < 0) { | |
894 | struct timeval iv; | |
895 | unsigned long msecs; | |
896 | ||
897 | timersub(&this_read, &last_read, &iv); | |
898 | msecs = iv.tv_sec*1000 + iv.tv_usec/1000; | |
899 | ||
900 | fprintf(stderr, "WARNING: failed to keep up with mmap data." | |
901 | " Last read %lu msecs ago.\n", msecs); | |
902 | ||
903 | /* | |
904 | * head points to a known good entry, start there. | |
905 | */ | |
906 | old = head; | |
907 | } | |
908 | ||
909 | last_read = this_read; | |
910 | ||
911 | for (; old != head;) { | |
912 | event_t *event = (event_t *)&data[old & md->mask]; | |
913 | ||
914 | event_t event_copy; | |
915 | ||
916 | size_t size = event->header.size; | |
917 | ||
918 | /* | |
919 | * Event straddles the mmap boundary -- header should always | |
920 | * be inside due to u64 alignment of output. | |
921 | */ | |
922 | if ((old & md->mask) + size != ((old + size) & md->mask)) { | |
923 | unsigned int offset = old; | |
924 | unsigned int len = min(sizeof(*event), size), cpy; | |
925 | void *dst = &event_copy; | |
926 | ||
927 | do { | |
928 | cpy = min(md->mask + 1 - (offset & md->mask), len); | |
929 | memcpy(dst, &data[offset & md->mask], cpy); | |
930 | offset += cpy; | |
931 | dst += cpy; | |
932 | len -= cpy; | |
933 | } while (len); | |
934 | ||
935 | event = &event_copy; | |
936 | } | |
937 | ||
938 | old += size; | |
939 | ||
940 | if (event->header.type == PERF_RECORD_SAMPLE) { | |
941 | int user = | |
942 | (event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK) == PERF_RECORD_MISC_USER; | |
943 | process_event(event->ip.ip, md->counter, user); | |
944 | } | |
945 | } | |
946 | ||
947 | md->prev = old; | |
948 | } | |
949 | ||
950 | static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; | |
951 | static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; | |
952 | ||
953 | static void mmap_read(void) | |
954 | { | |
955 | int i, counter; | |
956 | ||
957 | for (i = 0; i < nr_cpus; i++) { | |
958 | for (counter = 0; counter < nr_counters; counter++) | |
959 | mmap_read_counter(&mmap_array[i][counter]); | |
960 | } | |
961 | } | |
962 | ||
/* Number of entries of event_array[] in use. */
int nr_poll;
/* Group leader fd passed to sys_perf_event_open(); -1 when none. */
int group_fd;

966 | static void start_counter(int i, int counter) | |
967 | { | |
968 | struct perf_event_attr *attr; | |
969 | int cpu; | |
970 | ||
971 | cpu = profile_cpu; | |
972 | if (target_pid == -1 && profile_cpu == -1) | |
973 | cpu = i; | |
974 | ||
975 | attr = attrs + counter; | |
976 | ||
977 | attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; | |
978 | ||
979 | if (freq) { | |
980 | attr->sample_type |= PERF_SAMPLE_PERIOD; | |
981 | attr->freq = 1; | |
982 | attr->sample_freq = freq; | |
983 | } | |
984 | ||
985 | attr->inherit = (cpu < 0) && inherit; | |
986 | ||
987 | try_again: | |
988 | fd[i][counter] = sys_perf_event_open(attr, target_pid, cpu, group_fd, 0); | |
989 | ||
990 | if (fd[i][counter] < 0) { | |
991 | int err = errno; | |
992 | ||
993 | if (err == EPERM) | |
994 | die("No permission - are you root?\n"); | |
995 | /* | |
996 | * If it's cycles then fall back to hrtimer | |
997 | * based cpu-clock-tick sw counter, which | |
998 | * is always available even if no PMU support: | |
999 | */ | |
1000 | if (attr->type == PERF_TYPE_HARDWARE | |
1001 | && attr->config == PERF_COUNT_HW_CPU_CYCLES) { | |
1002 | ||
1003 | if (verbose) | |
1004 | warning(" ... trying to fall back to cpu-clock-ticks\n"); | |
1005 | ||
1006 | attr->type = PERF_TYPE_SOFTWARE; | |
1007 | attr->config = PERF_COUNT_SW_CPU_CLOCK; | |
1008 | goto try_again; | |
1009 | } | |
1010 | printf("\n"); | |
1011 | error("perfcounter syscall returned with %d (%s)\n", | |
1012 | fd[i][counter], strerror(err)); | |
1013 | die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); | |
1014 | exit(-1); | |
1015 | } | |
1016 | assert(fd[i][counter] >= 0); | |
1017 | fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); | |
1018 | ||
1019 | /* | |
1020 | * First counter acts as the group leader: | |
1021 | */ | |
1022 | if (group && group_fd == -1) | |
1023 | group_fd = fd[i][counter]; | |
1024 | ||
1025 | event_array[nr_poll].fd = fd[i][counter]; | |
1026 | event_array[nr_poll].events = POLLIN; | |
1027 | nr_poll++; | |
1028 | ||
1029 | mmap_array[i][counter].counter = counter; | |
1030 | mmap_array[i][counter].prev = 0; | |
1031 | mmap_array[i][counter].mask = mmap_pages*page_size - 1; | |
1032 | mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, | |
1033 | PROT_READ, MAP_SHARED, fd[i][counter], 0); | |
1034 | if (mmap_array[i][counter].base == MAP_FAILED) | |
1035 | die("failed to mmap with %d (%s)\n", errno, strerror(errno)); | |
1036 | } | |
1037 | ||
1038 | static int __cmd_top(void) | |
1039 | { | |
1040 | pthread_t thread; | |
1041 | int i, counter; | |
1042 | int ret; | |
1043 | ||
1044 | for (i = 0; i < nr_cpus; i++) { | |
1045 | group_fd = -1; | |
1046 | for (counter = 0; counter < nr_counters; counter++) | |
1047 | start_counter(i, counter); | |
1048 | } | |
1049 | ||
1050 | /* Wait for a minimal set of events before starting the snapshot */ | |
1051 | poll(event_array, nr_poll, 100); | |
1052 | ||
1053 | mmap_read(); | |
1054 | ||
1055 | if (pthread_create(&thread, NULL, display_thread, NULL)) { | |
1056 | printf("Could not create display thread.\n"); | |
1057 | exit(-1); | |
1058 | } | |
1059 | ||
1060 | if (realtime_prio) { | |
1061 | struct sched_param param; | |
1062 | ||
1063 | param.sched_priority = realtime_prio; | |
1064 | if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { | |
1065 | printf("Could not set realtime priority.\n"); | |
1066 | exit(-1); | |
1067 | } | |
1068 | } | |
1069 | ||
1070 | while (1) { | |
1071 | int hits = samples; | |
1072 | ||
1073 | mmap_read(); | |
1074 | ||
1075 | if (hits == samples) | |
1076 | ret = poll(event_array, nr_poll, 100); | |
1077 | } | |
1078 | ||
1079 | return 0; | |
1080 | } | |
1081 | ||
/*
 * Usage synopsis for 'perf top'; NULL-terminated list handed to
 * parse_options() and usage_with_options().
 */
static const char * const top_usage[] = { "perf top [<options>]", NULL };
1086 | ||
1087 | static const struct option options[] = { | |
1088 | OPT_CALLBACK('e', "event", NULL, "event", | |
1089 | "event selector. use 'perf list' to list available events", | |
1090 | parse_events), | |
1091 | OPT_INTEGER('c', "count", &default_interval, | |
1092 | "event period to sample"), | |
1093 | OPT_INTEGER('p', "pid", &target_pid, | |
1094 | "profile events on existing pid"), | |
1095 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | |
1096 | "system-wide collection from all CPUs"), | |
1097 | OPT_INTEGER('C', "CPU", &profile_cpu, | |
1098 | "CPU to profile on"), | |
1099 | OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), | |
1100 | OPT_INTEGER('m', "mmap-pages", &mmap_pages, | |
1101 | "number of mmap data pages"), | |
1102 | OPT_INTEGER('r', "realtime", &realtime_prio, | |
1103 | "collect data with this RT SCHED_FIFO priority"), | |
1104 | OPT_INTEGER('d', "delay", &delay_secs, | |
1105 | "number of seconds to delay between refreshes"), | |
1106 | OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, | |
1107 | "dump the symbol table used for profiling"), | |
1108 | OPT_INTEGER('f', "count-filter", &count_filter, | |
1109 | "only display functions with more events than this"), | |
1110 | OPT_BOOLEAN('g', "group", &group, | |
1111 | "put the counters into a counter group"), | |
1112 | OPT_BOOLEAN('i', "inherit", &inherit, | |
1113 | "child tasks inherit counters"), | |
1114 | OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name", | |
1115 | "symbol to annotate - requires -k option"), | |
1116 | OPT_BOOLEAN('z', "zero", &zero, | |
1117 | "zero history across updates"), | |
1118 | OPT_INTEGER('F', "freq", &freq, | |
1119 | "profile at this frequency"), | |
1120 | OPT_INTEGER('E', "entries", &print_entries, | |
1121 | "display this many functions"), | |
1122 | OPT_BOOLEAN('v', "verbose", &verbose, | |
1123 | "be more verbose (show counter open errors, etc)"), | |
1124 | OPT_END() | |
1125 | }; | |
1126 | ||
1127 | int cmd_top(int argc, const char **argv, const char *prefix __used) | |
1128 | { | |
1129 | int counter; | |
1130 | ||
1131 | symbol__init(); | |
1132 | ||
1133 | page_size = sysconf(_SC_PAGE_SIZE); | |
1134 | ||
1135 | argc = parse_options(argc, argv, options, top_usage, 0); | |
1136 | if (argc) | |
1137 | usage_with_options(top_usage, options); | |
1138 | ||
1139 | /* CPU and PID are mutually exclusive */ | |
1140 | if (target_pid != -1 && profile_cpu != -1) { | |
1141 | printf("WARNING: PID switch overriding CPU\n"); | |
1142 | sleep(1); | |
1143 | profile_cpu = -1; | |
1144 | } | |
1145 | ||
1146 | if (!nr_counters) | |
1147 | nr_counters = 1; | |
1148 | ||
1149 | if (delay_secs < 1) | |
1150 | delay_secs = 1; | |
1151 | ||
1152 | parse_symbols(); | |
1153 | parse_source(sym_filter_entry); | |
1154 | ||
1155 | ||
1156 | /* | |
1157 | * User specified count overrides default frequency. | |
1158 | */ | |
1159 | if (default_interval) | |
1160 | freq = 0; | |
1161 | else if (freq) { | |
1162 | default_interval = freq; | |
1163 | } else { | |
1164 | fprintf(stderr, "frequency and count are zero, aborting\n"); | |
1165 | exit(EXIT_FAILURE); | |
1166 | } | |
1167 | ||
1168 | /* | |
1169 | * Fill in the ones not specifically initialized via -c: | |
1170 | */ | |
1171 | for (counter = 0; counter < nr_counters; counter++) { | |
1172 | if (attrs[counter].sample_period) | |
1173 | continue; | |
1174 | ||
1175 | attrs[counter].sample_period = default_interval; | |
1176 | } | |
1177 | ||
1178 | nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); | |
1179 | assert(nr_cpus <= MAX_NR_CPUS); | |
1180 | assert(nr_cpus >= 0); | |
1181 | ||
1182 | if (target_pid != -1 || profile_cpu != -1) | |
1183 | nr_cpus = 1; | |
1184 | ||
1185 | return __cmd_top(); | |
1186 | } |