]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - tools/perf/builtin-top.c
perf: Enable more compiler warnings
[mirror_ubuntu-artful-kernel.git] / tools / perf / builtin-top.c
CommitLineData
/*
 * builtin-top.c
 *
 * Builtin top command: Display a continuously updated profile of
 * any workload, CPU or specific PID.
 *
 * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
 *
 * Improvements and fixes by:
 *
 *   Arjan van de Ven <arjan@linux.intel.com>
 *   Yanmin Zhang <yanmin.zhang@intel.com>
 *   Wu Fengguang <fengguang.wu@intel.com>
 *   Mike Galbraith <efault@gmx.de>
 *   Paul Mackerras <paulus@samba.org>
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
bf9e1876 19#include "builtin.h"
07800601 20
1a482f38 21#include "perf.h"
bf9e1876 22
de04687f 23#include "util/symbol.h"
8fc0321f 24#include "util/color.h"
148be2c1 25#include "util/util.h"
43cbcd8a 26#include <linux/rbtree.h>
b456bae0
IM
27#include "util/parse-options.h"
28#include "util/parse-events.h"
07800601 29
07800601
IM
30#include <assert.h>
31#include <fcntl.h>
0e9b20b8 32
07800601 33#include <stdio.h>
923c42c1
MG
34#include <termios.h>
35#include <unistd.h>
0e9b20b8 36
07800601 37#include <errno.h>
07800601
IM
38#include <time.h>
39#include <sched.h>
40#include <pthread.h>
41
42#include <sys/syscall.h>
43#include <sys/ioctl.h>
44#include <sys/poll.h>
45#include <sys/prctl.h>
46#include <sys/wait.h>
47#include <sys/uio.h>
48#include <sys/mman.h>
49
50#include <linux/unistd.h>
51#include <linux/types.h>
52
a21ca2ca 53static int fd[MAX_NR_CPUS][MAX_COUNTERS];
07800601 54
a21ca2ca 55static int system_wide = 0;
07800601 56
a21ca2ca 57static int default_interval = 100000;
07800601 58
923c42c1 59static int count_filter = 5;
6e53cdf1 60static int print_entries = 15;
07800601 61
6e53cdf1 62static int target_pid = -1;
0fdc7e67 63static int inherit = 0;
07800601
IM
64static int profile_cpu = -1;
65static int nr_cpus = 0;
07800601
IM
66static unsigned int realtime_prio = 0;
67static int group = 0;
68static unsigned int page_size;
cf1f4574
IM
69static unsigned int mmap_pages = 16;
70static int freq = 0;
07800601 71
07800601
IM
72static int delay_secs = 2;
73static int zero;
74static int dump_symtab;
75
923c42c1
MG
76/*
77 * Source
78 */
79
80struct source_line {
81 u64 eip;
82 unsigned long count[MAX_COUNTERS];
83 char *line;
84 struct source_line *next;
85};
86
87static char *sym_filter = NULL;
88struct sym_entry *sym_filter_entry = NULL;
89static int sym_pcnt_filter = 5;
90static int sym_counter = 0;
46ab9764 91static int display_weighted = -1;
923c42c1 92
07800601
IM
93/*
94 * Symbols
95 */
96
9cffa8d5
PM
97static u64 min_ip;
98static u64 max_ip = -1ll;
07800601
IM
99
100struct sym_entry {
de04687f
ACM
101 struct rb_node rb_node;
102 struct list_head node;
07800601 103 unsigned long count[MAX_COUNTERS];
c44613a4
ACM
104 unsigned long snap_count;
105 double weight;
07800601 106 int skip;
923c42c1
MG
107 struct source_line *source;
108 struct source_line *lines;
109 struct source_line **lines_tail;
110 pthread_mutex_t source_lock;
07800601
IM
111};
112
923c42c1
MG
113/*
114 * Source functions
115 */
116
117static void parse_source(struct sym_entry *syme)
118{
119 struct symbol *sym;
120 struct module *module;
121 struct section *section = NULL;
122 FILE *file;
83a0944f
IM
123 char command[PATH_MAX*2];
124 const char *path = vmlinux_name;
923c42c1
MG
125 u64 start, end, len;
126
127 if (!syme)
128 return;
129
130 if (syme->lines) {
131 pthread_mutex_lock(&syme->source_lock);
132 goto out_assign;
133 }
134
135 sym = (struct symbol *)(syme + 1);
136 module = sym->module;
137
138 if (module)
139 path = module->path;
140 if (!path)
141 return;
142
143 start = sym->obj_start;
144 if (!start)
145 start = sym->start;
146
147 if (module) {
148 section = module->sections->find_section(module->sections, ".text");
149 if (section)
150 start -= section->vma;
151 }
152
153 end = start + sym->end - sym->start + 1;
154 len = sym->end - sym->start;
155
156 sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path);
157
158 file = popen(command, "r");
159 if (!file)
160 return;
161
162 pthread_mutex_lock(&syme->source_lock);
163 syme->lines_tail = &syme->lines;
164 while (!feof(file)) {
165 struct source_line *src;
166 size_t dummy = 0;
167 char *c;
168
169 src = malloc(sizeof(struct source_line));
170 assert(src != NULL);
171 memset(src, 0, sizeof(struct source_line));
172
173 if (getline(&src->line, &dummy, file) < 0)
174 break;
175 if (!src->line)
176 break;
177
178 c = strchr(src->line, '\n');
179 if (c)
180 *c = 0;
181
182 src->next = NULL;
183 *syme->lines_tail = src;
184 syme->lines_tail = &src->next;
185
186 if (strlen(src->line)>8 && src->line[8] == ':') {
187 src->eip = strtoull(src->line, NULL, 16);
188 if (section)
189 src->eip += section->vma;
190 }
191 if (strlen(src->line)>8 && src->line[16] == ':') {
192 src->eip = strtoull(src->line, NULL, 16);
193 if (section)
194 src->eip += section->vma;
195 }
196 }
197 pclose(file);
198out_assign:
199 sym_filter_entry = syme;
200 pthread_mutex_unlock(&syme->source_lock);
201}
202
203static void __zero_source_counters(struct sym_entry *syme)
204{
205 int i;
206 struct source_line *line;
207
208 line = syme->lines;
209 while (line) {
210 for (i = 0; i < nr_counters; i++)
211 line->count[i] = 0;
212 line = line->next;
213 }
214}
215
216static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
217{
218 struct source_line *line;
219
220 if (syme != sym_filter_entry)
221 return;
222
223 if (pthread_mutex_trylock(&syme->source_lock))
224 return;
225
226 if (!syme->source)
227 goto out_unlock;
228
229 for (line = syme->lines; line; line = line->next) {
230 if (line->eip == ip) {
231 line->count[counter]++;
232 break;
233 }
234 if (line->eip > ip)
235 break;
236 }
237out_unlock:
238 pthread_mutex_unlock(&syme->source_lock);
239}
240
241static void lookup_sym_source(struct sym_entry *syme)
242{
243 struct symbol *symbol = (struct symbol *)(syme + 1);
244 struct source_line *line;
245 char pattern[PATH_MAX];
246 char *idx;
247
248 sprintf(pattern, "<%s>:", symbol->name);
249
250 if (symbol->module) {
251 idx = strstr(pattern, "\t");
252 if (idx)
253 *idx = 0;
254 }
255
256 pthread_mutex_lock(&syme->source_lock);
257 for (line = syme->lines; line; line = line->next) {
258 if (strstr(line->line, pattern)) {
259 syme->source = line;
260 break;
261 }
262 }
263 pthread_mutex_unlock(&syme->source_lock);
264}
265
266static void show_lines(struct source_line *queue, int count, int total)
267{
268 int i;
269 struct source_line *line;
270
271 line = queue;
272 for (i = 0; i < count; i++) {
273 float pcnt = 100.0*(float)line->count[sym_counter]/(float)total;
274
275 printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line);
276 line = line->next;
277 }
278}
279
280#define TRACE_COUNT 3
281
282static void show_details(struct sym_entry *syme)
283{
284 struct symbol *symbol;
285 struct source_line *line;
286 struct source_line *line_queue = NULL;
287 int displayed = 0;
288 int line_queue_count = 0, total = 0, more = 0;
289
290 if (!syme)
291 return;
292
293 if (!syme->source)
294 lookup_sym_source(syme);
295
296 if (!syme->source)
297 return;
298
299 symbol = (struct symbol *)(syme + 1);
300 printf("Showing %s for %s\n", event_name(sym_counter), symbol->name);
301 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
302
303 pthread_mutex_lock(&syme->source_lock);
304 line = syme->source;
305 while (line) {
306 total += line->count[sym_counter];
307 line = line->next;
308 }
309
310 line = syme->source;
311 while (line) {
312 float pcnt = 0.0;
313
314 if (!line_queue_count)
315 line_queue = line;
316 line_queue_count++;
317
318 if (line->count[sym_counter])
319 pcnt = 100.0 * line->count[sym_counter] / (float)total;
320 if (pcnt >= (float)sym_pcnt_filter) {
321 if (displayed <= print_entries)
322 show_lines(line_queue, line_queue_count, total);
323 else more++;
324 displayed += line_queue_count;
325 line_queue_count = 0;
326 line_queue = NULL;
327 } else if (line_queue_count > TRACE_COUNT) {
328 line_queue = line_queue->next;
329 line_queue_count--;
330 }
331
332 line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
333 line = line->next;
334 }
335 pthread_mutex_unlock(&syme->source_lock);
336 if (more)
337 printf("%d lines not displayed, maybe increase display entries [e]\n", more);
338}
07800601 339
de04687f
ACM
/*
 * Symbols that currently have samples.  record_ip() links a symbol in;
 * print_sym_table() unlinks it once its snapshot count has decayed to
 * zero.  active_symbols_lock guards insertion and removal.
 */
static LIST_HEAD(active_symbols);
static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER;
07800601 346
07800601
IM
347/*
348 * Ordering weight: count-1 * count-2 * ... / count-n
349 */
350static double sym_weight(const struct sym_entry *sym)
351{
c44613a4 352 double weight = sym->snap_count;
07800601
IM
353 int counter;
354
46ab9764
MG
355 if (!display_weighted)
356 return weight;
357
07800601
IM
358 for (counter = 1; counter < nr_counters-1; counter++)
359 weight *= sym->count[counter];
360
361 weight /= (sym->count[counter] + 1);
362
363 return weight;
364}
365
2debbc83
IM
/* Sample counters accumulated since the last display refresh. */
static long			samples;
static long			userspace_samples;

/* Written with puts() at the start of every refresh. */
static const char		CONSOLE_CLEAR[] = "\e[H\e[2J";
369
c44613a4 370static void __list_insert_active_sym(struct sym_entry *syme)
de04687f
ACM
371{
372 list_add(&syme->node, &active_symbols);
373}
374
c44613a4
ACM
375static void list_remove_active_sym(struct sym_entry *syme)
376{
377 pthread_mutex_lock(&active_symbols_lock);
378 list_del_init(&syme->node);
379 pthread_mutex_unlock(&active_symbols_lock);
380}
381
de04687f
ACM
382static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
383{
384 struct rb_node **p = &tree->rb_node;
385 struct rb_node *parent = NULL;
386 struct sym_entry *iter;
387
388 while (*p != NULL) {
389 parent = *p;
390 iter = rb_entry(parent, struct sym_entry, rb_node);
391
c44613a4 392 if (se->weight > iter->weight)
de04687f
ACM
393 p = &(*p)->rb_left;
394 else
395 p = &(*p)->rb_right;
396 }
397
398 rb_link_node(&se->rb_node, parent, p);
399 rb_insert_color(&se->rb_node, tree);
400}
07800601
IM
401
402static void print_sym_table(void)
403{
233f0b95 404 int printed = 0, j;
46ab9764 405 int counter, snap = !display_weighted ? sym_counter : 0;
2debbc83
IM
406 float samples_per_sec = samples/delay_secs;
407 float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
408 float sum_ksamples = 0.0;
de04687f
ACM
409 struct sym_entry *syme, *n;
410 struct rb_root tmp = RB_ROOT;
411 struct rb_node *nd;
07800601 412
2debbc83 413 samples = userspace_samples = 0;
07800601 414
de04687f 415 /* Sort the active symbols */
c44613a4
ACM
416 pthread_mutex_lock(&active_symbols_lock);
417 syme = list_entry(active_symbols.next, struct sym_entry, node);
418 pthread_mutex_unlock(&active_symbols_lock);
419
420 list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
46ab9764 421 syme->snap_count = syme->count[snap];
c44613a4
ACM
422 if (syme->snap_count != 0) {
423 syme->weight = sym_weight(syme);
de04687f 424 rb_insert_active_sym(&tmp, syme);
2debbc83 425 sum_ksamples += syme->snap_count;
d94b9430
MG
426
427 for (j = 0; j < nr_counters; j++)
de04687f
ACM
428 syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8;
429 } else
c44613a4 430 list_remove_active_sym(syme);
d94b9430
MG
431 }
432
0f5486b5 433 puts(CONSOLE_CLEAR);
07800601
IM
434
435 printf(
436"------------------------------------------------------------------------------\n");
f2521b6e 437 printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [",
2debbc83
IM
438 samples_per_sec,
439 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
07800601 440
46ab9764 441 if (nr_counters == 1 || !display_weighted) {
9cffa8d5 442 printf("%Ld", (u64)attrs[0].sample_period);
cf1f4574
IM
443 if (freq)
444 printf("Hz ");
445 else
446 printf(" ");
447 }
07800601 448
46ab9764
MG
449 if (!display_weighted)
450 printf("%s", event_name(sym_counter));
451 else for (counter = 0; counter < nr_counters; counter++) {
07800601
IM
452 if (counter)
453 printf("/");
454
455 printf("%s", event_name(counter));
456 }
457
458 printf( "], ");
459
b456bae0
IM
460 if (target_pid != -1)
461 printf(" (target_pid: %d", target_pid);
07800601
IM
462 else
463 printf(" (all");
464
465 if (profile_cpu != -1)
466 printf(", cpu: %d)\n", profile_cpu);
467 else {
b456bae0 468 if (target_pid != -1)
07800601
IM
469 printf(")\n");
470 else
471 printf(", %d CPUs)\n", nr_cpus);
472 }
473
474 printf("------------------------------------------------------------------------------\n\n");
475
923c42c1
MG
476 if (sym_filter_entry) {
477 show_details(sym_filter_entry);
478 return;
479 }
480
07800601 481 if (nr_counters == 1)
2debbc83 482 printf(" samples pcnt");
07800601 483 else
2debbc83 484 printf(" weight samples pcnt");
07800601
IM
485
486 printf(" RIP kernel function\n"
2debbc83 487 " ______ _______ _____ ________________ _______________\n\n"
07800601
IM
488 );
489
de04687f 490 for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) {
83a0944f 491 struct symbol *sym;
8fc0321f 492 double pcnt;
d94b9430 493
83a0944f
IM
494 syme = rb_entry(nd, struct sym_entry, rb_node);
495 sym = (struct symbol *)(syme + 1);
496
923c42c1 497 if (++printed > print_entries || (int)syme->snap_count < count_filter)
c44613a4 498 continue;
d94b9430 499
2debbc83
IM
500 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
501 sum_ksamples));
d94b9430 502
46ab9764 503 if (nr_counters == 1 || !display_weighted)
2debbc83 504 printf("%20.2f - ", syme->weight);
d94b9430 505 else
2debbc83 506 printf("%9.1f %10ld - ", syme->weight, syme->snap_count);
8fc0321f 507
1e11fd82 508 percent_color_fprintf(stdout, "%4.1f%%", pcnt);
42976487
MG
509 printf(" - %016llx : %s", sym->start, sym->name);
510 if (sym->module)
511 printf("\t[%s]", sym->module->name);
512 printf("\n");
07800601 513 }
07800601
IM
514}
515
923c42c1
MG
/*
 * Print "\n<msg>: " on stdout, read one line from stdin and, if it is a
 * non-empty string of decimal digits that fits in an int, store the
 * value in *target.  On any other input (EOF, empty line, non-digit
 * characters, overflow) *target is left unchanged.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t dummy = 0;
	long val;
	int tmp;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	/* A bare <enter> is not a number; do not turn it into 0. */
	if (!*buf)
		goto out_free;

	for (p = buf; *p; p++) {
		/* ctype functions require an unsigned char value */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	errno = 0;
	val = strtol(buf, NULL, 10);
	tmp = (int)val;
	/* Reject values that overflow long or do not survive the cast to int. */
	if (errno || tmp != val)
		goto out_free;

	*target = tmp;
out_free:
	free(buf);
}
541
/*
 * Prompt for a percentage and store it in *target when the entered
 * value lies in [0, 100].  The scratch value starts out of range, so
 * input that prompt_integer() rejects leaves *target unchanged instead
 * of resetting it to 0.
 */
static void prompt_percent(int *target, const char *msg)
{
	int tmp = -1;

	prompt_integer(&tmp, msg);
	if (tmp >= 0 && tmp <= 100)
		*target = tmp;
}
550
551static void prompt_symbol(struct sym_entry **target, const char *msg)
552{
553 char *buf = malloc(0), *p;
554 struct sym_entry *syme = *target, *n, *found = NULL;
555 size_t dummy = 0;
556
557 /* zero counters of active symbol */
558 if (syme) {
559 pthread_mutex_lock(&syme->source_lock);
560 __zero_source_counters(syme);
561 *target = NULL;
562 pthread_mutex_unlock(&syme->source_lock);
563 }
564
565 fprintf(stdout, "\n%s: ", msg);
566 if (getline(&buf, &dummy, stdin) < 0)
567 goto out_free;
568
569 p = strchr(buf, '\n');
570 if (p)
571 *p = 0;
572
573 pthread_mutex_lock(&active_symbols_lock);
574 syme = list_entry(active_symbols.next, struct sym_entry, node);
575 pthread_mutex_unlock(&active_symbols_lock);
576
577 list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
578 struct symbol *sym = (struct symbol *)(syme + 1);
579
580 if (!strcmp(buf, sym->name)) {
581 found = syme;
582 break;
583 }
584 }
585
586 if (!found) {
587 fprintf(stderr, "Sorry, %s is not active.\n", sym_filter);
588 sleep(1);
589 return;
590 } else
591 parse_source(found);
592
593out_free:
594 free(buf);
595}
596
091bd2e9 597static void print_mapped_keys(void)
923c42c1 598{
091bd2e9
MG
599 char *name = NULL;
600
601 if (sym_filter_entry) {
602 struct symbol *sym = (struct symbol *)(sym_filter_entry+1);
603 name = sym->name;
604 }
605
606 fprintf(stdout, "\nMapped keys:\n");
607 fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs);
608 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries);
609
610 if (nr_counters > 1)
611 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter));
612
613 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter);
614
83a0944f 615 if (vmlinux_name) {
091bd2e9
MG
616 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
617 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
618 fprintf(stdout, "\t[S] stop annotation.\n");
619 }
620
621 if (nr_counters > 1)
622 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);
623
46ab9764 624 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0);
091bd2e9
MG
625 fprintf(stdout, "\t[qQ] quit.\n");
626}
627
628static int key_mapped(int c)
629{
630 switch (c) {
631 case 'd':
632 case 'e':
633 case 'f':
634 case 'z':
635 case 'q':
636 case 'Q':
637 return 1;
638 case 'E':
639 case 'w':
640 return nr_counters > 1 ? 1 : 0;
641 case 'F':
642 case 's':
643 case 'S':
83a0944f
IM
644 return vmlinux_name ? 1 : 0;
645 default:
646 break;
091bd2e9
MG
647 }
648
649 return 0;
923c42c1
MG
650}
651
652static void handle_keypress(int c)
653{
091bd2e9
MG
654 if (!key_mapped(c)) {
655 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
656 struct termios tc, save;
657
658 print_mapped_keys();
659 fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
660 fflush(stdout);
661
662 tcgetattr(0, &save);
663 tc = save;
664 tc.c_lflag &= ~(ICANON | ECHO);
665 tc.c_cc[VMIN] = 0;
666 tc.c_cc[VTIME] = 0;
667 tcsetattr(0, TCSANOW, &tc);
668
669 poll(&stdin_poll, 1, -1);
670 c = getc(stdin);
671
672 tcsetattr(0, TCSAFLUSH, &save);
673 if (!key_mapped(c))
674 return;
675 }
676
923c42c1
MG
677 switch (c) {
678 case 'd':
679 prompt_integer(&delay_secs, "Enter display delay");
680 break;
681 case 'e':
682 prompt_integer(&print_entries, "Enter display entries (lines)");
683 break;
684 case 'E':
685 if (nr_counters > 1) {
686 int i;
687
688 fprintf(stderr, "\nAvailable events:");
689 for (i = 0; i < nr_counters; i++)
690 fprintf(stderr, "\n\t%d %s", i, event_name(i));
691
692 prompt_integer(&sym_counter, "Enter details event counter");
693
694 if (sym_counter >= nr_counters) {
695 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0));
696 sym_counter = 0;
697 sleep(1);
698 }
699 } else sym_counter = 0;
700 break;
701 case 'f':
702 prompt_integer(&count_filter, "Enter display event count filter");
703 break;
704 case 'F':
705 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
706 break;
707 case 'q':
708 case 'Q':
709 printf("exiting.\n");
710 exit(0);
711 case 's':
712 prompt_symbol(&sym_filter_entry, "Enter details symbol");
713 break;
714 case 'S':
715 if (!sym_filter_entry)
716 break;
717 else {
718 struct sym_entry *syme = sym_filter_entry;
719
720 pthread_mutex_lock(&syme->source_lock);
721 sym_filter_entry = NULL;
722 __zero_source_counters(syme);
723 pthread_mutex_unlock(&syme->source_lock);
724 }
725 break;
46ab9764
MG
726 case 'w':
727 display_weighted = ~display_weighted;
728 break;
923c42c1
MG
729 case 'z':
730 zero = ~zero;
731 break;
83a0944f
IM
732 default:
733 break;
923c42c1
MG
734 }
735}
736
f37a291c 737static void *display_thread(void *arg __used)
07800601 738{
0f5486b5 739 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
923c42c1
MG
740 struct termios tc, save;
741 int delay_msecs, c;
742
743 tcgetattr(0, &save);
744 tc = save;
745 tc.c_lflag &= ~(ICANON | ECHO);
746 tc.c_cc[VMIN] = 0;
747 tc.c_cc[VTIME] = 0;
091bd2e9 748
923c42c1
MG
749repeat:
750 delay_msecs = delay_secs * 1000;
751 tcsetattr(0, TCSANOW, &tc);
752 /* trash return*/
753 getc(stdin);
07800601 754
0f5486b5 755 do {
07800601 756 print_sym_table();
0f5486b5
FW
757 } while (!poll(&stdin_poll, 1, delay_msecs) == 1);
758
923c42c1
MG
759 c = getc(stdin);
760 tcsetattr(0, TCSAFLUSH, &save);
761
762 handle_keypress(c);
763 goto repeat;
07800601
IM
764
765 return NULL;
766}
767
2ab52083 768/* Tag samples to be skipped. */
f37a291c 769static const char *skip_symbols[] = {
2ab52083
AB
770 "default_idle",
771 "cpu_idle",
772 "enter_idle",
773 "exit_idle",
774 "mwait_idle",
59b90056 775 "mwait_idle_with_hints",
3a3393ef
AB
776 "ppc64_runlatch_off",
777 "pseries_dedicated_idle_sleep",
2ab52083
AB
778 NULL
779};
780
de04687f 781static int symbol_filter(struct dso *self, struct symbol *sym)
07800601 782{
de04687f
ACM
783 struct sym_entry *syme;
784 const char *name = sym->name;
2ab52083 785 int i;
de04687f 786
3a3393ef
AB
787 /*
788 * ppc64 uses function descriptors and appends a '.' to the
789 * start of every instruction address. Remove it.
790 */
791 if (name[0] == '.')
792 name++;
793
de04687f
ACM
794 if (!strcmp(name, "_text") ||
795 !strcmp(name, "_etext") ||
796 !strcmp(name, "_sinittext") ||
797 !strncmp("init_module", name, 11) ||
798 !strncmp("cleanup_module", name, 14) ||
799 strstr(name, "_text_start") ||
800 strstr(name, "_text_end"))
07800601 801 return 1;
07800601 802
de04687f 803 syme = dso__sym_priv(self, sym);
923c42c1
MG
804 pthread_mutex_init(&syme->source_lock, NULL);
805 if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter))
806 sym_filter_entry = syme;
807
2ab52083
AB
808 for (i = 0; skip_symbols[i]; i++) {
809 if (!strcmp(skip_symbols[i], name)) {
810 syme->skip = 1;
811 break;
812 }
813 }
07800601 814
07800601
IM
815 return 0;
816}
817
de04687f 818static int parse_symbols(void)
07800601 819{
de04687f
ACM
820 struct rb_node *node;
821 struct symbol *sym;
83a0944f 822 int use_modules = vmlinux_name ? 1 : 0;
07800601 823
de04687f
ACM
824 kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry));
825 if (kernel_dso == NULL)
826 return -1;
07800601 827
83a0944f 828 if (dso__load_kernel(kernel_dso, vmlinux_name, symbol_filter, verbose, use_modules) <= 0)
de04687f 829 goto out_delete_dso;
07800601 830
de04687f
ACM
831 node = rb_first(&kernel_dso->syms);
832 sym = rb_entry(node, struct symbol, rb_node);
833 min_ip = sym->start;
07800601 834
de04687f
ACM
835 node = rb_last(&kernel_dso->syms);
836 sym = rb_entry(node, struct symbol, rb_node);
da417a75 837 max_ip = sym->end;
07800601 838
de04687f 839 if (dump_symtab)
a3ec8d70 840 dso__fprintf(kernel_dso, stderr);
07800601 841
de04687f 842 return 0;
07800601 843
de04687f
ACM
844out_delete_dso:
845 dso__delete(kernel_dso);
846 kernel_dso = NULL;
847 return -1;
07800601
IM
848}
849
07800601
IM
850/*
851 * Binary search in the histogram table and record the hit:
852 */
9cffa8d5 853static void record_ip(u64 ip, int counter)
07800601 854{
de04687f 855 struct symbol *sym = dso__find_symbol(kernel_dso, ip);
07800601 856
de04687f
ACM
857 if (sym != NULL) {
858 struct sym_entry *syme = dso__sym_priv(kernel_dso, sym);
07800601 859
de04687f
ACM
860 if (!syme->skip) {
861 syme->count[counter]++;
923c42c1 862 record_precise_ip(syme, counter, ip);
c44613a4 863 pthread_mutex_lock(&active_symbols_lock);
de04687f 864 if (list_empty(&syme->node) || !syme->node.next)
c44613a4
ACM
865 __list_insert_active_sym(syme);
866 pthread_mutex_unlock(&active_symbols_lock);
de04687f 867 return;
07800601 868 }
07800601
IM
869 }
870
2debbc83 871 samples--;
07800601
IM
872}
873
e6e18ec7 874static void process_event(u64 ip, int counter, int user)
07800601 875{
2debbc83 876 samples++;
07800601 877
e6e18ec7 878 if (user) {
2debbc83 879 userspace_samples++;
07800601
IM
880 return;
881 }
882
883 record_ip(ip, counter);
884}
885
/* Reader state for one mmap'ed counter buffer (set up in start_counter()). */
struct mmap_data {
	int			counter;	/* counter index passed to process_event() */
	void			*base;		/* start of the mapping; data begins one page in */
	int			mask;		/* data area size in bytes minus one */
	unsigned int		prev;		/* position up to which data was consumed */
};
892
893static unsigned int mmap_read_head(struct mmap_data *md)
894{
895 struct perf_counter_mmap_page *pc = md->base;
896 int head;
897
898 head = pc->data_head;
899 rmb();
900
901 return head;
902}
903
904struct timeval last_read, this_read;
905
2f01190a 906static void mmap_read_counter(struct mmap_data *md)
07800601
IM
907{
908 unsigned int head = mmap_read_head(md);
909 unsigned int old = md->prev;
910 unsigned char *data = md->base + page_size;
911 int diff;
912
913 gettimeofday(&this_read, NULL);
914
915 /*
916 * If we're further behind than half the buffer, there's a chance
2debbc83 917 * the writer will bite our tail and mess up the samples under us.
07800601
IM
918 *
919 * If we somehow ended up ahead of the head, we got messed up.
920 *
921 * In either case, truncate and restart at head.
922 */
923 diff = head - old;
924 if (diff > md->mask / 2 || diff < 0) {
925 struct timeval iv;
926 unsigned long msecs;
927
928 timersub(&this_read, &last_read, &iv);
929 msecs = iv.tv_sec*1000 + iv.tv_usec/1000;
930
931 fprintf(stderr, "WARNING: failed to keep up with mmap data."
932 " Last read %lu msecs ago.\n", msecs);
933
934 /*
935 * head points to a known good entry, start there.
936 */
937 old = head;
938 }
939
940 last_read = this_read;
941
942 for (; old != head;) {
07800601
IM
943 event_t *event = (event_t *)&data[old & md->mask];
944
945 event_t event_copy;
946
6f06ccbc 947 size_t size = event->header.size;
07800601
IM
948
949 /*
950 * Event straddles the mmap boundary -- header should always
951 * be inside due to u64 alignment of output.
952 */
953 if ((old & md->mask) + size != ((old + size) & md->mask)) {
954 unsigned int offset = old;
955 unsigned int len = min(sizeof(*event), size), cpy;
956 void *dst = &event_copy;
957
958 do {
959 cpy = min(md->mask + 1 - (offset & md->mask), len);
960 memcpy(dst, &data[offset & md->mask], cpy);
961 offset += cpy;
962 dst += cpy;
963 len -= cpy;
964 } while (len);
965
966 event = &event_copy;
967 }
968
969 old += size;
970
e6e18ec7
PZ
971 if (event->header.type == PERF_EVENT_SAMPLE) {
972 int user =
973 (event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK) == PERF_EVENT_MISC_USER;
974 process_event(event->ip.ip, md->counter, user);
07800601
IM
975 }
976 }
977
978 md->prev = old;
979}
980
c2990a2a
MG
981static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS];
982static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS];
983
2f01190a
FW
984static void mmap_read(void)
985{
986 int i, counter;
987
988 for (i = 0; i < nr_cpus; i++) {
989 for (counter = 0; counter < nr_counters; counter++)
990 mmap_read_counter(&mmap_array[i][counter]);
991 }
992}
993
716c69fe
IM
994int nr_poll;
995int group_fd;
996
997static void start_counter(int i, int counter)
07800601 998{
a21ca2ca 999 struct perf_counter_attr *attr;
0fdc7e67 1000 int cpu;
716c69fe
IM
1001
1002 cpu = profile_cpu;
1003 if (target_pid == -1 && profile_cpu == -1)
1004 cpu = i;
1005
1006 attr = attrs + counter;
1007
1008 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
1009 attr->freq = freq;
0fdc7e67 1010 attr->inherit = (cpu < 0) && inherit;
716c69fe
IM
1011
1012try_again:
1013 fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0);
1014
1015 if (fd[i][counter] < 0) {
1016 int err = errno;
1017
716c69fe 1018 if (err == EPERM)
3da297a6 1019 die("No permission - are you root?\n");
716c69fe
IM
1020 /*
1021 * If it's cycles then fall back to hrtimer
1022 * based cpu-clock-tick sw counter, which
1023 * is always available even if no PMU support:
1024 */
1025 if (attr->type == PERF_TYPE_HARDWARE
f4dbfa8f 1026 && attr->config == PERF_COUNT_HW_CPU_CYCLES) {
716c69fe 1027
3da297a6
IM
1028 if (verbose)
1029 warning(" ... trying to fall back to cpu-clock-ticks\n");
1030
716c69fe 1031 attr->type = PERF_TYPE_SOFTWARE;
f4dbfa8f 1032 attr->config = PERF_COUNT_SW_CPU_CLOCK;
716c69fe
IM
1033 goto try_again;
1034 }
30c806a0
IM
1035 printf("\n");
1036 error("perfcounter syscall returned with %d (%s)\n",
1037 fd[i][counter], strerror(err));
1038 die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n");
716c69fe
IM
1039 exit(-1);
1040 }
1041 assert(fd[i][counter] >= 0);
1042 fcntl(fd[i][counter], F_SETFL, O_NONBLOCK);
1043
1044 /*
1045 * First counter acts as the group leader:
1046 */
1047 if (group && group_fd == -1)
1048 group_fd = fd[i][counter];
1049
1050 event_array[nr_poll].fd = fd[i][counter];
1051 event_array[nr_poll].events = POLLIN;
1052 nr_poll++;
1053
1054 mmap_array[i][counter].counter = counter;
1055 mmap_array[i][counter].prev = 0;
1056 mmap_array[i][counter].mask = mmap_pages*page_size - 1;
1057 mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size,
1058 PROT_READ, MAP_SHARED, fd[i][counter], 0);
1059 if (mmap_array[i][counter].base == MAP_FAILED)
1060 die("failed to mmap with %d (%s)\n", errno, strerror(errno));
1061}
1062
1063static int __cmd_top(void)
1064{
1065 pthread_t thread;
1066 int i, counter;
07800601
IM
1067 int ret;
1068
07800601
IM
1069 for (i = 0; i < nr_cpus; i++) {
1070 group_fd = -1;
716c69fe
IM
1071 for (counter = 0; counter < nr_counters; counter++)
1072 start_counter(i, counter);
07800601
IM
1073 }
1074
2f01190a
FW
1075 /* Wait for a minimal set of events before starting the snapshot */
1076 poll(event_array, nr_poll, 100);
1077
1078 mmap_read();
1079
07800601
IM
1080 if (pthread_create(&thread, NULL, display_thread, NULL)) {
1081 printf("Could not create display thread.\n");
1082 exit(-1);
1083 }
1084
1085 if (realtime_prio) {
1086 struct sched_param param;
1087
1088 param.sched_priority = realtime_prio;
1089 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1090 printf("Could not set realtime priority.\n");
1091 exit(-1);
1092 }
1093 }
1094
1095 while (1) {
2debbc83 1096 int hits = samples;
07800601 1097
2f01190a 1098 mmap_read();
07800601 1099
2debbc83 1100 if (hits == samples)
07800601
IM
1101 ret = poll(event_array, nr_poll, 100);
1102 }
1103
1104 return 0;
1105}
b456bae0
IM
1106
1107static const char * const top_usage[] = {
1108 "perf top [<options>]",
1109 NULL
1110};
1111
b456bae0
IM
1112static const struct option options[] = {
1113 OPT_CALLBACK('e', "event", NULL, "event",
86847b62
TG
1114 "event selector. use 'perf list' to list available events",
1115 parse_events),
b456bae0
IM
1116 OPT_INTEGER('c', "count", &default_interval,
1117 "event period to sample"),
1118 OPT_INTEGER('p', "pid", &target_pid,
1119 "profile events on existing pid"),
1120 OPT_BOOLEAN('a', "all-cpus", &system_wide,
1121 "system-wide collection from all CPUs"),
1122 OPT_INTEGER('C', "CPU", &profile_cpu,
1123 "CPU to profile on"),
83a0944f 1124 OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"),
b456bae0
IM
1125 OPT_INTEGER('m', "mmap-pages", &mmap_pages,
1126 "number of mmap data pages"),
1127 OPT_INTEGER('r', "realtime", &realtime_prio,
1128 "collect data with this RT SCHED_FIFO priority"),
db20c003 1129 OPT_INTEGER('d', "delay", &delay_secs,
b456bae0
IM
1130 "number of seconds to delay between refreshes"),
1131 OPT_BOOLEAN('D', "dump-symtab", &dump_symtab,
1132 "dump the symbol table used for profiling"),
6e53cdf1 1133 OPT_INTEGER('f', "count-filter", &count_filter,
b456bae0
IM
1134 "only display functions with more events than this"),
1135 OPT_BOOLEAN('g', "group", &group,
1136 "put the counters into a counter group"),
0fdc7e67
MG
1137 OPT_BOOLEAN('i', "inherit", &inherit,
1138 "child tasks inherit counters"),
923c42c1
MG
1139 OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
1140 "symbol to annotate - requires -k option"),
1f208ea6 1141 OPT_BOOLEAN('z', "zero", &zero,
b456bae0 1142 "zero history across updates"),
6e53cdf1 1143 OPT_INTEGER('F', "freq", &freq,
b456bae0 1144 "profile at this frequency"),
6e53cdf1
IM
1145 OPT_INTEGER('E', "entries", &print_entries,
1146 "display this many functions"),
3da297a6
IM
1147 OPT_BOOLEAN('v', "verbose", &verbose,
1148 "be more verbose (show counter open errors, etc)"),
b456bae0
IM
1149 OPT_END()
1150};
1151
f37a291c 1152int cmd_top(int argc, const char **argv, const char *prefix __used)
b456bae0
IM
1153{
1154 int counter;
1155
42976487
MG
1156 symbol__init();
1157
b456bae0
IM
1158 page_size = sysconf(_SC_PAGE_SIZE);
1159
b456bae0
IM
1160 argc = parse_options(argc, argv, options, top_usage, 0);
1161 if (argc)
1162 usage_with_options(top_usage, options);
1163
1164 if (freq) {
1165 default_interval = freq;
1166 freq = 1;
1167 }
1168
1169 /* CPU and PID are mutually exclusive */
1170 if (target_pid != -1 && profile_cpu != -1) {
1171 printf("WARNING: PID switch overriding CPU\n");
1172 sleep(1);
1173 profile_cpu = -1;
1174 }
1175
a21ca2ca 1176 if (!nr_counters)
b456bae0 1177 nr_counters = 1;
b456bae0 1178
2f335a02
FW
1179 if (delay_secs < 1)
1180 delay_secs = 1;
1181
a21ca2ca 1182 parse_symbols();
923c42c1 1183 parse_source(sym_filter_entry);
a21ca2ca
IM
1184
1185 /*
1186 * Fill in the ones not specifically initialized via -c:
1187 */
b456bae0 1188 for (counter = 0; counter < nr_counters; counter++) {
a21ca2ca 1189 if (attrs[counter].sample_period)
b456bae0
IM
1190 continue;
1191
a21ca2ca 1192 attrs[counter].sample_period = default_interval;
b456bae0
IM
1193 }
1194
1195 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
1196 assert(nr_cpus <= MAX_NR_CPUS);
1197 assert(nr_cpus >= 0);
1198
1199 if (target_pid != -1 || profile_cpu != -1)
1200 nr_cpus = 1;
1201
b456bae0
IM
1202 return __cmd_top();
1203}