4 * Builtin report command: Analyze the perf.data input file,
5 * look up and read DSOs and symbol information and display
6 * a histogram of results, along various sorting keys.
10 #include "util/util.h"
12 #include "util/color.h"
13 #include <linux/list.h>
14 #include "util/cache.h"
15 #include <linux/rbtree.h>
16 #include "util/symbol.h"
17 #include "util/string.h"
18 #include "util/callchain.h"
19 #include "util/strlist.h"
20 #include "util/values.h"
23 #include "util/debug.h"
24 #include "util/header.h"
25 #include "util/session.h"
27 #include "util/parse-options.h"
28 #include "util/parse-events.h"
30 #include "util/thread.h"
31 #include "util/sort.h"
32 #include "util/hist.h"
/*
 * File-scope state for the report command.  Most of these are bound to
 * command-line switches in the options[] table: input_name (-i),
 * show_nr_samples (-n), show_threads (-T), pretty_printing_style (--pretty)
 * and exclude_other (-x).  use_callchain is set by parse_callchain_opt() and
 * copied into the session by __cmd_report(); callchain_default_opt is the
 * default argument handed to the -g/--call-graph callback.
 */
34 static char const *input_name
= "perf.data";
37 static bool use_callchain
;
39 static int show_nr_samples
;
41 static int show_threads
;
42 static struct perf_read_values show_threads_values
;
44 static char default_pretty_printing_style
[] = "normal";
45 static char *pretty_printing_style
= default_pretty_printing_style
;
47 static int exclude_other
= 1;
49 static char callchain_default_opt
[] = "fractal,0.5";
/*
 * Write the left margin of a callchain line to fp: one fixed prefix string,
 * then one additional fprintf() per unit of left_margin.  ret accumulates the
 * fprintf() return values.
 * NOTE(review): the declarations and the return statement are not visible in
 * this view; presumably ret is returned as the number of characters printed.
 */
52 callchain__fprintf_left_margin(FILE *fp
, int left_margin
)
57 ret
= fprintf(fp
, " ");
59 for (i
= 0; i
< left_margin
; i
++)
60 ret
+= fprintf(fp
, " ");
/*
 * Print one separator line of the callchain graph: the left margin, then for
 * each level i below depth a pipe string when bit i of depth_mask is set,
 * and finally a newline.
 * NOTE(review): the branch that selects the second filler string (line 77)
 * is not visible here; presumably it is the else arm of the mask test.
 */
65 static size_t ipchain__fprintf_graph_line(FILE *fp
, int depth
, int depth_mask
,
71 ret
+= callchain__fprintf_left_margin(fp
, left_margin
);
73 for (i
= 0; i
< depth
; i
++)
74 if (depth_mask
& (1 << i
))
75 ret
+= fprintf(fp
, "| ");
77 ret
+= fprintf(fp
, " ");
79 ret
+= fprintf(fp
, "\n");
/*
 * Print one callchain entry as a line of the graph.
 *
 * After the left margin, each level i below depth gets a "|" when bit i of
 * depth_mask is set.  On the last level (i == depth - 1), when count is 0,
 * the share hits * 100.0 / total_samples is printed through
 * percent_color_fprintf().  The line ends with either the entry's symbol
 * name or its raw ip formatted with %p.
 * NOTE(review): the conditions choosing between the filler strings and
 * between symbol name and raw address are not visible in this view.
 */
84 ipchain__fprintf_graph(FILE *fp
, struct callchain_list
*chain
, int depth
,
85 int depth_mask
, int count
, u64 total_samples
,
86 int hits
, int left_margin
)
91 ret
+= callchain__fprintf_left_margin(fp
, left_margin
);
92 for (i
= 0; i
< depth
; i
++) {
93 if (depth_mask
& (1 << i
))
94 ret
+= fprintf(fp
, "|");
96 ret
+= fprintf(fp
, " ");
97 if (!count
&& i
== depth
- 1) {
100 percent
= hits
* 100.0 / total_samples
;
101 ret
+= percent_color_fprintf(fp
, "--%2.2f%%-- ", percent
);
103 ret
+= fprintf(fp
, "%s", " ");
106 ret
+= fprintf(fp
, "%s\n", chain
->sym
->name
);
108 ret
+= fprintf(fp
, "%p\n", (void *)(long)chain
->ip
);
/*
 * Placeholder symbol and callchain entry used to print the "remaining hits"
 * line of a callchain graph.  Both are set up by init_rem_hits(); rem_hits is
 * passed to ipchain__fprintf_graph() by __callchain__fprintf_graph().
 */
113 static struct symbol
*rem_sq_bracket
;
114 static struct callchain_list rem_hits
;
/*
 * Allocate the placeholder symbol for remaining hits.  The 6 extra bytes
 * after the struct hold the name "[...]" plus its terminating NUL.  On
 * allocation failure a message is written to stderr.  On success the symbol
 * is attached to rem_hits.
 * NOTE(review): what happens between the error message and the strcpy() is
 * not visible here; an early return is presumably present - confirm.
 */
116 static void init_rem_hits(void)
118 rem_sq_bracket
= malloc(sizeof(*rem_sq_bracket
) + 6);
119 if (!rem_sq_bracket
) {
120 fprintf(stderr
, "Not enough memory to display remaining hits\n");
124 strcpy(rem_sq_bracket
->name
, "[...]");
125 rem_hits
.sym
= rem_sq_bracket
;
/*
 * Recursive worker that prints the children of self as a callchain graph.
 *
 * In CHAIN_GRAPH_REL mode percentages are computed against
 * self->children_hit, otherwise against total_samples (new_total).  The
 * children are walked starting at rb_first(&self->rb_root).  For each child
 * a separator line is printed with the caller's depth_mask, then every entry
 * in child->val is printed via ipchain__fprintf_graph(), and the function
 * recurses into the child with bit `depth` added to the mask.
 *
 * For the last child the pipe of the current level (bit depth - 1) is
 * dropped from new_depth_mask, unless CHAIN_GRAPH_REL mode still has
 * remaining hits to show.  In that mode, when some but not all of new_total
 * remains, a final line is printed using the rem_hits placeholder entry.
 * NOTE(review): the loop header, the update of `remaining`, the handling of
 * entries with ip >= PERF_CONTEXT_MAX and several call arguments are not
 * visible in this view.
 */
129 __callchain__fprintf_graph(FILE *fp
, struct callchain_node
*self
,
130 u64 total_samples
, int depth
, int depth_mask
,
133 struct rb_node
*node
, *next
;
134 struct callchain_node
*child
;
135 struct callchain_list
*chain
;
136 int new_depth_mask
= depth_mask
;
142 if (callchain_param
.mode
== CHAIN_GRAPH_REL
)
143 new_total
= self
->children_hit
;
145 new_total
= total_samples
;
147 remaining
= new_total
;
149 node
= rb_first(&self
->rb_root
);
153 child
= rb_entry(node
, struct callchain_node
, rb_node
);
154 cumul
= cumul_hits(child
);
158 * The depth mask manages the output of pipes that show
159 * the depth. We don't want to keep the pipes of the current
160 * level for the last child of this depth.
161 * Except if we have remaining filtered hits. They will
162 * supersede the last child
164 next
= rb_next(node
);
165 if (!next
&& (callchain_param
.mode
!= CHAIN_GRAPH_REL
|| !remaining
))
166 new_depth_mask
&= ~(1 << (depth
- 1));
169 * But we keep the older depth mask for the line seperator
170 * to keep the level link until we reach the last child
172 ret
+= ipchain__fprintf_graph_line(fp
, depth
, depth_mask
,
175 list_for_each_entry(chain
, &child
->val
, list
) {
176 if (chain
->ip
>= PERF_CONTEXT_MAX
)
178 ret
+= ipchain__fprintf_graph(fp
, chain
, depth
,
184 ret
+= __callchain__fprintf_graph(fp
, child
, new_total
,
186 new_depth_mask
| (1 << depth
),
191 if (callchain_param
.mode
== CHAIN_GRAPH_REL
&&
192 remaining
&& remaining
!= new_total
) {
197 new_depth_mask
&= ~(1 << (depth
- 1));
199 ret
+= ipchain__fprintf_graph(fp
, &rem_hits
, depth
,
200 new_depth_mask
, 0, new_total
,
201 remaining
, left_margin
);
/*
 * Entry point for graph-mode callchain output of one callchain node.
 *
 * Walks the entries stored directly in self->val, printing each as a symbol
 * name or a raw address after the left margin, with a "|" line and a "---"
 * connector emitted along the way.  It then hands the node to
 * __callchain__fprintf_graph() with depth 1 and depth_mask 1 to print the
 * children.
 * NOTE(review): the action taken for entries with ip >= PERF_CONTEXT_MAX, for
 * the first entry when sort__first_dimension == SORT_SYM, and the role of
 * `printed` are not visible in this view.
 */
209 callchain__fprintf_graph(FILE *fp
, struct callchain_node
*self
,
210 u64 total_samples
, int left_margin
)
212 struct callchain_list
*chain
;
213 bool printed
= false;
217 list_for_each_entry(chain
, &self
->val
, list
) {
218 if (chain
->ip
>= PERF_CONTEXT_MAX
)
221 if (!i
++ && sort__first_dimension
== SORT_SYM
)
225 ret
+= callchain__fprintf_left_margin(fp
, left_margin
);
226 ret
+= fprintf(fp
, "|\n");
227 ret
+= callchain__fprintf_left_margin(fp
, left_margin
);
228 ret
+= fprintf(fp
, "---");
233 ret
+= callchain__fprintf_left_margin(fp
, left_margin
);
236 ret
+= fprintf(fp
, " %s\n", chain
->sym
->name
);
238 ret
+= fprintf(fp
, " %p\n", (void *)(long)chain
->ip
);
241 ret
+= __callchain__fprintf_graph(fp
, self
, total_samples
, 1, 1, left_margin
);
/*
 * Flat-mode callchain printer.  Recurses on self->parent first, so ancestors
 * are printed before this node, then prints every entry of self->val as a
 * symbol name or a raw address.
 * NOTE(review): the recursion's termination test and the handling of entries
 * with ip >= PERF_CONTEXT_MAX are not visible in this view.
 */
247 callchain__fprintf_flat(FILE *fp
, struct callchain_node
*self
,
250 struct callchain_list
*chain
;
256 ret
+= callchain__fprintf_flat(fp
, self
->parent
, total_samples
);
259 list_for_each_entry(chain
, &self
->val
, list
) {
260 if (chain
->ip
>= PERF_CONTEXT_MAX
)
263 ret
+= fprintf(fp
, " %s\n", chain
->sym
->name
);
265 ret
+= fprintf(fp
, " %p\n",
266 (void *)(long)chain
->ip
);
/*
 * Print every callchain attached to a histogram entry.
 *
 * Iterates self->sorted_chain from rb_first() using rb_next().  For each
 * chain, percent is chain->hit * 100.0 / total_samples.  The output format
 * depends on callchain_param.mode: one branch prints the percentage and then
 * the flat representation, while CHAIN_GRAPH_ABS and CHAIN_GRAPH_REL share
 * the graph printer.  A newline is printed after each chain.
 * NOTE(review): the remaining case labels and the loop condition are not
 * visible in this view.
 */
273 hist_entry_callchain__fprintf(FILE *fp
, struct hist_entry
*self
,
274 u64 total_samples
, int left_margin
)
276 struct rb_node
*rb_node
;
277 struct callchain_node
*chain
;
280 rb_node
= rb_first(&self
->sorted_chain
);
284 chain
= rb_entry(rb_node
, struct callchain_node
, rb_node
);
285 percent
= chain
->hit
* 100.0 / total_samples
;
286 switch (callchain_param
.mode
) {
288 ret
+= percent_color_fprintf(fp
, " %6.2f%%\n",
290 ret
+= callchain__fprintf_flat(fp
, chain
, total_samples
);
292 case CHAIN_GRAPH_ABS
: /* Falldown */
293 case CHAIN_GRAPH_REL
:
294 ret
+= callchain__fprintf_graph(fp
, chain
, total_samples
,
300 ret
+= fprintf(fp
, "\n");
301 rb_node
= rb_next(rb_node
);
/*
 * Print one histogram entry as a report row.
 *
 * The first column is either the overhead percentage
 * (self->count * 100.0 / total_samples) or the raw count; the format differs
 * when field_sep is set.  With show_nr_samples a sample-count column
 * follows.  Each sort entry on hist_entry__sort_list then prints its own
 * column through se->print(), passing *se->width when a width is available
 * and 0 otherwise.  When the session uses callchains, the entry's callchains
 * are printed below the row; for SORT_COMM the left margin is the first sort
 * entry's width minus the thread's comm length.
 * NOTE(review): the fprintf() calls for the sample count and for the column
 * separator do not add their return value to ret, so ret under-counts the
 * bytes written - confirm whether callers rely on it.
 * NOTE(review): the branches selecting between these statements, including
 * the guard around *field_sep, are not visible in this view.
 */
307 static size_t hist_entry__fprintf(FILE *fp
, struct hist_entry
*self
,
308 struct perf_session
*session
,
311 struct sort_entry
*se
;
314 if (exclude_other
&& !self
->parent
)
318 ret
= percent_color_fprintf(fp
,
319 field_sep
? "%.2f" : " %6.2f%%",
320 (self
->count
* 100.0) / total_samples
);
322 ret
= fprintf(fp
, field_sep
? "%lld" : "%12lld ", self
->count
);
324 if (show_nr_samples
) {
326 fprintf(fp
, "%c%lld", *field_sep
, self
->count
);
328 fprintf(fp
, "%11lld", self
->count
);
331 list_for_each_entry(se
, &hist_entry__sort_list
, list
) {
335 fprintf(fp
, "%s", field_sep
?: " ");
336 ret
+= se
->print(fp
, self
, se
->width
? *se
->width
: 0);
339 ret
+= fprintf(fp
, "\n");
341 if (session
->use_callchain
) {
344 if (sort__first_dimension
== SORT_COMM
) {
345 se
= list_first_entry(&hist_entry__sort_list
, typeof(*se
),
347 left_margin
= se
->width
? *se
->width
: 0;
348 left_margin
-= thread__comm_len(self
->thread
);
351 hist_entry_callchain__fprintf(fp
, self
, total_samples
,
/*
 * Widen the DSO column to fit this DSO's name.
 *
 * Only applies when no fixed column widths and no field separator were
 * requested, and the DSO either passes the dso_list filter or no filter is
 * set.  dsos__col_width grows to strlen(self->name) if that is larger.  The
 * DSO is then flagged via slen_calculated so callers can skip it next time.
 */
362 static void dso__calc_col_width(struct dso
*self
)
364 if (!symbol_conf
.col_width_list_str
&& !field_sep
&&
365 (!symbol_conf
.dso_list
||
366 strlist__has_entry(symbol_conf
.dso_list
, self
->name
))) {
367 unsigned int slen
= strlen(self
->name
);
368 if (slen
> dsos__col_width
)
369 dsos__col_width
= slen
;
372 self
->slen_calculated
= 1;
/*
 * Widen the comm and thread columns to fit this thread's command name.
 *
 * Only applies when no fixed column widths and no field separator were
 * requested, and the comm either passes the comm_list filter or no filter is
 * set.  When the comm is longer than comms__col_width, that width becomes
 * the comm length and threads__col_width becomes the comm length plus 6.
 */
375 static void thread__comm_adjust(struct thread
*self
)
377 char *comm
= self
->comm
;
379 if (!symbol_conf
.col_width_list_str
&& !field_sep
&&
380 (!symbol_conf
.comm_list
||
381 strlist__has_entry(symbol_conf
.comm_list
, comm
))) {
382 unsigned int slen
= strlen(comm
);
384 if (slen
> comms__col_width
) {
385 comms__col_width
= slen
;
386 threads__col_width
= slen
+ 6;
/*
 * Set the thread's command name with thread__set_comm() and then refresh the
 * column widths through thread__comm_adjust().
 * NOTE(review): the check on ret between the two calls and the return
 * statement are not visible in this view.
 */
391 static int thread__set_comm_adjust(struct thread
*self
, const char *comm
)
393 int ret
= thread__set_comm(self
, comm
);
398 thread__comm_adjust(self
);
404 * collect histogram counts
/*
 * Account one sample in the session's histogram.
 *
 * When a callchain is present and either parent sorting or callchain output
 * is enabled, the chain's symbols are resolved first.  The entry is then
 * found or created by __perf_session__add_hist_entry(), with count as the
 * weight.  With callchains enabled, the chain is appended to the entry's
 * callchain tree.
 * NOTE(review): the condition guarding callchain_init(), presumably tied to
 * the hit flag, and the error paths are not visible in this view.
 */
407 static int perf_session__add_hist_entry(struct perf_session
*self
,
408 struct addr_location
*al
,
409 struct ip_callchain
*chain
, u64 count
)
411 struct symbol
**syms
= NULL
, *parent
= NULL
;
413 struct hist_entry
*he
;
415 if ((sort__has_parent
|| self
->use_callchain
) && chain
)
416 syms
= perf_session__resolve_callchain(self
, al
->thread
,
418 he
= __perf_session__add_hist_entry(self
, al
, parent
, count
, &hit
);
425 if (self
->use_callchain
) {
427 callchain_init(&he
->callchain
);
428 append_chain(&he
->callchain
, chain
, syms
);
/*
 * Print the whole histogram report to fp.
 *
 * Output order, as far as visible here: a "# Samples:" line, the column
 * header row ("# Overhead", an optional samples column, then one header per
 * sort entry), a dotted underline row, and one row per entry in self->hists
 * printed by hist_entry__fprintf().  When symbol_conf.col_width_list_str is
 * set, each sort entry's width is taken from that comma-separated list with
 * atoi(); otherwise the width is the larger of the current width and the
 * header length.  A hint about --sort comm,dso is printed when both
 * sort_order and parent_pattern are still at their defaults.
 * rem_sq_bracket is freed and the per-thread values are displayed near the
 * end.
 * NOTE(review): raw_printing_style is 1 when --pretty is "raw"; where it is
 * consumed is not visible.  The conditions guarding *field_sep, the elide
 * checks and the handling of a NULL strchr() result are also not visible in
 * this view.
 */
435 static size_t perf_session__fprintf_hist_entries(struct perf_session
*self
,
436 u64 total_samples
, FILE *fp
)
438 struct hist_entry
*pos
;
439 struct sort_entry
*se
;
443 char *col_width
= symbol_conf
.col_width_list_str
;
444 int raw_printing_style
;
446 raw_printing_style
= !strcmp(pretty_printing_style
, "raw");
450 fprintf(fp
, "# Samples: %Ld\n", (u64
)total_samples
);
453 fprintf(fp
, "# Overhead");
454 if (show_nr_samples
) {
456 fprintf(fp
, "%cSamples", *field_sep
);
458 fputs(" Samples ", fp
);
460 list_for_each_entry(se
, &hist_entry__sort_list
, list
) {
464 fprintf(fp
, "%c%s", *field_sep
, se
->header
);
467 width
= strlen(se
->header
);
469 if (symbol_conf
.col_width_list_str
) {
471 *se
->width
= atoi(col_width
);
472 col_width
= strchr(col_width
, ',');
477 width
= *se
->width
= max(*se
->width
, width
);
479 fprintf(fp
, " %*s", width
, se
->header
);
486 fprintf(fp
, "# ........");
488 fprintf(fp
, " ..........");
489 list_for_each_entry(se
, &hist_entry__sort_list
, list
) {
499 width
= strlen(se
->header
);
500 for (i
= 0; i
< width
; i
++)
508 for (nd
= rb_first(&self
->hists
); nd
; nd
= rb_next(nd
)) {
509 pos
= rb_entry(nd
, struct hist_entry
, rb_node
);
510 ret
+= hist_entry__fprintf(fp
, pos
, self
, total_samples
);
513 if (sort_order
== default_sort_order
&&
514 parent_pattern
== default_parent_pattern
) {
516 fprintf(fp
, "# (For a higher level overview, try: perf report --sort comm,dso)\n");
521 free(rem_sq_bracket
);
524 perf_read_values_display(fp
, &show_threads_values
,
/*
 * Sanity-check that a sample's callchain fits inside its event record.
 *
 * chain_size starts as the event's total size and is reduced by the byte
 * offset of event->ip.__more_data within the event.  The chain is rejected
 * when chain->nr * sizeof(u64) exceeds that remainder.
 * NOTE(review): chain_size is unsigned, so a header size smaller than the
 * offset would wrap to a large value and defeat the check - confirm that
 * header.size is validated upstream.  The return statements are not visible
 * here; the caller tests for a result < 0.
 */
530 static int validate_chain(struct ip_callchain
*chain
, event_t
*event
)
532 unsigned int chain_size
;
534 chain_size
= event
->header
.size
;
535 chain_size
-= (unsigned long)&event
->ip
.__more_data
- (unsigned long)event
;
537 if (chain
->nr
*sizeof(u64
) > chain_size
)
/*
 * Handler for sample events.
 *
 * Steps visible here, in order:
 *  - zero and parse the sample into `data` according to session->sample_type;
 *  - if the session carries callchains, validate the chain and dump its ips;
 *  - find or create the thread for data.pid;
 *  - apply the comm_list filter to the thread's comm;
 *  - resolve data.ip to a map/symbol location with the event's cpumode;
 *  - update the DSO column width if this DSO has not been measured yet;
 *  - apply the dso_list filter (short name, or long name when it differs)
 *    and the sym_list filter;
 *  - add the sample to the histogram and add data.period to the session's
 *    events_stats.total.
 * NOTE(review): the return values and the action taken when a filter rejects
 * the sample are not visible in this view.
 */
543 static int process_sample_event(event_t
*event
, struct perf_session
*session
)
545 struct sample_data data
;
547 struct addr_location al
;
548 struct thread
*thread
;
550 memset(&data
, 0, sizeof(data
));
553 event__parse_sample(event
, session
->sample_type
, &data
);
555 dump_printf("(IP, %d): %d/%d: %p period: %Ld\n",
558 (void *)(long)data
.ip
,
559 (long long)data
.period
);
561 if (session
->sample_type
& PERF_SAMPLE_CALLCHAIN
) {
564 dump_printf("... chain: nr:%Lu\n", data
.callchain
->nr
);
566 if (validate_chain(data
.callchain
, event
) < 0) {
567 pr_debug("call-chain problem with event, "
573 for (i
= 0; i
< data
.callchain
->nr
; i
++)
574 dump_printf("..... %2d: %016Lx\n",
575 i
, data
.callchain
->ips
[i
]);
579 thread
= perf_session__findnew(session
, data
.pid
);
580 if (thread
== NULL
) {
581 pr_debug("problem processing %d event, skipping it.\n",
586 dump_printf(" ... thread: %s:%d\n", thread
->comm
, thread
->pid
);
588 if (symbol_conf
.comm_list
&&
589 !strlist__has_entry(symbol_conf
.comm_list
, thread
->comm
))
592 cpumode
= event
->header
.misc
& PERF_RECORD_MISC_CPUMODE_MASK
;
594 thread__find_addr_location(thread
, session
, cpumode
,
595 MAP__FUNCTION
, data
.ip
, &al
, NULL
);
597 * We have to do this here as we may have a dso with no symbol hit that
598 * has a name longer than the ones with symbols sampled.
600 if (al
.map
&& !sort_dso
.elide
&& !al
.map
->dso
->slen_calculated
)
601 dso__calc_col_width(al
.map
->dso
);
603 if (symbol_conf
.dso_list
&&
604 (!al
.map
|| !al
.map
->dso
||
605 !(strlist__has_entry(symbol_conf
.dso_list
, al
.map
->dso
->short_name
) ||
606 (al
.map
->dso
->short_name
!= al
.map
->dso
->long_name
&&
607 strlist__has_entry(symbol_conf
.dso_list
, al
.map
->dso
->long_name
)))))
610 if (symbol_conf
.sym_list
&& al
.sym
&&
611 !strlist__has_entry(symbol_conf
.sym_list
, al
.sym
->name
))
614 if (perf_session__add_hist_entry(session
, &al
, data
.callchain
, data
.period
)) {
615 pr_debug("problem incrementing symbol count, skipping event\n");
619 session
->events_stats
.total
+= data
.period
;
/*
 * Handler for comm events: find or create the thread for event->comm.pid
 * and set its command name, which also refreshes the comm column widths via
 * thread__set_comm_adjust().  If the thread lookup or the rename fails, a
 * "skipping event" message is emitted through dump_printf().
 * NOTE(review): the return values are not visible in this view.
 */
623 static int process_comm_event(event_t
*event
, struct perf_session
*session
)
625 struct thread
*thread
= perf_session__findnew(session
, event
->comm
.pid
);
627 dump_printf(": %s:%d\n", event
->comm
.comm
, event
->comm
.pid
);
629 if (thread
== NULL
||
630 thread__set_comm_adjust(thread
, event
->comm
.comm
)) {
631 dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
/*
 * Handler for read events.  Looks up the event attribute for event->read.id
 * in the session header, records the value for the pid/tid pair in
 * show_threads_values, and dumps the event.  The dump prints "FAIL" as the
 * event name when no attribute was found.
 * NOTE(review): the guard around the perf_read_values_add_value() call
 * (presumably show_threads) and the fallback name used there when attr is
 * NULL are not visible in this view.
 */
638 static int process_read_event(event_t
*event
, struct perf_session
*session __used
)
640 struct perf_event_attr
*attr
;
642 attr
= perf_header__find_attr(event
->read
.id
, &session
->header
);
645 const char *name
= attr
? __event_name(attr
->type
, attr
->config
)
647 perf_read_values_add_value(&show_threads_values
,
648 event
->read
.pid
, event
->read
.tid
,
654 dump_printf(": %d %d %s %Lu\n", event
->read
.pid
, event
->read
.tid
,
655 attr
? __event_name(attr
->type
, attr
->config
) : "FAIL",
/*
 * Check the requested options against what the recorded data contains.
 *
 * If the data has no callchains (PERF_SAMPLE_CALLCHAIN not set in
 * session->sample_type), an error is printed when the user asked for
 * --sort parent or for -g.  If the data does have callchains, a callchain
 * mode other than CHAIN_NONE is configured and callchain output was not
 * requested, it is enabled on the session and the callchain parameters are
 * registered.
 * NOTE(review): the return values of each path are not visible in this view.
 */
661 static int sample_type_check(struct perf_session
*session
)
663 if (!(session
->sample_type
& PERF_SAMPLE_CALLCHAIN
)) {
664 if (sort__has_parent
) {
665 fprintf(stderr
, "selected --sort parent, but no"
666 " callchain data. Did you call"
667 " perf record without -g?\n");
670 if (session
->use_callchain
) {
671 fprintf(stderr
, "selected -g but no callchain data."
672 " Did you call perf record without"
676 } else if (callchain_param
.mode
!= CHAIN_NONE
&& !session
->use_callchain
) {
677 session
->use_callchain
= true;
678 if (register_callchain_param(&callchain_param
) < 0) {
679 fprintf(stderr
, "Can't register callchain"
/*
 * Event dispatch table handed to perf_session__process_events().  Sample,
 * comm and read events use the handlers defined in this file; mmap, exit,
 * fork and lost events use the shared event__process_* handlers.  Exit and
 * fork share event__process_task.
 */
688 static struct perf_event_ops event_ops
= {
689 .process_sample_event
= process_sample_event
,
690 .process_mmap_event
= event__process_mmap
,
691 .process_comm_event
= process_comm_event
,
692 .process_exit_event
= event__process_task
,
693 .process_fork_event
= event__process_task
,
694 .process_lost_event
= event__process_lost
,
695 .process_read_event
= process_read_event
,
696 .sample_type_check
= sample_type_check
,
/*
 * Run the report: open input_name read-only as a session, process all events
 * through event_ops, then collapse and re-sort the histogram and print it to
 * stdout using the session's total event count.  The per-thread value store
 * is initialised before processing and destroyed afterwards, and the session
 * is deleted at the end.
 * NOTE(review): the conditions guarding the optional dumps
 * (event__print_totals, perf_session__fprintf, dsos__fprintf) and the error
 * paths are not visible in this view.
 */
700 static int __cmd_report(void)
703 struct perf_session
*session
;
705 session
= perf_session__new(input_name
, O_RDONLY
, force
);
709 session
->use_callchain
= use_callchain
;
712 perf_read_values_init(&show_threads_values
);
714 ret
= perf_session__process_events(session
, &event_ops
);
719 event__print_totals();
724 perf_session__fprintf(session
, stdout
);
727 dsos__fprintf(stdout
);
729 perf_session__collapse_resort(session
);
730 perf_session__output_resort(session
, session
->events_stats
.total
);
731 perf_session__fprintf_hist_entries(session
, session
->events_stats
.total
, stdout
);
734 perf_read_values_destroy(&show_threads_values
);
736 perf_session__delete(session
);
/*
 * Option callback for -g/--call-graph, parsing "output_type,min_percent".
 *
 * The first comma-separated token selects callchain_param.mode: "graph",
 * "flat", "fractal" or "none".  The second token, when present, is parsed
 * with strtod() into callchain_param.min_percent.  The parameters are then
 * registered with register_callchain_param().
 * NOTE(review): strtok() is applied to arg after casting away const, so the
 * option string is modified in place and strtok()'s static state is used.
 * NOTE(review): the strncmp() length is strlen(arg), not the keyword length.
 * After strtok() has cut arg at the first comma this is the token length, so
 * any prefix of a keyword matches (e.g. "g" selects graph) - confirm that
 * this abbreviation behaviour is intended.
 * NOTE(review): the "none" branch sets use_callchain = true, the same value
 * assigned at the top of the function; false looks like the intent - confirm.
 * NOTE(review): the checks on tok and endptr and the return statements are
 * not visible in this view.
 */
741 parse_callchain_opt(const struct option
*opt __used
, const char *arg
,
747 use_callchain
= true;
752 tok
= strtok((char *)arg
, ",");
756 /* get the output mode */
757 if (!strncmp(tok
, "graph", strlen(arg
)))
758 callchain_param
.mode
= CHAIN_GRAPH_ABS
;
760 else if (!strncmp(tok
, "flat", strlen(arg
)))
761 callchain_param
.mode
= CHAIN_FLAT
;
763 else if (!strncmp(tok
, "fractal", strlen(arg
)))
764 callchain_param
.mode
= CHAIN_GRAPH_REL
;
766 else if (!strncmp(tok
, "none", strlen(arg
))) {
767 callchain_param
.mode
= CHAIN_NONE
;
768 use_callchain
= true;
776 /* get the min percentage */
777 tok
= strtok(NULL
, ",");
781 callchain_param
.min_percent
= strtod(tok
, &endptr
);
786 if (register_callchain_param(&callchain_param
) < 0) {
787 fprintf(stderr
, "Can't register callchain params\n");
/*
 * Usage strings shown by parse_options() and usage_with_options(), and also
 * passed to setup_sorting().  The commented-out line above the definition
 * shows a static variant of the same declaration.
 */
793 //static const char * const report_usage[] = {
794 const char * const report_usage
[] = {
795 "perf report [<options>] <command>",
/*
 * Command-line option table for perf report.  Most entries store straight
 * into a file-scope variable or a symbol_conf field.  -g/--call-graph is a
 * callback option handled by parse_callchain_opt(), with
 * callchain_default_opt ("fractal,0.5") as its default argument.
 */
799 static const struct option options
[] = {
800 OPT_STRING('i', "input", &input_name
, "file",
802 OPT_BOOLEAN('v', "verbose", &verbose
,
803 "be more verbose (show symbol address, etc)"),
804 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace
,
805 "dump raw trace in ASCII"),
806 OPT_STRING('k', "vmlinux", &symbol_conf
.vmlinux_name
,
807 "file", "vmlinux pathname"),
808 OPT_BOOLEAN('f', "force", &force
, "don't complain, do it"),
809 OPT_BOOLEAN('m', "modules", &symbol_conf
.use_modules
,
810 "load module symbols - WARNING: use only with -k and LIVE kernel"),
811 OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples
,
812 "Show a column with the number of samples"),
813 OPT_BOOLEAN('T', "threads", &show_threads
,
814 "Show per-thread event counters"),
815 OPT_STRING(0, "pretty", &pretty_printing_style
, "key",
816 "pretty printing style key: normal raw"),
817 OPT_STRING('s', "sort", &sort_order
, "key[,key2...]",
818 "sort by key(s): pid, comm, dso, symbol, parent"),
819 OPT_BOOLEAN('P', "full-paths", &event_ops
.full_paths
,
820 "Don't shorten the pathnames taking into account the cwd"),
821 OPT_STRING('p', "parent", &parent_pattern
, "regex",
822 "regex filter to identify parent, see: '--sort parent'"),
823 OPT_BOOLEAN('x', "exclude-other", &exclude_other
,
824 "Only display entries with parent-match"),
825 OPT_CALLBACK_DEFAULT('g', "call-graph", NULL
, "output_type,min_percent",
826 "Display callchains using output_type and min percent threshold. "
827 "Default: fractal,0.5", &parse_callchain_opt
, callchain_default_opt
),
828 OPT_STRING('d', "dsos", &symbol_conf
.dso_list_str
, "dso[,dso...]",
829 "only consider symbols in these dsos"),
830 OPT_STRING('C', "comms", &symbol_conf
.comm_list_str
, "comm[,comm...]",
831 "only consider symbols in these comms"),
832 OPT_STRING('S', "symbols", &symbol_conf
.sym_list_str
, "symbol[,symbol...]",
833 "only consider these symbols"),
834 OPT_STRING('w', "column-widths", &symbol_conf
.col_width_list_str
,
836 "don't try to adjust column width, use these fixed values"),
837 OPT_STRING('t', "field-separator", &field_sep
, "separator",
838 "separator for columns, no spaces will be added between "
839 "columns '.' is reserved."),
/*
 * When a filter list holds exactly one entry, print it once as a
 * "# <list_name>: <entry>" header line on fp.
 * NOTE(review): self is not used in the visible lines; presumably the sort
 * entry's elide flag is set in the part not shown, so the column carrying
 * that single constant value is dropped from the rows - confirm.
 */
843 static void sort_entry__setup_elide(struct sort_entry
*self
,
844 struct strlist
*list
,
845 const char *list_name
, FILE *fp
)
847 if (list
&& strlist__nr_entries(list
) == 1) {
848 fprintf(fp
, "# %s: %s\n", list_name
, strlist__entry(list
, 0)->s
);
/*
 * Entry point of the report builtin.
 *
 * Parses the command line, initialises the symbol subsystem and the sort
 * keys, and adds an elided "parent" sort dimension when a non-default parent
 * pattern was given.  Leftover arguments lead to the usage message.  The
 * dso, comm and symbol filter lists are then set up for elision, "." is
 * rejected as a field separator, and __cmd_report() does the actual work.
 * NOTE(review): the conditions and return values around the error paths are
 * not visible in this view.
 */
853 int cmd_report(int argc
, const char **argv
, const char *prefix __used
)
855 argc
= parse_options(argc
, argv
, options
, report_usage
, 0);
859 if (symbol__init() < 0)
862 setup_sorting(report_usage
, options
);
864 if (parent_pattern
!= default_parent_pattern
) {
865 sort_dimension__add("parent");
866 sort_parent
.elide
= 1;
871 * Any (unrecognized) arguments left?
874 usage_with_options(report_usage
, options
);
876 sort_entry__setup_elide(&sort_dso
, symbol_conf
.dso_list
, "dso", stdout
);
877 sort_entry__setup_elide(&sort_comm
, symbol_conf
.comm_list
, "comm", stdout
);
878 sort_entry__setup_elide(&sort_sym
, symbol_conf
.sym_list
, "symbol", stdout
);
880 if (field_sep
&& *field_sep
== '.') {
881 fputs("'.' is the only non valid --field-separator argument\n",
886 return __cmd_report();