/*
 * tools/perf/builtin-kmem.c
 * (snapshot after "perf kmem: Support using -f to override perf.data file ownership")
 */
#include "builtin.h"
#include "perf.h"

#include "util/evlist.h"
#include "util/evsel.h"
#include "util/util.h"
#include "util/cache.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"

#include "util/parse-options.h"
#include "util/trace-event.h"
#include "util/data.h"
#include "util/cpumap.h"

#include "util/debug.h"

#include <linux/rbtree.h>
#include <linux/string.h>
#include <locale.h>

struct alloc_stat;
typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);

static int alloc_flag;
static int caller_flag;

static int alloc_lines = -1;
static int caller_lines = -1;

static bool raw_ip;

struct alloc_stat {
	u64	call_site;
	u64	ptr;
	u64	bytes_req;
	u64	bytes_alloc;
	u32	hit;
	u32	pingpong;

	short	alloc_cpu;

	struct rb_node node;
};

static struct rb_root root_alloc_stat;
static struct rb_root root_alloc_sorted;
static struct rb_root root_caller_stat;
static struct rb_root root_caller_sorted;

static unsigned long total_requested, total_allocated;
static unsigned long nr_allocs, nr_cross_allocs;

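/*
 * Record one allocation in the rb-tree keyed by pointer value. If the
 * pointer is already present (e.g. the slot was freed and handed out
 * again), the hit count and byte totals accumulate; otherwise a new
 * node is inserted. The call site and allocating CPU always reflect
 * the most recent allocation, which is what the free-event handler
 * matches against.
 */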
static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
			     int bytes_req, int bytes_alloc, int cpu)
{
	struct rb_node **node = &root_alloc_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (ptr > data->ptr)
			node = &(*node)->rb_right;
		else if (ptr < data->ptr)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->ptr == ptr) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data) {
			pr_err("%s: malloc failed\n", __func__);
			return -1;
		}
		data->ptr = ptr;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_alloc_stat);
	}
	data->call_site = call_site;
	data->alloc_cpu = cpu;
	return 0;
}

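/*
 * Same accumulation scheme as insert_alloc_stat(), but keyed by call
 * site, so every allocation made from one location aggregates into a
 * single node.
 */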
static int insert_caller_stat(unsigned long call_site,
			      int bytes_req, int bytes_alloc)
{
	struct rb_node **node = &root_caller_stat.rb_node;
	struct rb_node *parent = NULL;
	struct alloc_stat *data = NULL;

	while (*node) {
		parent = *node;
		data = rb_entry(*node, struct alloc_stat, node);

		if (call_site > data->call_site)
			node = &(*node)->rb_right;
		else if (call_site < data->call_site)
			node = &(*node)->rb_left;
		else
			break;
	}

	if (data && data->call_site == call_site) {
		data->hit++;
		data->bytes_req += bytes_req;
		data->bytes_alloc += bytes_alloc;
	} else {
		data = malloc(sizeof(*data));
		if (!data) {
			pr_err("%s: malloc failed\n", __func__);
			return -1;
		}
		data->call_site = call_site;
		data->pingpong = 0;
		data->hit = 1;
		data->bytes_req = bytes_req;
		data->bytes_alloc = bytes_alloc;

		rb_link_node(&data->node, parent, node);
		rb_insert_color(&data->node, &root_caller_stat);
	}

	return 0;
}

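/*
 * Handler for the kmem:kmalloc and kmem:kmem_cache_alloc tracepoints:
 * pull the tracepoint fields out of the sample, feed both statistics
 * trees, and update the global totals used by the summary.
 */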
static int perf_evsel__process_alloc_event(struct perf_evsel *evsel,
					   struct perf_sample *sample)
{
	unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"),
		      call_site = perf_evsel__intval(evsel, sample, "call_site");
	int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"),
	    bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");

	if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
	    insert_caller_stat(call_site, bytes_req, bytes_alloc))
		return -1;

	total_requested += bytes_req;
	total_allocated += bytes_alloc;

	nr_allocs++;
	return 0;
}

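/*
 * Handler for the _node variants (kmalloc_node, kmem_cache_alloc_node):
 * on top of the normal accounting, compare the NUMA node of the
 * allocating CPU with the node recorded in the event, and count a
 * cross-node allocation when they differ.
 */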
static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
						struct perf_sample *sample)
{
	int ret = perf_evsel__process_alloc_event(evsel, sample);

	if (!ret) {
		int node1 = cpu__get_node(sample->cpu),
		    node2 = perf_evsel__intval(evsel, sample, "node");

		if (node1 != node2)
			nr_cross_allocs++;
	}

	return ret;
}

static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);

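/*
 * Look up a statistics node using the given comparison function; the
 * key is a throwaway alloc_stat carrying only the ptr and call_site
 * fields, so the same helper serves both trees.
 */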
static struct alloc_stat *search_alloc_stat(unsigned long ptr,
					    unsigned long call_site,
					    struct rb_root *root,
					    sort_fn_t sort_fn)
{
	struct rb_node *node = root->rb_node;
	struct alloc_stat key = { .ptr = ptr, .call_site = call_site };

	while (node) {
		struct alloc_stat *data;
		int cmp;

		data = rb_entry(node, struct alloc_stat, node);

		cmp = sort_fn(&key, data);
		if (cmp < 0)
			node = node->rb_left;
		else if (cmp > 0)
			node = node->rb_right;
		else
			return data;
	}
	return NULL;
}

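/*
 * Handler for kmem:kfree and kmem:kmem_cache_free: when an object is
 * freed on a different CPU than it was allocated on, bump the
 * "ping-pong" counters on both the pointer node and its call site.
 */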
static int perf_evsel__process_free_event(struct perf_evsel *evsel,
					  struct perf_sample *sample)
{
	unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
	struct alloc_stat *s_alloc, *s_caller;

	s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
	if (!s_alloc)
		return 0;

	if ((short)sample->cpu != s_alloc->alloc_cpu) {
		s_alloc->pingpong++;

		s_caller = search_alloc_stat(0, s_alloc->call_site,
					     &root_caller_stat, callsite_cmp);
		if (!s_caller)
			return -1;
		s_caller->pingpong++;
	}
	s_alloc->alloc_cpu = -1;

	return 0;
}

typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
				  struct perf_sample *sample);

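/*
 * Per-sample dispatch: resolve the thread (for debug output) and
 * forward the sample to the tracepoint handler installed on the evsel.
 */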
static int process_sample_event(struct perf_tool *tool __maybe_unused,
				union perf_event *event,
				struct perf_sample *sample,
				struct perf_evsel *evsel,
				struct machine *machine)
{
	struct thread *thread = machine__findnew_thread(machine, sample->pid,
							sample->tid);

	if (thread == NULL) {
		pr_debug("problem processing %d event, skipping it.\n",
			 event->header.type);
		return -1;
	}

	dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		return f(evsel, sample);
	}

	return 0;
}

static struct perf_tool perf_kmem = {
	.sample		= process_sample_event,
	.comm		= perf_event__process_comm,
	.mmap		= perf_event__process_mmap,
	.mmap2		= perf_event__process_mmap2,
	.ordered_events	= true,
};

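/*
 * Internal fragmentation as a percentage: the share of the allocated
 * bytes that was never requested, i.e. 100 * (1 - n_req / n_alloc).
 */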
static double fragmentation(unsigned long n_req, unsigned long n_alloc)
{
	if (n_alloc == 0)
		return 0.0;
	else
		return 100.0 - (100.0 * n_req / n_alloc);
}

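/*
 * Print up to n_lines rows of a sorted result tree. For the caller
 * table the call site is resolved to a kernel symbol (unless --raw-ip
 * was given); the allocation table prints raw pointers.
 */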
static void __print_result(struct rb_root *root, struct perf_session *session,
			   int n_lines, int is_caller)
{
	struct rb_node *next;
	struct machine *machine = &session->machines.host;

	printf("%.105s\n", graph_dotted_line);
	printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
	printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n");
	printf("%.105s\n", graph_dotted_line);

	next = rb_first(root);

	while (next && n_lines--) {
		struct alloc_stat *data = rb_entry(next, struct alloc_stat,
						   node);
		struct symbol *sym = NULL;
		struct map *map;
		char buf[BUFSIZ];
		u64 addr;

		if (is_caller) {
			addr = data->call_site;
			if (!raw_ip)
				sym = machine__find_kernel_function(machine, addr, &map, NULL);
		} else
			addr = data->ptr;

		if (sym != NULL)
			snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
				 addr - map->unmap_ip(map, sym->start));
		else
			snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
		printf(" %-34s |", buf);

		printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n",
		       (unsigned long long)data->bytes_alloc,
		       (unsigned long)data->bytes_alloc / data->hit,
		       (unsigned long long)data->bytes_req,
		       (unsigned long)data->bytes_req / data->hit,
		       (unsigned long)data->hit,
		       (unsigned long)data->pingpong,
		       fragmentation(data->bytes_req, data->bytes_alloc));

		next = rb_next(next);
	}

	if (n_lines == -1)
		printf(" ... | ... | ... | ... | ... | ... \n");

	printf("%.105s\n", graph_dotted_line);
}

static void print_summary(void)
{
	printf("\nSUMMARY\n=======\n");
	printf("Total bytes requested: %'lu\n", total_requested);
	printf("Total bytes allocated: %'lu\n", total_allocated);
	printf("Total bytes wasted on internal fragmentation: %'lu\n",
	       total_allocated - total_requested);
	printf("Internal fragmentation: %f%%\n",
	       fragmentation(total_requested, total_allocated));
	printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs);
}

static void print_result(struct perf_session *session)
{
	if (caller_flag)
		__print_result(&root_caller_sorted, session, caller_lines, 1);
	if (alloc_flag)
		__print_result(&root_alloc_sorted, session, alloc_lines, 0);
	print_summary();
}

struct sort_dimension {
	const char	name[20];
	sort_fn_t	cmp;
	struct list_head list;
};

static LIST_HEAD(caller_sort);
static LIST_HEAD(alloc_sort);

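/*
 * Insert a node into a result tree ordered by the user's sort keys:
 * walk the key list until one comparison breaks the tie. Note the
 * inverted link (cmp > 0 goes left), so rb_first() yields the largest
 * entries first.
 */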
static void sort_insert(struct rb_root *root, struct alloc_stat *data,
			struct list_head *sort_list)
{
	struct rb_node **new = &(root->rb_node);
	struct rb_node *parent = NULL;
	struct sort_dimension *sort;

	while (*new) {
		struct alloc_stat *this;
		int cmp = 0;

		this = rb_entry(*new, struct alloc_stat, node);
		parent = *new;

		list_for_each_entry(sort, sort_list, list) {
			cmp = sort->cmp(data, this);
			if (cmp)
				break;
		}

		if (cmp > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&data->node, parent, new);
	rb_insert_color(&data->node, root);
}

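/*
 * Drain the accumulation tree node by node and re-insert each entry
 * into the sorted tree according to the chosen sort keys.
 */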
static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
			  struct list_head *sort_list)
{
	struct rb_node *node;
	struct alloc_stat *data;

	for (;;) {
		node = rb_first(root);
		if (!node)
			break;

		rb_erase(node, root);
		data = rb_entry(node, struct alloc_stat, node);
		sort_insert(root_sorted, data, sort_list);
	}
}

static void sort_result(void)
{
	__sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
	__sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
}

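/*
 * "perf kmem stat" core: bind the six kmem tracepoints to their
 * handlers, replay the recorded events, then sort and print both
 * tables.
 */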
static int __cmd_kmem(struct perf_session *session)
{
	int err = -EINVAL;
	const struct perf_evsel_str_handler kmem_tracepoints[] = {
		{ "kmem:kmalloc",		perf_evsel__process_alloc_event, },
		{ "kmem:kmem_cache_alloc",	perf_evsel__process_alloc_event, },
		{ "kmem:kmalloc_node",		perf_evsel__process_alloc_node_event, },
		{ "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
		{ "kmem:kfree",			perf_evsel__process_free_event, },
		{ "kmem:kmem_cache_free",	perf_evsel__process_free_event, },
	};

	if (!perf_session__has_traces(session, "kmem record"))
		goto out;

	if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
		pr_err("Initializing perf session tracepoint handlers failed\n");
		goto out;
	}

	setup_pager();
	err = perf_session__process_events(session);
	if (err != 0)
		goto out;
	sort_result();
	print_result(session);
out:
	return err;
}

static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->ptr < r->ptr)
		return -1;
	else if (l->ptr > r->ptr)
		return 1;
	return 0;
}

static struct sort_dimension ptr_sort_dimension = {
	.name	= "ptr",
	.cmp	= ptr_cmp,
};

static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->call_site < r->call_site)
		return -1;
	else if (l->call_site > r->call_site)
		return 1;
	return 0;
}

static struct sort_dimension callsite_sort_dimension = {
	.name	= "callsite",
	.cmp	= callsite_cmp,
};

static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->hit < r->hit)
		return -1;
	else if (l->hit > r->hit)
		return 1;
	return 0;
}

static struct sort_dimension hit_sort_dimension = {
	.name	= "hit",
	.cmp	= hit_cmp,
};

static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->bytes_alloc < r->bytes_alloc)
		return -1;
	else if (l->bytes_alloc > r->bytes_alloc)
		return 1;
	return 0;
}

static struct sort_dimension bytes_sort_dimension = {
	.name	= "bytes",
	.cmp	= bytes_cmp,
};

static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	double x, y;

	x = fragmentation(l->bytes_req, l->bytes_alloc);
	y = fragmentation(r->bytes_req, r->bytes_alloc);

	if (x < y)
		return -1;
	else if (x > y)
		return 1;
	return 0;
}

static struct sort_dimension frag_sort_dimension = {
	.name	= "frag",
	.cmp	= frag_cmp,
};

static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
{
	if (l->pingpong < r->pingpong)
		return -1;
	else if (l->pingpong > r->pingpong)
		return 1;
	return 0;
}

static struct sort_dimension pingpong_sort_dimension = {
	.name	= "pingpong",
	.cmp	= pingpong_cmp,
};

static struct sort_dimension *avail_sorts[] = {
	&ptr_sort_dimension,
	&callsite_sort_dimension,
	&hit_sort_dimension,
	&bytes_sort_dimension,
	&frag_sort_dimension,
	&pingpong_sort_dimension,
};

#define NUM_AVAIL_SORTS ((int)ARRAY_SIZE(avail_sorts))

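/*
 * Match a --sort token against the available dimensions and append a
 * private copy to the given sort key list.
 */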
static int sort_dimension__add(const char *tok, struct list_head *list)
{
	struct sort_dimension *sort;
	int i;

	for (i = 0; i < NUM_AVAIL_SORTS; i++) {
		if (!strcmp(avail_sorts[i]->name, tok)) {
			sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i]));
			if (!sort) {
				pr_err("%s: memdup failed\n", __func__);
				return -1;
			}
			list_add_tail(&sort->list, list);
			return 0;
		}
	}

	return -1;
}

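/*
 * Split a comma-separated --sort argument and register each key in
 * order; the order determines comparison priority in sort_insert().
 */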
static int setup_sorting(struct list_head *sort_list, const char *arg)
{
	char *tok;
	char *str = strdup(arg);
	char *pos = str;

	if (!str) {
		pr_err("%s: strdup failed\n", __func__);
		return -1;
	}

	while (true) {
		tok = strsep(&pos, ",");
		if (!tok)
			break;
		if (sort_dimension__add(tok, sort_list) < 0) {
			error("Unknown --sort key: '%s'", tok);
			free(str);
			return -1;
		}
	}

	free(str);
	return 0;
}

/*
 * --caller and --alloc each set their flag to one more than the
 * other's current value, so whichever option appeared last on the
 * command line has the greater flag; -s and -l apply to that table.
 */
static int parse_sort_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	if (!arg)
		return -1;

	if (caller_flag > alloc_flag)
		return setup_sorting(&caller_sort, arg);
	else
		return setup_sorting(&alloc_sort, arg);
}

static int parse_caller_opt(const struct option *opt __maybe_unused,
			    const char *arg __maybe_unused,
			    int unset __maybe_unused)
{
	caller_flag = (alloc_flag + 1);
	return 0;
}

static int parse_alloc_opt(const struct option *opt __maybe_unused,
			   const char *arg __maybe_unused,
			   int unset __maybe_unused)
{
	alloc_flag = (caller_flag + 1);
	return 0;
}

static int parse_line_opt(const struct option *opt __maybe_unused,
			  const char *arg, int unset __maybe_unused)
{
	int lines;

	if (!arg)
		return -1;

	lines = strtoul(arg, NULL, 10);

	if (caller_flag > alloc_flag)
		caller_lines = lines;
	else
		alloc_lines = lines;

	return 0;
}

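/*
 * "perf kmem record": synthesize a "perf record" command line that
 * enables all six kmem tracepoints system-wide, append the user's
 * extra arguments, and hand off to cmd_record().
 */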
static int __cmd_record(int argc, const char **argv)
{
	const char * const record_args[] = {
		"record", "-a", "-R", "-c", "1",
		"-e", "kmem:kmalloc",
		"-e", "kmem:kmalloc_node",
		"-e", "kmem:kfree",
		"-e", "kmem:kmem_cache_alloc",
		"-e", "kmem:kmem_cache_alloc_node",
		"-e", "kmem:kmem_cache_free",
	};
	unsigned int rec_argc, i, j;
	const char **rec_argv;

	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = strdup(record_args[i]);

	for (j = 1; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}

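/*
 * Entry point: dispatch to record or stat mode. For stat, open the
 * perf.data file (with -f/--force overriding the ownership check),
 * build the CPU->node map for cross-allocation accounting, install
 * the default sort order if none was given, and run the analysis.
 */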
int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
{
	const char * const default_sort_order = "frag,hit,bytes";
	struct perf_data_file file = {
		.path = input_name,
		.mode = PERF_DATA_MODE_READ,
	};
	const struct option kmem_options[] = {
	OPT_STRING('i', "input", &input_name, "file", "input file name"),
	OPT_INCR('v', "verbose", &verbose,
		 "be more verbose (show symbol address, etc)"),
	OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
			   "show per-callsite statistics", parse_caller_opt),
	OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
			   "show per-allocation statistics", parse_alloc_opt),
	OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
		     "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
		     parse_sort_opt),
	OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
	OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
	OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
	OPT_END()
	};
	const char *const kmem_subcommands[] = { "record", "stat", NULL };
	const char *kmem_usage[] = {
		NULL,
		NULL
	};
	struct perf_session *session;
	int ret = -1;

	argc = parse_options_subcommand(argc, argv, kmem_options,
					kmem_subcommands, kmem_usage, 0);

	if (!argc)
		usage_with_options(kmem_usage, kmem_options);

	if (!strncmp(argv[0], "rec", 3)) {
		symbol__init(NULL);
		return __cmd_record(argc, argv);
	}

	session = perf_session__new(&file, false, &perf_kmem);
	if (session == NULL)
		return -1;

	symbol__init(&session->header.env);

	if (!strcmp(argv[0], "stat")) {
		setlocale(LC_ALL, "");

		if (cpu__setup_cpunode_map())
			goto out_delete;

		if (list_empty(&caller_sort))
			setup_sorting(&caller_sort, default_sort_order);
		if (list_empty(&alloc_sort))
			setup_sorting(&alloc_sort, default_sort_order);

		ret = __cmd_kmem(session);
	} else
		usage_with_options(kmem_usage, kmem_options);

out_delete:
	perf_session__delete(session);

	return ret;
}