4 #include "util/evlist.h"
5 #include "util/evsel.h"
7 #include "util/cache.h"
8 #include "util/symbol.h"
9 #include "util/thread.h"
10 #include "util/header.h"
11 #include "util/session.h"
12 #include "util/tool.h"
14 #include "util/parse-options.h"
15 #include "util/trace-event.h"
16 #include "util/data.h"
17 #include "util/cpumap.h"
19 #include "util/debug.h"
21 #include <linux/rbtree.h>
22 #include <linux/string.h>
26 typedef int (*sort_fn_t
)(struct alloc_stat
*, struct alloc_stat
*);
28 static int alloc_flag
;
29 static int caller_flag
;
31 static int alloc_lines
= -1;
32 static int caller_lines
= -1;
49 static struct rb_root root_alloc_stat
;
50 static struct rb_root root_alloc_sorted
;
51 static struct rb_root root_caller_stat
;
52 static struct rb_root root_caller_sorted
;
54 static unsigned long total_requested
, total_allocated
;
55 static unsigned long nr_allocs
, nr_cross_allocs
;
57 static int insert_alloc_stat(unsigned long call_site
, unsigned long ptr
,
58 int bytes_req
, int bytes_alloc
, int cpu
)
60 struct rb_node
**node
= &root_alloc_stat
.rb_node
;
61 struct rb_node
*parent
= NULL
;
62 struct alloc_stat
*data
= NULL
;
66 data
= rb_entry(*node
, struct alloc_stat
, node
);
69 node
= &(*node
)->rb_right
;
70 else if (ptr
< data
->ptr
)
71 node
= &(*node
)->rb_left
;
76 if (data
&& data
->ptr
== ptr
) {
78 data
->bytes_req
+= bytes_req
;
79 data
->bytes_alloc
+= bytes_alloc
;
81 data
= malloc(sizeof(*data
));
83 pr_err("%s: malloc failed\n", __func__
);
89 data
->bytes_req
= bytes_req
;
90 data
->bytes_alloc
= bytes_alloc
;
92 rb_link_node(&data
->node
, parent
, node
);
93 rb_insert_color(&data
->node
, &root_alloc_stat
);
95 data
->call_site
= call_site
;
96 data
->alloc_cpu
= cpu
;
100 static int insert_caller_stat(unsigned long call_site
,
101 int bytes_req
, int bytes_alloc
)
103 struct rb_node
**node
= &root_caller_stat
.rb_node
;
104 struct rb_node
*parent
= NULL
;
105 struct alloc_stat
*data
= NULL
;
109 data
= rb_entry(*node
, struct alloc_stat
, node
);
111 if (call_site
> data
->call_site
)
112 node
= &(*node
)->rb_right
;
113 else if (call_site
< data
->call_site
)
114 node
= &(*node
)->rb_left
;
119 if (data
&& data
->call_site
== call_site
) {
121 data
->bytes_req
+= bytes_req
;
122 data
->bytes_alloc
+= bytes_alloc
;
124 data
= malloc(sizeof(*data
));
126 pr_err("%s: malloc failed\n", __func__
);
129 data
->call_site
= call_site
;
132 data
->bytes_req
= bytes_req
;
133 data
->bytes_alloc
= bytes_alloc
;
135 rb_link_node(&data
->node
, parent
, node
);
136 rb_insert_color(&data
->node
, &root_caller_stat
);
142 static int perf_evsel__process_alloc_event(struct perf_evsel
*evsel
,
143 struct perf_sample
*sample
)
145 unsigned long ptr
= perf_evsel__intval(evsel
, sample
, "ptr"),
146 call_site
= perf_evsel__intval(evsel
, sample
, "call_site");
147 int bytes_req
= perf_evsel__intval(evsel
, sample
, "bytes_req"),
148 bytes_alloc
= perf_evsel__intval(evsel
, sample
, "bytes_alloc");
150 if (insert_alloc_stat(call_site
, ptr
, bytes_req
, bytes_alloc
, sample
->cpu
) ||
151 insert_caller_stat(call_site
, bytes_req
, bytes_alloc
))
154 total_requested
+= bytes_req
;
155 total_allocated
+= bytes_alloc
;
161 static int perf_evsel__process_alloc_node_event(struct perf_evsel
*evsel
,
162 struct perf_sample
*sample
)
164 int ret
= perf_evsel__process_alloc_event(evsel
, sample
);
167 int node1
= cpu__get_node(sample
->cpu
),
168 node2
= perf_evsel__intval(evsel
, sample
, "node");
177 static int ptr_cmp(struct alloc_stat
*, struct alloc_stat
*);
178 static int callsite_cmp(struct alloc_stat
*, struct alloc_stat
*);
180 static struct alloc_stat
*search_alloc_stat(unsigned long ptr
,
181 unsigned long call_site
,
182 struct rb_root
*root
,
185 struct rb_node
*node
= root
->rb_node
;
186 struct alloc_stat key
= { .ptr
= ptr
, .call_site
= call_site
};
189 struct alloc_stat
*data
;
192 data
= rb_entry(node
, struct alloc_stat
, node
);
194 cmp
= sort_fn(&key
, data
);
196 node
= node
->rb_left
;
198 node
= node
->rb_right
;
205 static int perf_evsel__process_free_event(struct perf_evsel
*evsel
,
206 struct perf_sample
*sample
)
208 unsigned long ptr
= perf_evsel__intval(evsel
, sample
, "ptr");
209 struct alloc_stat
*s_alloc
, *s_caller
;
211 s_alloc
= search_alloc_stat(ptr
, 0, &root_alloc_stat
, ptr_cmp
);
215 if ((short)sample
->cpu
!= s_alloc
->alloc_cpu
) {
218 s_caller
= search_alloc_stat(0, s_alloc
->call_site
,
219 &root_caller_stat
, callsite_cmp
);
222 s_caller
->pingpong
++;
224 s_alloc
->alloc_cpu
= -1;
229 typedef int (*tracepoint_handler
)(struct perf_evsel
*evsel
,
230 struct perf_sample
*sample
);
232 static int process_sample_event(struct perf_tool
*tool __maybe_unused
,
233 union perf_event
*event
,
234 struct perf_sample
*sample
,
235 struct perf_evsel
*evsel
,
236 struct machine
*machine
)
238 struct thread
*thread
= machine__findnew_thread(machine
, sample
->pid
,
241 if (thread
== NULL
) {
242 pr_debug("problem processing %d event, skipping it.\n",
247 dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread
), thread
->tid
);
249 if (evsel
->handler
!= NULL
) {
250 tracepoint_handler f
= evsel
->handler
;
251 return f(evsel
, sample
);
257 static struct perf_tool perf_kmem
= {
258 .sample
= process_sample_event
,
259 .comm
= perf_event__process_comm
,
260 .mmap
= perf_event__process_mmap
,
261 .mmap2
= perf_event__process_mmap2
,
262 .ordered_events
= true,
265 static double fragmentation(unsigned long n_req
, unsigned long n_alloc
)
270 return 100.0 - (100.0 * n_req
/ n_alloc
);
273 static void __print_result(struct rb_root
*root
, struct perf_session
*session
,
274 int n_lines
, int is_caller
)
276 struct rb_node
*next
;
277 struct machine
*machine
= &session
->machines
.host
;
279 printf("%.105s\n", graph_dotted_line
);
280 printf(" %-34s |", is_caller
? "Callsite": "Alloc Ptr");
281 printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n");
282 printf("%.105s\n", graph_dotted_line
);
284 next
= rb_first(root
);
286 while (next
&& n_lines
--) {
287 struct alloc_stat
*data
= rb_entry(next
, struct alloc_stat
,
289 struct symbol
*sym
= NULL
;
295 addr
= data
->call_site
;
297 sym
= machine__find_kernel_function(machine
, addr
, &map
, NULL
);
302 snprintf(buf
, sizeof(buf
), "%s+%" PRIx64
"", sym
->name
,
303 addr
- map
->unmap_ip(map
, sym
->start
));
305 snprintf(buf
, sizeof(buf
), "%#" PRIx64
"", addr
);
306 printf(" %-34s |", buf
);
308 printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n",
309 (unsigned long long)data
->bytes_alloc
,
310 (unsigned long)data
->bytes_alloc
/ data
->hit
,
311 (unsigned long long)data
->bytes_req
,
312 (unsigned long)data
->bytes_req
/ data
->hit
,
313 (unsigned long)data
->hit
,
314 (unsigned long)data
->pingpong
,
315 fragmentation(data
->bytes_req
, data
->bytes_alloc
));
317 next
= rb_next(next
);
321 printf(" ... | ... | ... | ... | ... | ... \n");
323 printf("%.105s\n", graph_dotted_line
);
326 static void print_summary(void)
328 printf("\nSUMMARY\n=======\n");
329 printf("Total bytes requested: %'lu\n", total_requested
);
330 printf("Total bytes allocated: %'lu\n", total_allocated
);
331 printf("Total bytes wasted on internal fragmentation: %'lu\n",
332 total_allocated
- total_requested
);
333 printf("Internal fragmentation: %f%%\n",
334 fragmentation(total_requested
, total_allocated
));
335 printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs
, nr_allocs
);
338 static void print_result(struct perf_session
*session
)
341 __print_result(&root_caller_sorted
, session
, caller_lines
, 1);
343 __print_result(&root_alloc_sorted
, session
, alloc_lines
, 0);
347 struct sort_dimension
{
350 struct list_head list
;
353 static LIST_HEAD(caller_sort
);
354 static LIST_HEAD(alloc_sort
);
356 static void sort_insert(struct rb_root
*root
, struct alloc_stat
*data
,
357 struct list_head
*sort_list
)
359 struct rb_node
**new = &(root
->rb_node
);
360 struct rb_node
*parent
= NULL
;
361 struct sort_dimension
*sort
;
364 struct alloc_stat
*this;
367 this = rb_entry(*new, struct alloc_stat
, node
);
370 list_for_each_entry(sort
, sort_list
, list
) {
371 cmp
= sort
->cmp(data
, this);
377 new = &((*new)->rb_left
);
379 new = &((*new)->rb_right
);
382 rb_link_node(&data
->node
, parent
, new);
383 rb_insert_color(&data
->node
, root
);
386 static void __sort_result(struct rb_root
*root
, struct rb_root
*root_sorted
,
387 struct list_head
*sort_list
)
389 struct rb_node
*node
;
390 struct alloc_stat
*data
;
393 node
= rb_first(root
);
397 rb_erase(node
, root
);
398 data
= rb_entry(node
, struct alloc_stat
, node
);
399 sort_insert(root_sorted
, data
, sort_list
);
403 static void sort_result(void)
405 __sort_result(&root_alloc_stat
, &root_alloc_sorted
, &alloc_sort
);
406 __sort_result(&root_caller_stat
, &root_caller_sorted
, &caller_sort
);
409 static int __cmd_kmem(struct perf_session
*session
)
412 const struct perf_evsel_str_handler kmem_tracepoints
[] = {
413 { "kmem:kmalloc", perf_evsel__process_alloc_event
, },
414 { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event
, },
415 { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event
, },
416 { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event
, },
417 { "kmem:kfree", perf_evsel__process_free_event
, },
418 { "kmem:kmem_cache_free", perf_evsel__process_free_event
, },
421 if (!perf_session__has_traces(session
, "kmem record"))
424 if (perf_session__set_tracepoints_handlers(session
, kmem_tracepoints
)) {
425 pr_err("Initializing perf session tracepoint handlers failed\n");
430 err
= perf_session__process_events(session
);
434 print_result(session
);
439 static int ptr_cmp(struct alloc_stat
*l
, struct alloc_stat
*r
)
443 else if (l
->ptr
> r
->ptr
)
448 static struct sort_dimension ptr_sort_dimension
= {
453 static int callsite_cmp(struct alloc_stat
*l
, struct alloc_stat
*r
)
455 if (l
->call_site
< r
->call_site
)
457 else if (l
->call_site
> r
->call_site
)
462 static struct sort_dimension callsite_sort_dimension
= {
467 static int hit_cmp(struct alloc_stat
*l
, struct alloc_stat
*r
)
471 else if (l
->hit
> r
->hit
)
476 static struct sort_dimension hit_sort_dimension
= {
481 static int bytes_cmp(struct alloc_stat
*l
, struct alloc_stat
*r
)
483 if (l
->bytes_alloc
< r
->bytes_alloc
)
485 else if (l
->bytes_alloc
> r
->bytes_alloc
)
490 static struct sort_dimension bytes_sort_dimension
= {
495 static int frag_cmp(struct alloc_stat
*l
, struct alloc_stat
*r
)
499 x
= fragmentation(l
->bytes_req
, l
->bytes_alloc
);
500 y
= fragmentation(r
->bytes_req
, r
->bytes_alloc
);
509 static struct sort_dimension frag_sort_dimension
= {
514 static int pingpong_cmp(struct alloc_stat
*l
, struct alloc_stat
*r
)
516 if (l
->pingpong
< r
->pingpong
)
518 else if (l
->pingpong
> r
->pingpong
)
523 static struct sort_dimension pingpong_sort_dimension
= {
528 static struct sort_dimension
*avail_sorts
[] = {
530 &callsite_sort_dimension
,
532 &bytes_sort_dimension
,
533 &frag_sort_dimension
,
534 &pingpong_sort_dimension
,
537 #define NUM_AVAIL_SORTS ((int)ARRAY_SIZE(avail_sorts))
539 static int sort_dimension__add(const char *tok
, struct list_head
*list
)
541 struct sort_dimension
*sort
;
544 for (i
= 0; i
< NUM_AVAIL_SORTS
; i
++) {
545 if (!strcmp(avail_sorts
[i
]->name
, tok
)) {
546 sort
= memdup(avail_sorts
[i
], sizeof(*avail_sorts
[i
]));
548 pr_err("%s: memdup failed\n", __func__
);
551 list_add_tail(&sort
->list
, list
);
559 static int setup_sorting(struct list_head
*sort_list
, const char *arg
)
562 char *str
= strdup(arg
);
566 pr_err("%s: strdup failed\n", __func__
);
571 tok
= strsep(&pos
, ",");
574 if (sort_dimension__add(tok
, sort_list
) < 0) {
575 error("Unknown --sort key: '%s'", tok
);
585 static int parse_sort_opt(const struct option
*opt __maybe_unused
,
586 const char *arg
, int unset __maybe_unused
)
591 if (caller_flag
> alloc_flag
)
592 return setup_sorting(&caller_sort
, arg
);
594 return setup_sorting(&alloc_sort
, arg
);
599 static int parse_caller_opt(const struct option
*opt __maybe_unused
,
600 const char *arg __maybe_unused
,
601 int unset __maybe_unused
)
603 caller_flag
= (alloc_flag
+ 1);
607 static int parse_alloc_opt(const struct option
*opt __maybe_unused
,
608 const char *arg __maybe_unused
,
609 int unset __maybe_unused
)
611 alloc_flag
= (caller_flag
+ 1);
615 static int parse_line_opt(const struct option
*opt __maybe_unused
,
616 const char *arg
, int unset __maybe_unused
)
623 lines
= strtoul(arg
, NULL
, 10);
625 if (caller_flag
> alloc_flag
)
626 caller_lines
= lines
;
633 static int __cmd_record(int argc
, const char **argv
)
635 const char * const record_args
[] = {
636 "record", "-a", "-R", "-c", "1",
637 "-e", "kmem:kmalloc",
638 "-e", "kmem:kmalloc_node",
640 "-e", "kmem:kmem_cache_alloc",
641 "-e", "kmem:kmem_cache_alloc_node",
642 "-e", "kmem:kmem_cache_free",
644 unsigned int rec_argc
, i
, j
;
645 const char **rec_argv
;
647 rec_argc
= ARRAY_SIZE(record_args
) + argc
- 1;
648 rec_argv
= calloc(rec_argc
+ 1, sizeof(char *));
650 if (rec_argv
== NULL
)
653 for (i
= 0; i
< ARRAY_SIZE(record_args
); i
++)
654 rec_argv
[i
] = strdup(record_args
[i
]);
656 for (j
= 1; j
< (unsigned int)argc
; j
++, i
++)
657 rec_argv
[i
] = argv
[j
];
659 return cmd_record(i
, rec_argv
, NULL
);
662 int cmd_kmem(int argc
, const char **argv
, const char *prefix __maybe_unused
)
664 const char * const default_sort_order
= "frag,hit,bytes";
665 struct perf_data_file file
= {
667 .mode
= PERF_DATA_MODE_READ
,
669 const struct option kmem_options
[] = {
670 OPT_STRING('i', "input", &input_name
, "file", "input file name"),
671 OPT_INCR('v', "verbose", &verbose
,
672 "be more verbose (show symbol address, etc)"),
673 OPT_CALLBACK_NOOPT(0, "caller", NULL
, NULL
,
674 "show per-callsite statistics", parse_caller_opt
),
675 OPT_CALLBACK_NOOPT(0, "alloc", NULL
, NULL
,
676 "show per-allocation statistics", parse_alloc_opt
),
677 OPT_CALLBACK('s', "sort", NULL
, "key[,key2...]",
678 "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
680 OPT_CALLBACK('l', "line", NULL
, "num", "show n lines", parse_line_opt
),
681 OPT_BOOLEAN(0, "raw-ip", &raw_ip
, "show raw ip instead of symbol"),
682 OPT_BOOLEAN('f', "force", &file
.force
, "don't complain, do it"),
685 const char *const kmem_subcommands
[] = { "record", "stat", NULL
};
686 const char *kmem_usage
[] = {
690 struct perf_session
*session
;
693 argc
= parse_options_subcommand(argc
, argv
, kmem_options
,
694 kmem_subcommands
, kmem_usage
, 0);
697 usage_with_options(kmem_usage
, kmem_options
);
699 if (!strncmp(argv
[0], "rec", 3)) {
701 return __cmd_record(argc
, argv
);
704 session
= perf_session__new(&file
, false, &perf_kmem
);
708 symbol__init(&session
->header
.env
);
710 if (!strcmp(argv
[0], "stat")) {
711 setlocale(LC_ALL
, "");
713 if (cpu__setup_cpunode_map())
716 if (list_empty(&caller_sort
))
717 setup_sorting(&caller_sort
, default_sort_order
);
718 if (list_empty(&alloc_sort
))
719 setup_sorting(&alloc_sort
, default_sort_order
);
721 ret
= __cmd_kmem(session
);
723 usage_with_options(kmem_usage
, kmem_options
);
726 perf_session__delete(session
);