]> git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/blob - tools/perf/builtin-lock.c
Add a document describing the padata interface
[mirror_ubuntu-hirsute-kernel.git] / tools / perf / builtin-lock.c
1 #include "builtin.h"
2 #include "perf.h"
3
4 #include "util/util.h"
5 #include "util/cache.h"
6 #include "util/symbol.h"
7 #include "util/thread.h"
8 #include "util/header.h"
9
10 #include "util/parse-options.h"
11 #include "util/trace-event.h"
12
13 #include "util/debug.h"
14 #include "util/session.h"
15
16 #include <sys/types.h>
17 #include <sys/prctl.h>
18 #include <semaphore.h>
19 #include <pthread.h>
20 #include <math.h>
21 #include <limits.h>
22
23 #include <linux/list.h>
24 #include <linux/hash.h>
25
26 /* based on kernel/lockdep.c */
27 #define LOCKHASH_BITS 12
28 #define LOCKHASH_SIZE (1UL << LOCKHASH_BITS)
29
30 static struct list_head lockhash_table[LOCKHASH_SIZE];
31
32 #define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS)
33 #define lockhashentry(key) (lockhash_table + __lockhashfn((key)))
34
35 #define LOCK_STATE_UNLOCKED 0 /* initial state */
36 #define LOCK_STATE_LOCKED 1
37
38 struct lock_stat {
39 struct list_head hash_entry;
40 struct rb_node rb; /* used for sorting */
41
42 /*
43 * FIXME: raw_field_value() returns unsigned long long,
44 * so address of lockdep_map should be dealed as 64bit.
45 * Is there more better solution?
46 */
47 void *addr; /* address of lockdep_map, used as ID */
48 char *name; /* for strcpy(), we cannot use const */
49
50 int state;
51 u64 prev_event_time; /* timestamp of previous event */
52
53 unsigned int nr_acquired;
54 unsigned int nr_acquire;
55 unsigned int nr_contended;
56 unsigned int nr_release;
57
58 /* these times are in nano sec. */
59 u64 wait_time_total;
60 u64 wait_time_min;
61 u64 wait_time_max;
62 };
63
64 /* build simple key function one is bigger than two */
65 #define SINGLE_KEY(member) \
66 static int lock_stat_key_ ## member(struct lock_stat *one, \
67 struct lock_stat *two) \
68 { \
69 return one->member > two->member; \
70 }
71
72 SINGLE_KEY(nr_acquired)
73 SINGLE_KEY(nr_contended)
74 SINGLE_KEY(wait_time_total)
75 SINGLE_KEY(wait_time_min)
76 SINGLE_KEY(wait_time_max)
77
78 struct lock_key {
79 /*
80 * name: the value for specify by user
81 * this should be simpler than raw name of member
82 * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
83 */
84 const char *name;
85 int (*key)(struct lock_stat*, struct lock_stat*);
86 };
87
88 static const char *sort_key = "acquired";
89
90 static int (*compare)(struct lock_stat *, struct lock_stat *);
91
92 static struct rb_root result; /* place to store sorted data */
93
94 #define DEF_KEY_LOCK(name, fn_suffix) \
95 { #name, lock_stat_key_ ## fn_suffix }
96 struct lock_key keys[] = {
97 DEF_KEY_LOCK(acquired, nr_acquired),
98 DEF_KEY_LOCK(contended, nr_contended),
99 DEF_KEY_LOCK(wait_total, wait_time_total),
100 DEF_KEY_LOCK(wait_min, wait_time_min),
101 DEF_KEY_LOCK(wait_max, wait_time_max),
102
103 /* extra comparisons much complicated should be here */
104
105 { NULL, NULL }
106 };
107
108 static void select_key(void)
109 {
110 int i;
111
112 for (i = 0; keys[i].name; i++) {
113 if (!strcmp(keys[i].name, sort_key)) {
114 compare = keys[i].key;
115 return;
116 }
117 }
118
119 die("Unknown compare key:%s\n", sort_key);
120 }
121
122 static void insert_to_result(struct lock_stat *st,
123 int (*bigger)(struct lock_stat *, struct lock_stat *))
124 {
125 struct rb_node **rb = &result.rb_node;
126 struct rb_node *parent = NULL;
127 struct lock_stat *p;
128
129 while (*rb) {
130 p = container_of(*rb, struct lock_stat, rb);
131 parent = *rb;
132
133 if (bigger(st, p))
134 rb = &(*rb)->rb_left;
135 else
136 rb = &(*rb)->rb_right;
137 }
138
139 rb_link_node(&st->rb, parent, rb);
140 rb_insert_color(&st->rb, &result);
141 }
142
143 /* returns left most element of result, and erase it */
144 static struct lock_stat *pop_from_result(void)
145 {
146 struct rb_node *node = result.rb_node;
147
148 if (!node)
149 return NULL;
150
151 while (node->rb_left)
152 node = node->rb_left;
153
154 rb_erase(node, &result);
155 return container_of(node, struct lock_stat, rb);
156 }
157
158 static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
159 {
160 struct list_head *entry = lockhashentry(addr);
161 struct lock_stat *ret, *new;
162
163 list_for_each_entry(ret, entry, hash_entry) {
164 if (ret->addr == addr)
165 return ret;
166 }
167
168 new = zalloc(sizeof(struct lock_stat));
169 if (!new)
170 goto alloc_failed;
171
172 new->addr = addr;
173 new->name = zalloc(sizeof(char) * strlen(name) + 1);
174 if (!new->name)
175 goto alloc_failed;
176 strcpy(new->name, name);
177
178 /* LOCK_STATE_UNLOCKED == 0 isn't guaranteed forever */
179 new->state = LOCK_STATE_UNLOCKED;
180 new->wait_time_min = ULLONG_MAX;
181
182 list_add(&new->hash_entry, entry);
183 return new;
184
185 alloc_failed:
186 die("memory allocation failed\n");
187 }
188
189 static char const *input_name = "perf.data";
190
191 static int profile_cpu = -1;
192
193 struct raw_event_sample {
194 u32 size;
195 char data[0];
196 };
197
198 struct trace_acquire_event {
199 void *addr;
200 const char *name;
201 };
202
203 struct trace_acquired_event {
204 void *addr;
205 const char *name;
206 };
207
208 struct trace_contended_event {
209 void *addr;
210 const char *name;
211 };
212
213 struct trace_release_event {
214 void *addr;
215 const char *name;
216 };
217
218 struct trace_lock_handler {
219 void (*acquire_event)(struct trace_acquire_event *,
220 struct event *,
221 int cpu,
222 u64 timestamp,
223 struct thread *thread);
224
225 void (*acquired_event)(struct trace_acquired_event *,
226 struct event *,
227 int cpu,
228 u64 timestamp,
229 struct thread *thread);
230
231 void (*contended_event)(struct trace_contended_event *,
232 struct event *,
233 int cpu,
234 u64 timestamp,
235 struct thread *thread);
236
237 void (*release_event)(struct trace_release_event *,
238 struct event *,
239 int cpu,
240 u64 timestamp,
241 struct thread *thread);
242 };
243
244 static void
245 report_lock_acquire_event(struct trace_acquire_event *acquire_event,
246 struct event *__event __used,
247 int cpu __used,
248 u64 timestamp,
249 struct thread *thread __used)
250 {
251 struct lock_stat *st;
252
253 st = lock_stat_findnew(acquire_event->addr, acquire_event->name);
254
255 switch (st->state) {
256 case LOCK_STATE_UNLOCKED:
257 break;
258 case LOCK_STATE_LOCKED:
259 break;
260 default:
261 BUG_ON(1);
262 break;
263 }
264
265 st->prev_event_time = timestamp;
266 }
267
268 static void
269 report_lock_acquired_event(struct trace_acquired_event *acquired_event,
270 struct event *__event __used,
271 int cpu __used,
272 u64 timestamp,
273 struct thread *thread __used)
274 {
275 struct lock_stat *st;
276
277 st = lock_stat_findnew(acquired_event->addr, acquired_event->name);
278
279 switch (st->state) {
280 case LOCK_STATE_UNLOCKED:
281 st->state = LOCK_STATE_LOCKED;
282 st->nr_acquired++;
283 break;
284 case LOCK_STATE_LOCKED:
285 break;
286 default:
287 BUG_ON(1);
288 break;
289 }
290
291 st->prev_event_time = timestamp;
292 }
293
294 static void
295 report_lock_contended_event(struct trace_contended_event *contended_event,
296 struct event *__event __used,
297 int cpu __used,
298 u64 timestamp,
299 struct thread *thread __used)
300 {
301 struct lock_stat *st;
302
303 st = lock_stat_findnew(contended_event->addr, contended_event->name);
304
305 switch (st->state) {
306 case LOCK_STATE_UNLOCKED:
307 break;
308 case LOCK_STATE_LOCKED:
309 st->nr_contended++;
310 break;
311 default:
312 BUG_ON(1);
313 break;
314 }
315
316 st->prev_event_time = timestamp;
317 }
318
319 static void
320 report_lock_release_event(struct trace_release_event *release_event,
321 struct event *__event __used,
322 int cpu __used,
323 u64 timestamp,
324 struct thread *thread __used)
325 {
326 struct lock_stat *st;
327 u64 hold_time;
328
329 st = lock_stat_findnew(release_event->addr, release_event->name);
330
331 switch (st->state) {
332 case LOCK_STATE_UNLOCKED:
333 break;
334 case LOCK_STATE_LOCKED:
335 st->state = LOCK_STATE_UNLOCKED;
336 hold_time = timestamp - st->prev_event_time;
337
338 if (timestamp < st->prev_event_time) {
339 /* terribly, this can happen... */
340 goto end;
341 }
342
343 if (st->wait_time_min > hold_time)
344 st->wait_time_min = hold_time;
345 if (st->wait_time_max < hold_time)
346 st->wait_time_max = hold_time;
347 st->wait_time_total += hold_time;
348
349 st->nr_release++;
350 break;
351 default:
352 BUG_ON(1);
353 break;
354 }
355
356 end:
357 st->prev_event_time = timestamp;
358 }
359
360 /* lock oriented handlers */
361 /* TODO: handlers for CPU oriented, thread oriented */
362 static struct trace_lock_handler report_lock_ops = {
363 .acquire_event = report_lock_acquire_event,
364 .acquired_event = report_lock_acquired_event,
365 .contended_event = report_lock_contended_event,
366 .release_event = report_lock_release_event,
367 };
368
369 static struct trace_lock_handler *trace_handler;
370
371 static void
372 process_lock_acquire_event(void *data,
373 struct event *event __used,
374 int cpu __used,
375 u64 timestamp __used,
376 struct thread *thread __used)
377 {
378 struct trace_acquire_event acquire_event;
379 u64 tmp; /* this is required for casting... */
380
381 tmp = raw_field_value(event, "lockdep_addr", data);
382 memcpy(&acquire_event.addr, &tmp, sizeof(void *));
383 acquire_event.name = (char *)raw_field_ptr(event, "name", data);
384
385 if (trace_handler->acquire_event)
386 trace_handler->acquire_event(&acquire_event, event, cpu, timestamp, thread);
387 }
388
389 static void
390 process_lock_acquired_event(void *data,
391 struct event *event __used,
392 int cpu __used,
393 u64 timestamp __used,
394 struct thread *thread __used)
395 {
396 struct trace_acquired_event acquired_event;
397 u64 tmp; /* this is required for casting... */
398
399 tmp = raw_field_value(event, "lockdep_addr", data);
400 memcpy(&acquired_event.addr, &tmp, sizeof(void *));
401 acquired_event.name = (char *)raw_field_ptr(event, "name", data);
402
403 if (trace_handler->acquire_event)
404 trace_handler->acquired_event(&acquired_event, event, cpu, timestamp, thread);
405 }
406
407 static void
408 process_lock_contended_event(void *data,
409 struct event *event __used,
410 int cpu __used,
411 u64 timestamp __used,
412 struct thread *thread __used)
413 {
414 struct trace_contended_event contended_event;
415 u64 tmp; /* this is required for casting... */
416
417 tmp = raw_field_value(event, "lockdep_addr", data);
418 memcpy(&contended_event.addr, &tmp, sizeof(void *));
419 contended_event.name = (char *)raw_field_ptr(event, "name", data);
420
421 if (trace_handler->acquire_event)
422 trace_handler->contended_event(&contended_event, event, cpu, timestamp, thread);
423 }
424
425 static void
426 process_lock_release_event(void *data,
427 struct event *event __used,
428 int cpu __used,
429 u64 timestamp __used,
430 struct thread *thread __used)
431 {
432 struct trace_release_event release_event;
433 u64 tmp; /* this is required for casting... */
434
435 tmp = raw_field_value(event, "lockdep_addr", data);
436 memcpy(&release_event.addr, &tmp, sizeof(void *));
437 release_event.name = (char *)raw_field_ptr(event, "name", data);
438
439 if (trace_handler->acquire_event)
440 trace_handler->release_event(&release_event, event, cpu, timestamp, thread);
441 }
442
443 static void
444 process_raw_event(void *data, int cpu,
445 u64 timestamp, struct thread *thread)
446 {
447 struct event *event;
448 int type;
449
450 type = trace_parse_common_type(data);
451 event = trace_find_event(type);
452
453 if (!strcmp(event->name, "lock_acquire"))
454 process_lock_acquire_event(data, event, cpu, timestamp, thread);
455 if (!strcmp(event->name, "lock_acquired"))
456 process_lock_acquired_event(data, event, cpu, timestamp, thread);
457 if (!strcmp(event->name, "lock_contended"))
458 process_lock_contended_event(data, event, cpu, timestamp, thread);
459 if (!strcmp(event->name, "lock_release"))
460 process_lock_release_event(data, event, cpu, timestamp, thread);
461 }
462
463 struct raw_event_queue {
464 u64 timestamp;
465 int cpu;
466 void *data;
467 struct thread *thread;
468 struct list_head list;
469 };
470
471 static LIST_HEAD(raw_event_head);
472
473 #define FLUSH_PERIOD (5 * NSEC_PER_SEC)
474
475 static u64 flush_limit = ULLONG_MAX;
476 static u64 last_flush = 0;
477 struct raw_event_queue *last_inserted;
478
479 static void flush_raw_event_queue(u64 limit)
480 {
481 struct raw_event_queue *tmp, *iter;
482
483 list_for_each_entry_safe(iter, tmp, &raw_event_head, list) {
484 if (iter->timestamp > limit)
485 return;
486
487 if (iter == last_inserted)
488 last_inserted = NULL;
489
490 process_raw_event(iter->data, iter->cpu, iter->timestamp,
491 iter->thread);
492
493 last_flush = iter->timestamp;
494 list_del(&iter->list);
495 free(iter->data);
496 free(iter);
497 }
498 }
499
500 static void __queue_raw_event_end(struct raw_event_queue *new)
501 {
502 struct raw_event_queue *iter;
503
504 list_for_each_entry_reverse(iter, &raw_event_head, list) {
505 if (iter->timestamp < new->timestamp) {
506 list_add(&new->list, &iter->list);
507 return;
508 }
509 }
510
511 list_add(&new->list, &raw_event_head);
512 }
513
514 static void __queue_raw_event_before(struct raw_event_queue *new,
515 struct raw_event_queue *iter)
516 {
517 list_for_each_entry_continue_reverse(iter, &raw_event_head, list) {
518 if (iter->timestamp < new->timestamp) {
519 list_add(&new->list, &iter->list);
520 return;
521 }
522 }
523
524 list_add(&new->list, &raw_event_head);
525 }
526
527 static void __queue_raw_event_after(struct raw_event_queue *new,
528 struct raw_event_queue *iter)
529 {
530 list_for_each_entry_continue(iter, &raw_event_head, list) {
531 if (iter->timestamp > new->timestamp) {
532 list_add_tail(&new->list, &iter->list);
533 return;
534 }
535 }
536 list_add_tail(&new->list, &raw_event_head);
537 }
538
539 /* The queue is ordered by time */
540 static void __queue_raw_event(struct raw_event_queue *new)
541 {
542 if (!last_inserted) {
543 __queue_raw_event_end(new);
544 return;
545 }
546
547 /*
548 * Most of the time the current event has a timestamp
549 * very close to the last event inserted, unless we just switched
550 * to another event buffer. Having a sorting based on a list and
551 * on the last inserted event that is close to the current one is
552 * probably more efficient than an rbtree based sorting.
553 */
554 if (last_inserted->timestamp >= new->timestamp)
555 __queue_raw_event_before(new, last_inserted);
556 else
557 __queue_raw_event_after(new, last_inserted);
558 }
559
560 static void queue_raw_event(void *data, int raw_size, int cpu,
561 u64 timestamp, struct thread *thread)
562 {
563 struct raw_event_queue *new;
564
565 if (flush_limit == ULLONG_MAX)
566 flush_limit = timestamp + FLUSH_PERIOD;
567
568 if (timestamp < last_flush) {
569 printf("Warning: Timestamp below last timeslice flush\n");
570 return;
571 }
572
573 new = malloc(sizeof(*new));
574 if (!new)
575 die("Not enough memory\n");
576
577 new->timestamp = timestamp;
578 new->cpu = cpu;
579 new->thread = thread;
580
581 new->data = malloc(raw_size);
582 if (!new->data)
583 die("Not enough memory\n");
584
585 memcpy(new->data, data, raw_size);
586
587 __queue_raw_event(new);
588 last_inserted = new;
589
590 /*
591 * We want to have a slice of events covering 2 * FLUSH_PERIOD
592 * If FLUSH_PERIOD is big enough, it ensures every events that occured
593 * in the first half of the timeslice have all been buffered and there
594 * are none remaining (we need that because of the weakly ordered
595 * event recording we have). Then once we reach the 2 * FLUSH_PERIOD
596 * timeslice, we flush the first half to be gentle with the memory
597 * (the second half can still get new events in the middle, so wait
598 * another period to flush it)
599 */
600 if (new->timestamp > flush_limit &&
601 new->timestamp - flush_limit > FLUSH_PERIOD) {
602 flush_limit += FLUSH_PERIOD;
603 flush_raw_event_queue(flush_limit);
604 }
605 }
606
607 static int process_sample_event(event_t *event, struct perf_session *session)
608 {
609 struct thread *thread;
610 struct sample_data data;
611
612 bzero(&data, sizeof(struct sample_data));
613 event__parse_sample(event, session->sample_type, &data);
614 thread = perf_session__findnew(session, data.pid);
615
616 if (thread == NULL) {
617 pr_debug("problem processing %d event, skipping it.\n",
618 event->header.type);
619 return -1;
620 }
621
622 dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
623
624 if (profile_cpu != -1 && profile_cpu != (int) data.cpu)
625 return 0;
626
627 queue_raw_event(data.raw_data, data.raw_size, data.cpu, data.time, thread);
628
629 return 0;
630 }
631
632 /* TODO: various way to print, coloring, nano or milli sec */
633 static void print_result(void)
634 {
635 struct lock_stat *st;
636 char cut_name[20];
637
638 printf("%18s ", "ID");
639 printf("%20s ", "Name");
640 printf("%10s ", "acquired");
641 printf("%10s ", "contended");
642
643 printf("%15s ", "total wait (ns)");
644 printf("%15s ", "max wait (ns)");
645 printf("%15s ", "min wait (ns)");
646
647 printf("\n\n");
648
649 while ((st = pop_from_result())) {
650 bzero(cut_name, 20);
651
652 printf("%p ", st->addr);
653
654 if (strlen(st->name) < 16) {
655 /* output raw name */
656 printf("%20s ", st->name);
657 } else {
658 strncpy(cut_name, st->name, 16);
659 cut_name[16] = '.';
660 cut_name[17] = '.';
661 cut_name[18] = '.';
662 cut_name[19] = '\0';
663 /* cut off name for saving output style */
664 printf("%20s ", cut_name);
665 }
666
667 printf("%10u ", st->nr_acquired);
668 printf("%10u ", st->nr_contended);
669
670 printf("%15llu ", st->wait_time_total);
671 printf("%15llu ", st->wait_time_max);
672 printf("%15llu ", st->wait_time_min == ULLONG_MAX ?
673 0 : st->wait_time_min);
674 printf("\n");
675 }
676 }
677
678 static void dump_map(void)
679 {
680 unsigned int i;
681 struct lock_stat *st;
682
683 for (i = 0; i < LOCKHASH_SIZE; i++) {
684 list_for_each_entry(st, &lockhash_table[i], hash_entry) {
685 printf("%p: %s\n", st->addr, st->name);
686 }
687 }
688 }
689
690 static struct perf_event_ops eops = {
691 .sample = process_sample_event,
692 .comm = event__process_comm,
693 };
694
695 static struct perf_session *session;
696
697 static int read_events(void)
698 {
699 session = perf_session__new(input_name, O_RDONLY, 0);
700 if (!session)
701 die("Initializing perf session failed\n");
702
703 return perf_session__process_events(session, &eops);
704 }
705
706 static void sort_result(void)
707 {
708 unsigned int i;
709 struct lock_stat *st;
710
711 for (i = 0; i < LOCKHASH_SIZE; i++) {
712 list_for_each_entry(st, &lockhash_table[i], hash_entry) {
713 insert_to_result(st, compare);
714 }
715 }
716 }
717
718 static void __cmd_report(void)
719 {
720 setup_pager();
721 select_key();
722 read_events();
723 flush_raw_event_queue(ULLONG_MAX);
724 sort_result();
725 print_result();
726 }
727
728 static const char * const report_usage[] = {
729 "perf lock report [<options>]",
730 NULL
731 };
732
733 static const struct option report_options[] = {
734 OPT_STRING('k', "key", &sort_key, "acquired",
735 "key for sorting"),
736 /* TODO: type */
737 OPT_END()
738 };
739
740 static const char * const lock_usage[] = {
741 "perf lock [<options>] {record|trace|report}",
742 NULL
743 };
744
745 static const struct option lock_options[] = {
746 OPT_STRING('i', "input", &input_name, "file", "input file name"),
747 OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
748 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
749 OPT_END()
750 };
751
752 static const char *record_args[] = {
753 "record",
754 "-a",
755 "-R",
756 "-f",
757 "-m", "1024",
758 "-c", "1",
759 "-e", "lock:lock_acquire:r",
760 "-e", "lock:lock_acquired:r",
761 "-e", "lock:lock_contended:r",
762 "-e", "lock:lock_release:r",
763 };
764
765 static int __cmd_record(int argc, const char **argv)
766 {
767 unsigned int rec_argc, i, j;
768 const char **rec_argv;
769
770 rec_argc = ARRAY_SIZE(record_args) + argc - 1;
771 rec_argv = calloc(rec_argc + 1, sizeof(char *));
772
773 for (i = 0; i < ARRAY_SIZE(record_args); i++)
774 rec_argv[i] = strdup(record_args[i]);
775
776 for (j = 1; j < (unsigned int)argc; j++, i++)
777 rec_argv[i] = argv[j];
778
779 BUG_ON(i != rec_argc);
780
781 return cmd_record(i, rec_argv, NULL);
782 }
783
784 int cmd_lock(int argc, const char **argv, const char *prefix __used)
785 {
786 unsigned int i;
787
788 symbol__init();
789 for (i = 0; i < LOCKHASH_SIZE; i++)
790 INIT_LIST_HEAD(lockhash_table + i);
791
792 argc = parse_options(argc, argv, lock_options, lock_usage,
793 PARSE_OPT_STOP_AT_NON_OPTION);
794 if (!argc)
795 usage_with_options(lock_usage, lock_options);
796
797 if (!strncmp(argv[0], "rec", 3)) {
798 return __cmd_record(argc, argv);
799 } else if (!strncmp(argv[0], "report", 6)) {
800 trace_handler = &report_lock_ops;
801 if (argc) {
802 argc = parse_options(argc, argv,
803 report_options, report_usage, 0);
804 if (argc)
805 usage_with_options(report_usage, report_options);
806 }
807 __cmd_report();
808 } else if (!strcmp(argv[0], "trace")) {
809 /* Aliased to 'perf trace' */
810 return cmd_trace(argc, argv, prefix);
811 } else if (!strcmp(argv[0], "map")) {
812 /* recycling report_lock_ops */
813 trace_handler = &report_lock_ops;
814 setup_pager();
815 read_events();
816 dump_map();
817 } else {
818 usage_with_options(lock_usage, lock_options);
819 }
820
821 return 0;
822 }