]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - tools/perf/builtin-trace.c
Merge tag 'squashfs-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/pkl/squas...
[mirror_ubuntu-bionic-kernel.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14
#include <libaudit.h>
#include <stdlib.h>
#include <string.h>
#include <sys/eventfd.h>
#include <sys/mman.h>
#include <linux/futex.h>
20
21 /* For older distros: */
22 #ifndef MAP_STACK
23 # define MAP_STACK 0x20000
24 #endif
25
26 #ifndef MADV_HWPOISON
27 # define MADV_HWPOISON 100
28 #endif
29
30 #ifndef MADV_MERGEABLE
31 # define MADV_MERGEABLE 12
32 #endif
33
34 #ifndef MADV_UNMERGEABLE
35 # define MADV_UNMERGEABLE 13
36 #endif
37
38 struct tp_field {
39 int offset;
40 union {
41 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
42 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
43 };
44 };
45
46 #define TP_UINT_FIELD(bits) \
47 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
48 { \
49 return *(u##bits *)(sample->raw_data + field->offset); \
50 }
51
52 TP_UINT_FIELD(8);
53 TP_UINT_FIELD(16);
54 TP_UINT_FIELD(32);
55 TP_UINT_FIELD(64);
56
57 #define TP_UINT_FIELD__SWAPPED(bits) \
58 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
59 { \
60 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
61 return bswap_##bits(value);\
62 }
63
64 TP_UINT_FIELD__SWAPPED(16);
65 TP_UINT_FIELD__SWAPPED(32);
66 TP_UINT_FIELD__SWAPPED(64);
67
68 static int tp_field__init_uint(struct tp_field *field,
69 struct format_field *format_field,
70 bool needs_swap)
71 {
72 field->offset = format_field->offset;
73
74 switch (format_field->size) {
75 case 1:
76 field->integer = tp_field__u8;
77 break;
78 case 2:
79 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
80 break;
81 case 4:
82 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
83 break;
84 case 8:
85 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
86 break;
87 default:
88 return -1;
89 }
90
91 return 0;
92 }
93
94 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
95 {
96 return sample->raw_data + field->offset;
97 }
98
99 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
100 {
101 field->offset = format_field->offset;
102 field->pointer = tp_field__ptr;
103 return 0;
104 }
105
106 struct syscall_tp {
107 struct tp_field id;
108 union {
109 struct tp_field args, ret;
110 };
111 };
112
113 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
114 struct tp_field *field,
115 const char *name)
116 {
117 struct format_field *format_field = perf_evsel__field(evsel, name);
118
119 if (format_field == NULL)
120 return -1;
121
122 return tp_field__init_uint(field, format_field, evsel->needs_swap);
123 }
124
/*
 * Initialise the integer accessor for member @name of the struct syscall_tp
 * hanging off @evsel->priv, using the tracepoint field of the same name.
 * Evaluates to the result of perf_evsel__init_tp_uint_field().
 *
 * @evsel is parenthesised so that any pointer expression can be passed.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
128
129 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
130 struct tp_field *field,
131 const char *name)
132 {
133 struct format_field *format_field = perf_evsel__field(evsel, name);
134
135 if (format_field == NULL)
136 return -1;
137
138 return tp_field__init_ptr(field, format_field);
139 }
140
/*
 * Initialise the by-address accessor for member @name of the struct
 * syscall_tp hanging off @evsel->priv, using the tracepoint field of the
 * same name. Evaluates to the result of perf_evsel__init_tp_ptr_field().
 *
 * @evsel is parenthesised so that any pointer expression can be passed.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
144
145 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
146 {
147 free(evsel->priv);
148 evsel->priv = NULL;
149 perf_evsel__delete(evsel);
150 }
151
152 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
153 {
154 evsel->priv = malloc(sizeof(struct syscall_tp));
155 if (evsel->priv != NULL) {
156 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
157 goto out_delete;
158
159 evsel->handler = handler;
160 return 0;
161 }
162
163 return -ENOMEM;
164
165 out_delete:
166 free(evsel->priv);
167 evsel->priv = NULL;
168 return -ENOENT;
169 }
170
/*
 * Create an evsel for the "raw_syscalls:<direction>" tracepoint with its
 * syscall_tp private data initialised and @handler installed.
 *
 * Returns the new evsel, or NULL if it could not be created or initialised
 * (a partially set up evsel is deleted before returning).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
186
/*
 * Read member @name of @evsel's struct syscall_tp out of @sample, through
 * the integer accessor. @evsel is parenthesised so that any pointer
 * expression can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })

/*
 * Same as perf_evsel__sc_tp_uint(), but through the by-address accessor:
 * evaluates to a pointer into @sample's raw data.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
194
195 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
196 void *sys_enter_handler,
197 void *sys_exit_handler)
198 {
199 int ret = -1;
200 struct perf_evsel *sys_enter, *sys_exit;
201
202 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
203 if (sys_enter == NULL)
204 goto out;
205
206 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
207 goto out_delete_sys_enter;
208
209 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
210 if (sys_exit == NULL)
211 goto out_delete_sys_enter;
212
213 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
214 goto out_delete_sys_exit;
215
216 perf_evlist__add(evlist, sys_enter);
217 perf_evlist__add(evlist, sys_exit);
218
219 ret = 0;
220 out:
221 return ret;
222
223 out_delete_sys_exit:
224 perf_evsel__delete_priv(sys_exit);
225 out_delete_sys_enter:
226 perf_evsel__delete_priv(sys_enter);
227 goto out;
228 }
229
230
231 struct syscall_arg {
232 unsigned long val;
233 struct thread *thread;
234 struct trace *trace;
235 void *parm;
236 u8 idx;
237 u8 mask;
238 };
239
/*
 * Table mapping small integers to names.
 *
 * @offset:     value that corresponds to entries[0].
 * @nr_entries: number of slots in @entries.
 * @entries:    the names, indexed by (value - offset).
 */
struct strarray {
	int offset;
	int nr_entries;
	const char **entries;
};

/* Define strarray__<array> describing @array, with values starting at 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = {	\
	.nr_entries = ARRAY_SIZE(array),				\
	.entries = array,						\
}

/* Define strarray__<array> describing @array, with values starting at @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset = off,							\
	.nr_entries = ARRAY_SIZE(array),				\
	.entries = array,						\
}
256
257 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
258 const char *intfmt,
259 struct syscall_arg *arg)
260 {
261 struct strarray *sa = arg->parm;
262 int idx = arg->val - sa->offset;
263
264 if (idx < 0 || idx >= sa->nr_entries)
265 return scnprintf(bf, size, intfmt, arg->val);
266
267 return scnprintf(bf, size, "%s", sa->entries[idx]);
268 }
269
/* strarray formatter that prints unnamed values in decimal. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
275
276 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
277
/* strarray formatter that prints unnamed values in hexadecimal. */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
283
284 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
285
286 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
287 struct syscall_arg *arg);
288
289 #define SCA_FD syscall_arg__scnprintf_fd
290
291 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
292 struct syscall_arg *arg)
293 {
294 int fd = arg->val;
295
296 if (fd == AT_FDCWD)
297 return scnprintf(bf, size, "CWD");
298
299 return syscall_arg__scnprintf_fd(bf, size, arg);
300 }
301
302 #define SCA_FDAT syscall_arg__scnprintf_fd_at
303
304 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
305 struct syscall_arg *arg);
306
307 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
308
309 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
310 struct syscall_arg *arg)
311 {
312 return scnprintf(bf, size, "%#lx", arg->val);
313 }
314
315 #define SCA_HEX syscall_arg__scnprintf_hex
316
317 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
318 struct syscall_arg *arg)
319 {
320 int printed = 0, prot = arg->val;
321
322 if (prot == PROT_NONE)
323 return scnprintf(bf, size, "NONE");
324 #define P_MMAP_PROT(n) \
325 if (prot & PROT_##n) { \
326 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
327 prot &= ~PROT_##n; \
328 }
329
330 P_MMAP_PROT(EXEC);
331 P_MMAP_PROT(READ);
332 P_MMAP_PROT(WRITE);
333 #ifdef PROT_SEM
334 P_MMAP_PROT(SEM);
335 #endif
336 P_MMAP_PROT(GROWSDOWN);
337 P_MMAP_PROT(GROWSUP);
338 #undef P_MMAP_PROT
339
340 if (prot)
341 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
342
343 return printed;
344 }
345
346 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
347
348 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
349 struct syscall_arg *arg)
350 {
351 int printed = 0, flags = arg->val;
352
353 #define P_MMAP_FLAG(n) \
354 if (flags & MAP_##n) { \
355 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
356 flags &= ~MAP_##n; \
357 }
358
359 P_MMAP_FLAG(SHARED);
360 P_MMAP_FLAG(PRIVATE);
361 #ifdef MAP_32BIT
362 P_MMAP_FLAG(32BIT);
363 #endif
364 P_MMAP_FLAG(ANONYMOUS);
365 P_MMAP_FLAG(DENYWRITE);
366 P_MMAP_FLAG(EXECUTABLE);
367 P_MMAP_FLAG(FILE);
368 P_MMAP_FLAG(FIXED);
369 P_MMAP_FLAG(GROWSDOWN);
370 #ifdef MAP_HUGETLB
371 P_MMAP_FLAG(HUGETLB);
372 #endif
373 P_MMAP_FLAG(LOCKED);
374 P_MMAP_FLAG(NONBLOCK);
375 P_MMAP_FLAG(NORESERVE);
376 P_MMAP_FLAG(POPULATE);
377 P_MMAP_FLAG(STACK);
378 #ifdef MAP_UNINITIALIZED
379 P_MMAP_FLAG(UNINITIALIZED);
380 #endif
381 #undef P_MMAP_FLAG
382
383 if (flags)
384 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
385
386 return printed;
387 }
388
389 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
390
391 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
392 struct syscall_arg *arg)
393 {
394 int behavior = arg->val;
395
396 switch (behavior) {
397 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
398 P_MADV_BHV(NORMAL);
399 P_MADV_BHV(RANDOM);
400 P_MADV_BHV(SEQUENTIAL);
401 P_MADV_BHV(WILLNEED);
402 P_MADV_BHV(DONTNEED);
403 P_MADV_BHV(REMOVE);
404 P_MADV_BHV(DONTFORK);
405 P_MADV_BHV(DOFORK);
406 P_MADV_BHV(HWPOISON);
407 #ifdef MADV_SOFT_OFFLINE
408 P_MADV_BHV(SOFT_OFFLINE);
409 #endif
410 P_MADV_BHV(MERGEABLE);
411 P_MADV_BHV(UNMERGEABLE);
412 #ifdef MADV_HUGEPAGE
413 P_MADV_BHV(HUGEPAGE);
414 #endif
415 #ifdef MADV_NOHUGEPAGE
416 P_MADV_BHV(NOHUGEPAGE);
417 #endif
418 #ifdef MADV_DONTDUMP
419 P_MADV_BHV(DONTDUMP);
420 #endif
421 #ifdef MADV_DODUMP
422 P_MADV_BHV(DODUMP);
423 #endif
424 #undef P_MADV_PHV
425 default: break;
426 }
427
428 return scnprintf(bf, size, "%#x", behavior);
429 }
430
431 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
432
433 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
434 struct syscall_arg *arg)
435 {
436 int printed = 0, op = arg->val;
437
438 if (op == 0)
439 return scnprintf(bf, size, "NONE");
440 #define P_CMD(cmd) \
441 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
442 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
443 op &= ~LOCK_##cmd; \
444 }
445
446 P_CMD(SH);
447 P_CMD(EX);
448 P_CMD(NB);
449 P_CMD(UN);
450 P_CMD(MAND);
451 P_CMD(RW);
452 P_CMD(READ);
453 P_CMD(WRITE);
454 #undef P_OP
455
456 if (op)
457 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
458
459 return printed;
460 }
461
462 #define SCA_FLOCK syscall_arg__scnprintf_flock
463
464 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
465 {
466 enum syscall_futex_args {
467 SCF_UADDR = (1 << 0),
468 SCF_OP = (1 << 1),
469 SCF_VAL = (1 << 2),
470 SCF_TIMEOUT = (1 << 3),
471 SCF_UADDR2 = (1 << 4),
472 SCF_VAL3 = (1 << 5),
473 };
474 int op = arg->val;
475 int cmd = op & FUTEX_CMD_MASK;
476 size_t printed = 0;
477
478 switch (cmd) {
479 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
480 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
481 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
482 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
483 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
484 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
485 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
486 P_FUTEX_OP(WAKE_OP); break;
487 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
488 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
489 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
490 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
491 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
492 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
493 default: printed = scnprintf(bf, size, "%#x", cmd); break;
494 }
495
496 if (op & FUTEX_PRIVATE_FLAG)
497 printed += scnprintf(bf + printed, size - printed, "|PRIV");
498
499 if (op & FUTEX_CLOCK_REALTIME)
500 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
501
502 return printed;
503 }
504
505 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
506
507 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
508 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
509
510 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
511 static DEFINE_STRARRAY(itimers);
512
513 static const char *whences[] = { "SET", "CUR", "END",
514 #ifdef SEEK_DATA
515 "DATA",
516 #endif
517 #ifdef SEEK_HOLE
518 "HOLE",
519 #endif
520 };
521 static DEFINE_STRARRAY(whences);
522
523 static const char *fcntl_cmds[] = {
524 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
525 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
526 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
527 "F_GETOWNER_UIDS",
528 };
529 static DEFINE_STRARRAY(fcntl_cmds);
530
531 static const char *rlimit_resources[] = {
532 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
533 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
534 "RTTIME",
535 };
536 static DEFINE_STRARRAY(rlimit_resources);
537
538 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
539 static DEFINE_STRARRAY(sighow);
540
541 static const char *clockid[] = {
542 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
543 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
544 };
545 static DEFINE_STRARRAY(clockid);
546
/*
 * Address family names, indexed by AF_* value (AF_UNSPEC == 0).
 *
 * The table is positional, so every AF_* number up to the last one must
 * have a slot: "MPLS" occupies 28 so that "CAN" lands on 29, "BLUETOOTH"
 * on 31 and "VSOCK" on 40, matching the kernel's numbering.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
555 static DEFINE_STRARRAY(socket_families);
556
557 #ifndef SOCK_TYPE_MASK
558 #define SOCK_TYPE_MASK 0xf
559 #endif
560
561 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
562 struct syscall_arg *arg)
563 {
564 size_t printed;
565 int type = arg->val,
566 flags = type & ~SOCK_TYPE_MASK;
567
568 type &= SOCK_TYPE_MASK;
569 /*
570 * Can't use a strarray, MIPS may override for ABI reasons.
571 */
572 switch (type) {
573 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
574 P_SK_TYPE(STREAM);
575 P_SK_TYPE(DGRAM);
576 P_SK_TYPE(RAW);
577 P_SK_TYPE(RDM);
578 P_SK_TYPE(SEQPACKET);
579 P_SK_TYPE(DCCP);
580 P_SK_TYPE(PACKET);
581 #undef P_SK_TYPE
582 default:
583 printed = scnprintf(bf, size, "%#x", type);
584 }
585
586 #define P_SK_FLAG(n) \
587 if (flags & SOCK_##n) { \
588 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
589 flags &= ~SOCK_##n; \
590 }
591
592 P_SK_FLAG(CLOEXEC);
593 P_SK_FLAG(NONBLOCK);
594 #undef P_SK_FLAG
595
596 if (flags)
597 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
598
599 return printed;
600 }
601
602 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
603
604 #ifndef MSG_PROBE
605 #define MSG_PROBE 0x10
606 #endif
607 #ifndef MSG_WAITFORONE
608 #define MSG_WAITFORONE 0x10000
609 #endif
610 #ifndef MSG_SENDPAGE_NOTLAST
611 #define MSG_SENDPAGE_NOTLAST 0x20000
612 #endif
613 #ifndef MSG_FASTOPEN
614 #define MSG_FASTOPEN 0x20000000
615 #endif
616
617 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
618 struct syscall_arg *arg)
619 {
620 int printed = 0, flags = arg->val;
621
622 if (flags == 0)
623 return scnprintf(bf, size, "NONE");
624 #define P_MSG_FLAG(n) \
625 if (flags & MSG_##n) { \
626 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
627 flags &= ~MSG_##n; \
628 }
629
630 P_MSG_FLAG(OOB);
631 P_MSG_FLAG(PEEK);
632 P_MSG_FLAG(DONTROUTE);
633 P_MSG_FLAG(TRYHARD);
634 P_MSG_FLAG(CTRUNC);
635 P_MSG_FLAG(PROBE);
636 P_MSG_FLAG(TRUNC);
637 P_MSG_FLAG(DONTWAIT);
638 P_MSG_FLAG(EOR);
639 P_MSG_FLAG(WAITALL);
640 P_MSG_FLAG(FIN);
641 P_MSG_FLAG(SYN);
642 P_MSG_FLAG(CONFIRM);
643 P_MSG_FLAG(RST);
644 P_MSG_FLAG(ERRQUEUE);
645 P_MSG_FLAG(NOSIGNAL);
646 P_MSG_FLAG(MORE);
647 P_MSG_FLAG(WAITFORONE);
648 P_MSG_FLAG(SENDPAGE_NOTLAST);
649 P_MSG_FLAG(FASTOPEN);
650 P_MSG_FLAG(CMSG_CLOEXEC);
651 #undef P_MSG_FLAG
652
653 if (flags)
654 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
655
656 return printed;
657 }
658
659 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
660
661 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
662 struct syscall_arg *arg)
663 {
664 size_t printed = 0;
665 int mode = arg->val;
666
667 if (mode == F_OK) /* 0 */
668 return scnprintf(bf, size, "F");
669 #define P_MODE(n) \
670 if (mode & n##_OK) { \
671 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
672 mode &= ~n##_OK; \
673 }
674
675 P_MODE(R);
676 P_MODE(W);
677 P_MODE(X);
678 #undef P_MODE
679
680 if (mode)
681 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
682
683 return printed;
684 }
685
686 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
687
688 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
689 struct syscall_arg *arg)
690 {
691 int printed = 0, flags = arg->val;
692
693 if (!(flags & O_CREAT))
694 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
695
696 if (flags == 0)
697 return scnprintf(bf, size, "RDONLY");
698 #define P_FLAG(n) \
699 if (flags & O_##n) { \
700 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
701 flags &= ~O_##n; \
702 }
703
704 P_FLAG(APPEND);
705 P_FLAG(ASYNC);
706 P_FLAG(CLOEXEC);
707 P_FLAG(CREAT);
708 P_FLAG(DIRECT);
709 P_FLAG(DIRECTORY);
710 P_FLAG(EXCL);
711 P_FLAG(LARGEFILE);
712 P_FLAG(NOATIME);
713 P_FLAG(NOCTTY);
714 #ifdef O_NONBLOCK
715 P_FLAG(NONBLOCK);
716 #elif O_NDELAY
717 P_FLAG(NDELAY);
718 #endif
719 #ifdef O_PATH
720 P_FLAG(PATH);
721 #endif
722 P_FLAG(RDWR);
723 #ifdef O_DSYNC
724 if ((flags & O_SYNC) == O_SYNC)
725 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
726 else {
727 P_FLAG(DSYNC);
728 }
729 #else
730 P_FLAG(SYNC);
731 #endif
732 P_FLAG(TRUNC);
733 P_FLAG(WRONLY);
734 #undef P_FLAG
735
736 if (flags)
737 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
738
739 return printed;
740 }
741
742 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
743
744 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
745 struct syscall_arg *arg)
746 {
747 int printed = 0, flags = arg->val;
748
749 if (flags == 0)
750 return scnprintf(bf, size, "NONE");
751 #define P_FLAG(n) \
752 if (flags & EFD_##n) { \
753 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
754 flags &= ~EFD_##n; \
755 }
756
757 P_FLAG(SEMAPHORE);
758 P_FLAG(CLOEXEC);
759 P_FLAG(NONBLOCK);
760 #undef P_FLAG
761
762 if (flags)
763 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
764
765 return printed;
766 }
767
768 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
769
770 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
771 struct syscall_arg *arg)
772 {
773 int printed = 0, flags = arg->val;
774
775 #define P_FLAG(n) \
776 if (flags & O_##n) { \
777 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
778 flags &= ~O_##n; \
779 }
780
781 P_FLAG(CLOEXEC);
782 P_FLAG(NONBLOCK);
783 #undef P_FLAG
784
785 if (flags)
786 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
787
788 return printed;
789 }
790
791 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
792
793 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
794 {
795 int sig = arg->val;
796
797 switch (sig) {
798 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
799 P_SIGNUM(HUP);
800 P_SIGNUM(INT);
801 P_SIGNUM(QUIT);
802 P_SIGNUM(ILL);
803 P_SIGNUM(TRAP);
804 P_SIGNUM(ABRT);
805 P_SIGNUM(BUS);
806 P_SIGNUM(FPE);
807 P_SIGNUM(KILL);
808 P_SIGNUM(USR1);
809 P_SIGNUM(SEGV);
810 P_SIGNUM(USR2);
811 P_SIGNUM(PIPE);
812 P_SIGNUM(ALRM);
813 P_SIGNUM(TERM);
814 P_SIGNUM(STKFLT);
815 P_SIGNUM(CHLD);
816 P_SIGNUM(CONT);
817 P_SIGNUM(STOP);
818 P_SIGNUM(TSTP);
819 P_SIGNUM(TTIN);
820 P_SIGNUM(TTOU);
821 P_SIGNUM(URG);
822 P_SIGNUM(XCPU);
823 P_SIGNUM(XFSZ);
824 P_SIGNUM(VTALRM);
825 P_SIGNUM(PROF);
826 P_SIGNUM(WINCH);
827 P_SIGNUM(IO);
828 P_SIGNUM(PWR);
829 P_SIGNUM(SYS);
830 default: break;
831 }
832
833 return scnprintf(bf, size, "%#x", sig);
834 }
835
836 #define SCA_SIGNUM syscall_arg__scnprintf_signum
837
#define TCGETS		0x5401

/*
 * Names of the 0x54xx terminal ioctls, indexed by (cmd - TCGETS): the
 * table is looked up through a strarray whose offset is 0x5401, so slot 0
 * is TCGETS.
 *
 * The designated initialisers that skip over unused command numbers are
 * therefore written relative to TCGETS as well; using the bare low byte of
 * the command there would shift every following name by one (FIOCLEX,
 * 0x5451, would be reported as "FIONCLEX").
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	[0x5440 - TCGETS] = "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
857
/* tioctls[0] is TCGETS, so that is the value the table starts at. */
static DEFINE_STRARRAY_OFFSET(tioctls, TCGETS);
859
/*
 * Initialiser fragment for a struct syscall_fmt entry whose argument number
 * @arg is decoded through strarray__<array>. @name is not expanded; it only
 * documents the argument's name at the point of use.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
863
864 static struct syscall_fmt {
865 const char *name;
866 const char *alias;
867 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
868 void *arg_parm[6];
869 bool errmsg;
870 bool timeout;
871 bool hexret;
872 } syscall_fmts[] = {
873 { .name = "access", .errmsg = true,
874 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
875 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
876 { .name = "brk", .hexret = true,
877 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
878 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
879 { .name = "close", .errmsg = true,
880 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
881 { .name = "connect", .errmsg = true, },
882 { .name = "dup", .errmsg = true,
883 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
884 { .name = "dup2", .errmsg = true,
885 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
886 { .name = "dup3", .errmsg = true,
887 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
888 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
889 { .name = "eventfd2", .errmsg = true,
890 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
891 { .name = "faccessat", .errmsg = true,
892 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
893 { .name = "fadvise64", .errmsg = true,
894 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
895 { .name = "fallocate", .errmsg = true,
896 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
897 { .name = "fchdir", .errmsg = true,
898 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
899 { .name = "fchmod", .errmsg = true,
900 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
901 { .name = "fchmodat", .errmsg = true,
902 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
903 { .name = "fchown", .errmsg = true,
904 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
905 { .name = "fchownat", .errmsg = true,
906 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
907 { .name = "fcntl", .errmsg = true,
908 .arg_scnprintf = { [0] = SCA_FD, /* fd */
909 [1] = SCA_STRARRAY, /* cmd */ },
910 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
911 { .name = "fdatasync", .errmsg = true,
912 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
913 { .name = "flock", .errmsg = true,
914 .arg_scnprintf = { [0] = SCA_FD, /* fd */
915 [1] = SCA_FLOCK, /* cmd */ }, },
916 { .name = "fsetxattr", .errmsg = true,
917 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
918 { .name = "fstat", .errmsg = true, .alias = "newfstat",
919 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
920 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
921 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
922 { .name = "fstatfs", .errmsg = true,
923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 { .name = "fsync", .errmsg = true,
925 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
926 { .name = "ftruncate", .errmsg = true,
927 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
928 { .name = "futex", .errmsg = true,
929 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
930 { .name = "futimesat", .errmsg = true,
931 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
932 { .name = "getdents", .errmsg = true,
933 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
934 { .name = "getdents64", .errmsg = true,
935 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
936 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
937 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
938 { .name = "ioctl", .errmsg = true,
939 .arg_scnprintf = { [0] = SCA_FD, /* fd */
940 [1] = SCA_STRHEXARRAY, /* cmd */
941 [2] = SCA_HEX, /* arg */ },
942 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
943 { .name = "kill", .errmsg = true,
944 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
945 { .name = "linkat", .errmsg = true,
946 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
947 { .name = "lseek", .errmsg = true,
948 .arg_scnprintf = { [0] = SCA_FD, /* fd */
949 [2] = SCA_STRARRAY, /* whence */ },
950 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
951 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
952 { .name = "madvise", .errmsg = true,
953 .arg_scnprintf = { [0] = SCA_HEX, /* start */
954 [2] = SCA_MADV_BHV, /* behavior */ }, },
955 { .name = "mkdirat", .errmsg = true,
956 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
957 { .name = "mknodat", .errmsg = true,
958 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
959 { .name = "mlock", .errmsg = true,
960 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
961 { .name = "mlockall", .errmsg = true,
962 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
963 { .name = "mmap", .hexret = true,
964 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
965 [2] = SCA_MMAP_PROT, /* prot */
966 [3] = SCA_MMAP_FLAGS, /* flags */
967 [4] = SCA_FD, /* fd */ }, },
968 { .name = "mprotect", .errmsg = true,
969 .arg_scnprintf = { [0] = SCA_HEX, /* start */
970 [2] = SCA_MMAP_PROT, /* prot */ }, },
971 { .name = "mremap", .hexret = true,
972 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
973 [4] = SCA_HEX, /* new_addr */ }, },
974 { .name = "munlock", .errmsg = true,
975 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
976 { .name = "munmap", .errmsg = true,
977 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
978 { .name = "name_to_handle_at", .errmsg = true,
979 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
980 { .name = "newfstatat", .errmsg = true,
981 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
982 { .name = "open", .errmsg = true,
983 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
984 { .name = "open_by_handle_at", .errmsg = true,
985 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
986 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
987 { .name = "openat", .errmsg = true,
988 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
989 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
990 { .name = "pipe2", .errmsg = true,
991 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
992 { .name = "poll", .errmsg = true, .timeout = true, },
993 { .name = "ppoll", .errmsg = true, .timeout = true, },
994 { .name = "pread", .errmsg = true, .alias = "pread64",
995 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
996 { .name = "preadv", .errmsg = true, .alias = "pread",
997 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
998 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
999 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1000 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1001 { .name = "pwritev", .errmsg = true,
1002 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1003 { .name = "read", .errmsg = true,
1004 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1005 { .name = "readlinkat", .errmsg = true,
1006 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1007 { .name = "readv", .errmsg = true,
1008 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1009 { .name = "recvfrom", .errmsg = true,
1010 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1011 { .name = "recvmmsg", .errmsg = true,
1012 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1013 { .name = "recvmsg", .errmsg = true,
1014 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1015 { .name = "renameat", .errmsg = true,
1016 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1017 { .name = "rt_sigaction", .errmsg = true,
1018 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1019 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1020 { .name = "rt_sigqueueinfo", .errmsg = true,
1021 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1022 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1023 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1024 { .name = "select", .errmsg = true, .timeout = true, },
1025 { .name = "sendmmsg", .errmsg = true,
1026 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1027 { .name = "sendmsg", .errmsg = true,
1028 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1029 { .name = "sendto", .errmsg = true,
1030 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1031 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1032 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1033 { .name = "shutdown", .errmsg = true,
1034 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1035 { .name = "socket", .errmsg = true,
1036 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1037 [1] = SCA_SK_TYPE, /* type */ },
1038 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1039 { .name = "socketpair", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1041 [1] = SCA_SK_TYPE, /* type */ },
1042 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1043 { .name = "stat", .errmsg = true, .alias = "newstat", },
1044 { .name = "symlinkat", .errmsg = true,
1045 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1046 { .name = "tgkill", .errmsg = true,
1047 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1048 { .name = "tkill", .errmsg = true,
1049 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1050 { .name = "uname", .errmsg = true, .alias = "newuname", },
1051 { .name = "unlinkat", .errmsg = true,
1052 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1053 { .name = "utimensat", .errmsg = true,
1054 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1055 { .name = "write", .errmsg = true,
1056 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1057 { .name = "writev", .errmsg = true,
1058 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1059 };
1060
1061 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1062 {
1063 const struct syscall_fmt *fmt = fmtp;
1064 return strcmp(name, fmt->name);
1065 }
1066
1067 static struct syscall_fmt *syscall_fmt__find(const char *name)
1068 {
1069 const int nmemb = ARRAY_SIZE(syscall_fmts);
1070 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1071 }
1072
/*
 * Per syscall-number state, stored in trace->syscalls.table[] and filled in
 * lazily by trace__read_syscall_info().
 */
struct syscall {
	/* "syscalls:sys_enter_<name>" tracepoint format, NULL if not read */
	struct event_format *tp_format;
	/* syscall name as returned by audit_syscall_to_name() */
	const char	    *name;
	/* true when the event qualifier excludes this syscall */
	bool		    filtered;
	/* matching syscall_fmts entry, NULL if the syscall has none */
	struct syscall_fmt  *fmt;
	/* per-argument pretty printers, indexed by argument position */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per-argument printer parameters, taken from fmt->arg_parm */
	void		    **arg_parm;
};
1081
1082 static size_t fprintf_duration(unsigned long t, FILE *fp)
1083 {
1084 double duration = (double)t / NSEC_PER_MSEC;
1085 size_t printed = fprintf(fp, "(");
1086
1087 if (duration >= 1.0)
1088 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1089 else if (duration >= 0.01)
1090 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1091 else
1092 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1093 return printed + fprintf(fp, "): ");
1094 }
1095
1096 struct thread_trace {
1097 u64 entry_time;
1098 u64 exit_time;
1099 bool entry_pending;
1100 unsigned long nr_events;
1101 char *entry_str;
1102 double runtime_ms;
1103 struct {
1104 int max;
1105 char **table;
1106 } paths;
1107
1108 struct intlist *syscall_stats;
1109 };
1110
1111 static struct thread_trace *thread_trace__new(void)
1112 {
1113 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1114
1115 if (ttrace)
1116 ttrace->paths.max = -1;
1117
1118 ttrace->syscall_stats = intlist__new(NULL);
1119
1120 return ttrace;
1121 }
1122
1123 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1124 {
1125 struct thread_trace *ttrace;
1126
1127 if (thread == NULL)
1128 goto fail;
1129
1130 if (thread->priv == NULL)
1131 thread->priv = thread_trace__new();
1132
1133 if (thread->priv == NULL)
1134 goto fail;
1135
1136 ttrace = thread->priv;
1137 ++ttrace->nr_events;
1138
1139 return ttrace;
1140 fail:
1141 color_fprintf(fp, PERF_COLOR_RED,
1142 "WARNING: not enough memory, dropping samples!\n");
1143 return NULL;
1144 }
1145
1146 struct trace {
1147 struct perf_tool tool;
1148 struct {
1149 int machine;
1150 int open_id;
1151 } audit;
1152 struct {
1153 int max;
1154 struct syscall *table;
1155 } syscalls;
1156 struct perf_record_opts opts;
1157 struct machine *host;
1158 u64 base_time;
1159 bool full_time;
1160 FILE *output;
1161 unsigned long nr_events;
1162 struct strlist *ev_qualifier;
1163 bool not_ev_qualifier;
1164 bool live;
1165 const char *last_vfs_getname;
1166 struct intlist *tid_list;
1167 struct intlist *pid_list;
1168 bool sched;
1169 bool multiple_threads;
1170 bool summary;
1171 bool summary_only;
1172 bool show_comm;
1173 bool show_tool_stats;
1174 double duration_filter;
1175 double runtime_ms;
1176 struct {
1177 u64 vfs_getname, proc_getname;
1178 } stats;
1179 };
1180
1181 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1182 {
1183 struct thread_trace *ttrace = thread->priv;
1184
1185 if (fd > ttrace->paths.max) {
1186 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1187
1188 if (npath == NULL)
1189 return -1;
1190
1191 if (ttrace->paths.max != -1) {
1192 memset(npath + ttrace->paths.max + 1, 0,
1193 (fd - ttrace->paths.max) * sizeof(char *));
1194 } else {
1195 memset(npath, 0, (fd + 1) * sizeof(char *));
1196 }
1197
1198 ttrace->paths.table = npath;
1199 ttrace->paths.max = fd;
1200 }
1201
1202 ttrace->paths.table[fd] = strdup(pathname);
1203
1204 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1205 }
1206
1207 static int thread__read_fd_path(struct thread *thread, int fd)
1208 {
1209 char linkname[PATH_MAX], pathname[PATH_MAX];
1210 struct stat st;
1211 int ret;
1212
1213 if (thread->pid_ == thread->tid) {
1214 scnprintf(linkname, sizeof(linkname),
1215 "/proc/%d/fd/%d", thread->pid_, fd);
1216 } else {
1217 scnprintf(linkname, sizeof(linkname),
1218 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1219 }
1220
1221 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1222 return -1;
1223
1224 ret = readlink(linkname, pathname, sizeof(pathname));
1225
1226 if (ret < 0 || ret > st.st_size)
1227 return -1;
1228
1229 pathname[ret] = '\0';
1230 return trace__set_fd_pathname(thread, fd, pathname);
1231 }
1232
1233 static const char *thread__fd_path(struct thread *thread, int fd,
1234 struct trace *trace)
1235 {
1236 struct thread_trace *ttrace = thread->priv;
1237
1238 if (ttrace == NULL)
1239 return NULL;
1240
1241 if (fd < 0)
1242 return NULL;
1243
1244 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1245 if (!trace->live)
1246 return NULL;
1247 ++trace->stats.proc_getname;
1248 if (thread__read_fd_path(thread, fd)) {
1249 return NULL;
1250 }
1251
1252 return ttrace->paths.table[fd];
1253 }
1254
1255 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1256 struct syscall_arg *arg)
1257 {
1258 int fd = arg->val;
1259 size_t printed = scnprintf(bf, size, "%d", fd);
1260 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1261
1262 if (path)
1263 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1264
1265 return printed;
1266 }
1267
1268 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1269 struct syscall_arg *arg)
1270 {
1271 int fd = arg->val;
1272 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1273 struct thread_trace *ttrace = arg->thread->priv;
1274
1275 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1276 free(ttrace->paths.table[fd]);
1277 ttrace->paths.table[fd] = NULL;
1278 }
1279
1280 return printed;
1281 }
1282
1283 static bool trace__filter_duration(struct trace *trace, double t)
1284 {
1285 return t < (trace->duration_filter * NSEC_PER_MSEC);
1286 }
1287
1288 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1289 {
1290 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1291
1292 return fprintf(fp, "%10.3f ", ts);
1293 }
1294
/*
 * Flags set from the signal handler and polled by the event loop.
 *
 * They are volatile sig_atomic_t because that is the only object type the C
 * standard lets a signal handler store to with defined behaviour, and
 * volatile stops the compiler from caching them across loop iterations.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Handler installed for the signals that end a trace session: every signal
 * requests termination, and interrupted records whether the last signal
 * delivered was SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1303
1304 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1305 u64 duration, u64 tstamp, FILE *fp)
1306 {
1307 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1308 printed += fprintf_duration(duration, fp);
1309
1310 if (trace->multiple_threads) {
1311 if (trace->show_comm)
1312 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1313 printed += fprintf(fp, "%d ", thread->tid);
1314 }
1315
1316 return printed;
1317 }
1318
1319 static int trace__process_event(struct trace *trace, struct machine *machine,
1320 union perf_event *event, struct perf_sample *sample)
1321 {
1322 int ret = 0;
1323
1324 switch (event->header.type) {
1325 case PERF_RECORD_LOST:
1326 color_fprintf(trace->output, PERF_COLOR_RED,
1327 "LOST %" PRIu64 " events!\n", event->lost.lost);
1328 ret = machine__process_lost_event(machine, event, sample);
1329 default:
1330 ret = machine__process_event(machine, event, sample);
1331 break;
1332 }
1333
1334 return ret;
1335 }
1336
1337 static int trace__tool_process(struct perf_tool *tool,
1338 union perf_event *event,
1339 struct perf_sample *sample,
1340 struct machine *machine)
1341 {
1342 struct trace *trace = container_of(tool, struct trace, tool);
1343 return trace__process_event(trace, machine, event, sample);
1344 }
1345
1346 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1347 {
1348 int err = symbol__init();
1349
1350 if (err)
1351 return err;
1352
1353 trace->host = machine__new_host();
1354 if (trace->host == NULL)
1355 return -ENOMEM;
1356
1357 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1358 evlist->threads, trace__tool_process, false);
1359 if (err)
1360 symbol__exit();
1361
1362 return err;
1363 }
1364
1365 static int syscall__set_arg_fmts(struct syscall *sc)
1366 {
1367 struct format_field *field;
1368 int idx = 0;
1369
1370 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1371 if (sc->arg_scnprintf == NULL)
1372 return -1;
1373
1374 if (sc->fmt)
1375 sc->arg_parm = sc->fmt->arg_parm;
1376
1377 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1378 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1379 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1380 else if (field->flags & FIELD_IS_POINTER)
1381 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1382 ++idx;
1383 }
1384
1385 return 0;
1386 }
1387
1388 static int trace__read_syscall_info(struct trace *trace, int id)
1389 {
1390 char tp_name[128];
1391 struct syscall *sc;
1392 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1393
1394 if (name == NULL)
1395 return -1;
1396
1397 if (id > trace->syscalls.max) {
1398 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1399
1400 if (nsyscalls == NULL)
1401 return -1;
1402
1403 if (trace->syscalls.max != -1) {
1404 memset(nsyscalls + trace->syscalls.max + 1, 0,
1405 (id - trace->syscalls.max) * sizeof(*sc));
1406 } else {
1407 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1408 }
1409
1410 trace->syscalls.table = nsyscalls;
1411 trace->syscalls.max = id;
1412 }
1413
1414 sc = trace->syscalls.table + id;
1415 sc->name = name;
1416
1417 if (trace->ev_qualifier) {
1418 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1419
1420 if (!(in ^ trace->not_ev_qualifier)) {
1421 sc->filtered = true;
1422 /*
1423 * No need to do read tracepoint information since this will be
1424 * filtered out.
1425 */
1426 return 0;
1427 }
1428 }
1429
1430 sc->fmt = syscall_fmt__find(sc->name);
1431
1432 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1433 sc->tp_format = event_format__new("syscalls", tp_name);
1434
1435 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1436 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1437 sc->tp_format = event_format__new("syscalls", tp_name);
1438 }
1439
1440 if (sc->tp_format == NULL)
1441 return -1;
1442
1443 return syscall__set_arg_fmts(sc);
1444 }
1445
1446 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1447 unsigned long *args, struct trace *trace,
1448 struct thread *thread)
1449 {
1450 size_t printed = 0;
1451
1452 if (sc->tp_format != NULL) {
1453 struct format_field *field;
1454 u8 bit = 1;
1455 struct syscall_arg arg = {
1456 .idx = 0,
1457 .mask = 0,
1458 .trace = trace,
1459 .thread = thread,
1460 };
1461
1462 for (field = sc->tp_format->format.fields->next; field;
1463 field = field->next, ++arg.idx, bit <<= 1) {
1464 if (arg.mask & bit)
1465 continue;
1466 /*
1467 * Suppress this argument if its value is zero and
1468 * and we don't have a string associated in an
1469 * strarray for it.
1470 */
1471 if (args[arg.idx] == 0 &&
1472 !(sc->arg_scnprintf &&
1473 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1474 sc->arg_parm[arg.idx]))
1475 continue;
1476
1477 printed += scnprintf(bf + printed, size - printed,
1478 "%s%s: ", printed ? ", " : "", field->name);
1479 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1480 arg.val = args[arg.idx];
1481 if (sc->arg_parm)
1482 arg.parm = sc->arg_parm[arg.idx];
1483 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1484 size - printed, &arg);
1485 } else {
1486 printed += scnprintf(bf + printed, size - printed,
1487 "%ld", args[arg.idx]);
1488 }
1489 }
1490 } else {
1491 int i = 0;
1492
1493 while (i < 6) {
1494 printed += scnprintf(bf + printed, size - printed,
1495 "%sarg%d: %ld",
1496 printed ? ", " : "", i, args[i]);
1497 ++i;
1498 }
1499 }
1500
1501 return printed;
1502 }
1503
1504 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1505 struct perf_sample *sample);
1506
1507 static struct syscall *trace__syscall_info(struct trace *trace,
1508 struct perf_evsel *evsel, int id)
1509 {
1510
1511 if (id < 0) {
1512
1513 /*
1514 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1515 * before that, leaving at a higher verbosity level till that is
1516 * explained. Reproduced with plain ftrace with:
1517 *
1518 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1519 * grep "NR -1 " /t/trace_pipe
1520 *
1521 * After generating some load on the machine.
1522 */
1523 if (verbose > 1) {
1524 static u64 n;
1525 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1526 id, perf_evsel__name(evsel), ++n);
1527 }
1528 return NULL;
1529 }
1530
1531 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1532 trace__read_syscall_info(trace, id))
1533 goto out_cant_read;
1534
1535 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1536 goto out_cant_read;
1537
1538 return &trace->syscalls.table[id];
1539
1540 out_cant_read:
1541 if (verbose) {
1542 fprintf(trace->output, "Problems reading syscall %d", id);
1543 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1544 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1545 fputs(" information\n", trace->output);
1546 }
1547 return NULL;
1548 }
1549
1550 static void thread__update_stats(struct thread_trace *ttrace,
1551 int id, struct perf_sample *sample)
1552 {
1553 struct int_node *inode;
1554 struct stats *stats;
1555 u64 duration = 0;
1556
1557 inode = intlist__findnew(ttrace->syscall_stats, id);
1558 if (inode == NULL)
1559 return;
1560
1561 stats = inode->priv;
1562 if (stats == NULL) {
1563 stats = malloc(sizeof(struct stats));
1564 if (stats == NULL)
1565 return;
1566 init_stats(stats);
1567 inode->priv = stats;
1568 }
1569
1570 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1571 duration = sample->time - ttrace->entry_time;
1572
1573 update_stats(stats, duration);
1574 }
1575
1576 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1577 struct perf_sample *sample)
1578 {
1579 char *msg;
1580 void *args;
1581 size_t printed = 0;
1582 struct thread *thread;
1583 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1584 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1585 struct thread_trace *ttrace;
1586
1587 if (sc == NULL)
1588 return -1;
1589
1590 if (sc->filtered)
1591 return 0;
1592
1593 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1594 ttrace = thread__trace(thread, trace->output);
1595 if (ttrace == NULL)
1596 return -1;
1597
1598 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1599 ttrace = thread->priv;
1600
1601 if (ttrace->entry_str == NULL) {
1602 ttrace->entry_str = malloc(1024);
1603 if (!ttrace->entry_str)
1604 return -1;
1605 }
1606
1607 ttrace->entry_time = sample->time;
1608 msg = ttrace->entry_str;
1609 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1610
1611 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1612 args, trace, thread);
1613
1614 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1615 if (!trace->duration_filter && !trace->summary_only) {
1616 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1617 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1618 }
1619 } else
1620 ttrace->entry_pending = true;
1621
1622 return 0;
1623 }
1624
1625 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1626 struct perf_sample *sample)
1627 {
1628 int ret;
1629 u64 duration = 0;
1630 struct thread *thread;
1631 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1632 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1633 struct thread_trace *ttrace;
1634
1635 if (sc == NULL)
1636 return -1;
1637
1638 if (sc->filtered)
1639 return 0;
1640
1641 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1642 ttrace = thread__trace(thread, trace->output);
1643 if (ttrace == NULL)
1644 return -1;
1645
1646 if (trace->summary)
1647 thread__update_stats(ttrace, id, sample);
1648
1649 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1650
1651 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1652 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1653 trace->last_vfs_getname = NULL;
1654 ++trace->stats.vfs_getname;
1655 }
1656
1657 ttrace = thread->priv;
1658
1659 ttrace->exit_time = sample->time;
1660
1661 if (ttrace->entry_time) {
1662 duration = sample->time - ttrace->entry_time;
1663 if (trace__filter_duration(trace, duration))
1664 goto out;
1665 } else if (trace->duration_filter)
1666 goto out;
1667
1668 if (trace->summary_only)
1669 goto out;
1670
1671 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1672
1673 if (ttrace->entry_pending) {
1674 fprintf(trace->output, "%-70s", ttrace->entry_str);
1675 } else {
1676 fprintf(trace->output, " ... [");
1677 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1678 fprintf(trace->output, "]: %s()", sc->name);
1679 }
1680
1681 if (sc->fmt == NULL) {
1682 signed_print:
1683 fprintf(trace->output, ") = %d", ret);
1684 } else if (ret < 0 && sc->fmt->errmsg) {
1685 char bf[256];
1686 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1687 *e = audit_errno_to_name(-ret);
1688
1689 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1690 } else if (ret == 0 && sc->fmt->timeout)
1691 fprintf(trace->output, ") = 0 Timeout");
1692 else if (sc->fmt->hexret)
1693 fprintf(trace->output, ") = %#x", ret);
1694 else
1695 goto signed_print;
1696
1697 fputc('\n', trace->output);
1698 out:
1699 ttrace->entry_pending = false;
1700
1701 return 0;
1702 }
1703
1704 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1705 struct perf_sample *sample)
1706 {
1707 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1708 return 0;
1709 }
1710
1711 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1712 struct perf_sample *sample)
1713 {
1714 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1715 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1716 struct thread *thread = machine__findnew_thread(trace->host,
1717 sample->pid,
1718 sample->tid);
1719 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1720
1721 if (ttrace == NULL)
1722 goto out_dump;
1723
1724 ttrace->runtime_ms += runtime_ms;
1725 trace->runtime_ms += runtime_ms;
1726 return 0;
1727
1728 out_dump:
1729 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1730 evsel->name,
1731 perf_evsel__strval(evsel, sample, "comm"),
1732 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1733 runtime,
1734 perf_evsel__intval(evsel, sample, "vruntime"));
1735 return 0;
1736 }
1737
1738 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1739 {
1740 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1741 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1742 return false;
1743
1744 if (trace->pid_list || trace->tid_list)
1745 return true;
1746
1747 return false;
1748 }
1749
1750 static int trace__process_sample(struct perf_tool *tool,
1751 union perf_event *event __maybe_unused,
1752 struct perf_sample *sample,
1753 struct perf_evsel *evsel,
1754 struct machine *machine __maybe_unused)
1755 {
1756 struct trace *trace = container_of(tool, struct trace, tool);
1757 int err = 0;
1758
1759 tracepoint_handler handler = evsel->handler;
1760
1761 if (skip_sample(trace, sample))
1762 return 0;
1763
1764 if (!trace->full_time && trace->base_time == 0)
1765 trace->base_time = sample->time;
1766
1767 if (handler)
1768 handler(trace, evsel, sample);
1769
1770 return err;
1771 }
1772
1773 static int parse_target_str(struct trace *trace)
1774 {
1775 if (trace->opts.target.pid) {
1776 trace->pid_list = intlist__new(trace->opts.target.pid);
1777 if (trace->pid_list == NULL) {
1778 pr_err("Error parsing process id string\n");
1779 return -EINVAL;
1780 }
1781 }
1782
1783 if (trace->opts.target.tid) {
1784 trace->tid_list = intlist__new(trace->opts.target.tid);
1785 if (trace->tid_list == NULL) {
1786 pr_err("Error parsing thread id string\n");
1787 return -EINVAL;
1788 }
1789 }
1790
1791 return 0;
1792 }
1793
/*
 * Implement "perf trace record": run cmd_record() with a fixed set of
 * options selecting the raw_syscalls enter/exit tracepoints, followed by the
 * caller's own arguments.
 *
 * Returns -ENOMEM if the argument vector cannot be allocated, otherwise
 * whatever cmd_record() returns.
 */
static int trace__record(int argc, const char **argv)
{
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
	};
	const unsigned int nr_fixed = ARRAY_SIZE(record_args);
	const unsigned int rec_argc = nr_fixed + argc;
	const char **rec_argv;
	unsigned int i, j;

	/* one extra, zeroed slot terminates the vector */
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (!rec_argv)
		return -ENOMEM;

	for (i = 0; i < nr_fixed; i++)
		rec_argv[i] = record_args[i];

	for (j = 0; j < (unsigned int)argc; j++)
		rec_argv[nr_fixed + j] = argv[j];

	return cmd_record(nr_fixed + j, rec_argv, NULL);
}
1820
1821 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1822
1823 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1824 {
1825 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1826 if (evsel == NULL)
1827 return;
1828
1829 if (perf_evsel__field(evsel, "pathname") == NULL) {
1830 perf_evsel__delete(evsel);
1831 return;
1832 }
1833
1834 evsel->handler = trace__vfs_getname;
1835 perf_evlist__add(evlist, evsel);
1836 }
1837
1838 static int trace__run(struct trace *trace, int argc, const char **argv)
1839 {
1840 struct perf_evlist *evlist = perf_evlist__new();
1841 struct perf_evsel *evsel;
1842 int err = -1, i;
1843 unsigned long before;
1844 const bool forks = argc > 0;
1845
1846 trace->live = true;
1847
1848 if (evlist == NULL) {
1849 fprintf(trace->output, "Not enough memory to run!\n");
1850 goto out;
1851 }
1852
1853 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1854 goto out_error_tp;
1855
1856 perf_evlist__add_vfs_getname(evlist);
1857
1858 if (trace->sched &&
1859 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1860 trace__sched_stat_runtime))
1861 goto out_error_tp;
1862
1863 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1864 if (err < 0) {
1865 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1866 goto out_delete_evlist;
1867 }
1868
1869 err = trace__symbols_init(trace, evlist);
1870 if (err < 0) {
1871 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1872 goto out_delete_maps;
1873 }
1874
1875 perf_evlist__config(evlist, &trace->opts);
1876
1877 signal(SIGCHLD, sig_handler);
1878 signal(SIGINT, sig_handler);
1879
1880 if (forks) {
1881 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1882 argv, false, false);
1883 if (err < 0) {
1884 fprintf(trace->output, "Couldn't run the workload!\n");
1885 goto out_delete_maps;
1886 }
1887 }
1888
1889 err = perf_evlist__open(evlist);
1890 if (err < 0)
1891 goto out_error_open;
1892
1893 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1894 if (err < 0) {
1895 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1896 goto out_close_evlist;
1897 }
1898
1899 perf_evlist__enable(evlist);
1900
1901 if (forks)
1902 perf_evlist__start_workload(evlist);
1903
1904 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1905 again:
1906 before = trace->nr_events;
1907
1908 for (i = 0; i < evlist->nr_mmaps; i++) {
1909 union perf_event *event;
1910
1911 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1912 const u32 type = event->header.type;
1913 tracepoint_handler handler;
1914 struct perf_sample sample;
1915
1916 ++trace->nr_events;
1917
1918 err = perf_evlist__parse_sample(evlist, event, &sample);
1919 if (err) {
1920 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1921 goto next_event;
1922 }
1923
1924 if (!trace->full_time && trace->base_time == 0)
1925 trace->base_time = sample.time;
1926
1927 if (type != PERF_RECORD_SAMPLE) {
1928 trace__process_event(trace, trace->host, event, &sample);
1929 continue;
1930 }
1931
1932 evsel = perf_evlist__id2evsel(evlist, sample.id);
1933 if (evsel == NULL) {
1934 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1935 goto next_event;
1936 }
1937
1938 if (sample.raw_data == NULL) {
1939 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1940 perf_evsel__name(evsel), sample.tid,
1941 sample.cpu, sample.raw_size);
1942 goto next_event;
1943 }
1944
1945 handler = evsel->handler;
1946 handler(trace, evsel, &sample);
1947 next_event:
1948 perf_evlist__mmap_consume(evlist, i);
1949
1950 if (interrupted)
1951 goto out_disable;
1952 }
1953 }
1954
1955 if (trace->nr_events == before) {
1956 int timeout = done ? 100 : -1;
1957
1958 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1959 goto again;
1960 } else {
1961 goto again;
1962 }
1963
1964 out_disable:
1965 perf_evlist__disable(evlist);
1966
1967 if (!err) {
1968 if (trace->summary)
1969 trace__fprintf_thread_summary(trace, trace->output);
1970
1971 if (trace->show_tool_stats) {
1972 fprintf(trace->output, "Stats:\n "
1973 " vfs_getname : %" PRIu64 "\n"
1974 " proc_getname: %" PRIu64 "\n",
1975 trace->stats.vfs_getname,
1976 trace->stats.proc_getname);
1977 }
1978 }
1979
1980 perf_evlist__munmap(evlist);
1981 out_close_evlist:
1982 perf_evlist__close(evlist);
1983 out_delete_maps:
1984 perf_evlist__delete_maps(evlist);
1985 out_delete_evlist:
1986 perf_evlist__delete(evlist);
1987 out:
1988 trace->live = false;
1989 return err;
1990 {
1991 char errbuf[BUFSIZ];
1992
1993 out_error_tp:
1994 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
1995 goto out_error;
1996
1997 out_error_open:
1998 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
1999
2000 out_error:
2001 fprintf(trace->output, "%s\n", errbuf);
2002 goto out_delete_evlist;
2003 }
2004 }
2005
2006 static int trace__replay(struct trace *trace)
2007 {
2008 const struct perf_evsel_str_handler handlers[] = {
2009 { "probe:vfs_getname", trace__vfs_getname, },
2010 };
2011 struct perf_data_file file = {
2012 .path = input_name,
2013 .mode = PERF_DATA_MODE_READ,
2014 };
2015 struct perf_session *session;
2016 struct perf_evsel *evsel;
2017 int err = -1;
2018
2019 trace->tool.sample = trace__process_sample;
2020 trace->tool.mmap = perf_event__process_mmap;
2021 trace->tool.mmap2 = perf_event__process_mmap2;
2022 trace->tool.comm = perf_event__process_comm;
2023 trace->tool.exit = perf_event__process_exit;
2024 trace->tool.fork = perf_event__process_fork;
2025 trace->tool.attr = perf_event__process_attr;
2026 trace->tool.tracing_data = perf_event__process_tracing_data;
2027 trace->tool.build_id = perf_event__process_build_id;
2028
2029 trace->tool.ordered_samples = true;
2030 trace->tool.ordering_requires_timestamps = true;
2031
2032 /* add tid to output */
2033 trace->multiple_threads = true;
2034
2035 if (symbol__init() < 0)
2036 return -1;
2037
2038 session = perf_session__new(&file, false, &trace->tool);
2039 if (session == NULL)
2040 return -ENOMEM;
2041
2042 trace->host = &session->machines.host;
2043
2044 err = perf_session__set_tracepoints_handlers(session, handlers);
2045 if (err)
2046 goto out;
2047
2048 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2049 "raw_syscalls:sys_enter");
2050 if (evsel == NULL) {
2051 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2052 goto out;
2053 }
2054
2055 if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2056 perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2057 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2058 goto out;
2059 }
2060
2061 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2062 "raw_syscalls:sys_exit");
2063 if (evsel == NULL) {
2064 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2065 goto out;
2066 }
2067
2068 if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2069 perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2070 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2071 goto out;
2072 }
2073
2074 err = parse_target_str(trace);
2075 if (err != 0)
2076 goto out;
2077
2078 setup_pager();
2079
2080 err = perf_session__process_events(session, &trace->tool);
2081 if (err)
2082 pr_err("Failed to process events, error %d", err);
2083
2084 else if (trace->summary)
2085 trace__fprintf_thread_summary(trace, trace->output);
2086
2087 out:
2088 perf_session__delete(session);
2089
2090 return err;
2091 }
2092
/*
 * Print the heading of the per-thread summary.
 * Returns what fprintf() returned.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2101
2102 static size_t thread__dump_stats(struct thread_trace *ttrace,
2103 struct trace *trace, FILE *fp)
2104 {
2105 struct stats *stats;
2106 size_t printed = 0;
2107 struct syscall *sc;
2108 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2109
2110 if (inode == NULL)
2111 return 0;
2112
2113 printed += fprintf(fp, "\n");
2114
2115 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2116 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2117 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2118
2119 /* each int_node is a syscall */
2120 while (inode) {
2121 stats = inode->priv;
2122 if (stats) {
2123 double min = (double)(stats->min) / NSEC_PER_MSEC;
2124 double max = (double)(stats->max) / NSEC_PER_MSEC;
2125 double avg = avg_stats(stats);
2126 double pct;
2127 u64 n = (u64) stats->n;
2128
2129 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2130 avg /= NSEC_PER_MSEC;
2131
2132 sc = &trace->syscalls.table[inode->i];
2133 printed += fprintf(fp, " %-15s", sc->name);
2134 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2135 n, min, avg);
2136 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2137 }
2138
2139 inode = intlist__next(inode);
2140 }
2141
2142 printed += fprintf(fp, "\n\n");
2143
2144 return printed;
2145 }
2146
/*
 * Context handed to trace__fprintf_one_thread() through the void *priv
 * argument of machine__for_each_thread().
 */
struct summary_data {
	FILE		*fp;		/* where the summary is written */
	struct trace	*trace;		/* global state, for event totals and the syscall table */
	size_t		printed;	/* running total of the print calls' return values */
};
2153
2154 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2155 {
2156 struct summary_data *data = priv;
2157 FILE *fp = data->fp;
2158 size_t printed = data->printed;
2159 struct trace *trace = data->trace;
2160 struct thread_trace *ttrace = thread->priv;
2161 const char *color;
2162 double ratio;
2163
2164 if (ttrace == NULL)
2165 return 0;
2166
2167 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2168
2169 color = PERF_COLOR_NORMAL;
2170 if (ratio > 50.0)
2171 color = PERF_COLOR_RED;
2172 else if (ratio > 25.0)
2173 color = PERF_COLOR_GREEN;
2174 else if (ratio > 5.0)
2175 color = PERF_COLOR_YELLOW;
2176
2177 printed += color_fprintf(fp, color, " %s (%d), ", thread__comm_str(thread), thread->tid);
2178 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2179 printed += color_fprintf(fp, color, "%.1f%%", ratio);
2180 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2181 printed += thread__dump_stats(ttrace, trace, fp);
2182
2183 data->printed += printed;
2184
2185 return 0;
2186 }
2187
2188 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2189 {
2190 struct summary_data data = {
2191 .fp = fp,
2192 .trace = trace
2193 };
2194 data.printed = trace__fprintf_threads_header(fp);
2195
2196 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2197
2198 return data.printed;
2199 }
2200
2201 static int trace__set_duration(const struct option *opt, const char *str,
2202 int unset __maybe_unused)
2203 {
2204 struct trace *trace = opt->value;
2205
2206 trace->duration_filter = atof(str);
2207 return 0;
2208 }
2209
2210 static int trace__open_output(struct trace *trace, const char *filename)
2211 {
2212 struct stat st;
2213
2214 if (!stat(filename, &st) && st.st_size) {
2215 char oldname[PATH_MAX];
2216
2217 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2218 unlink(oldname);
2219 rename(filename, oldname);
2220 }
2221
2222 trace->output = fopen(filename, "w");
2223
2224 return trace->output == NULL ? -errno : 0;
2225 }
2226
2227 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2228 {
2229 const char * const trace_usage[] = {
2230 "perf trace [<options>] [<command>]",
2231 "perf trace [<options>] -- <command> [<options>]",
2232 "perf trace record [<options>] [<command>]",
2233 "perf trace record [<options>] -- <command> [<options>]",
2234 NULL
2235 };
2236 struct trace trace = {
2237 .audit = {
2238 .machine = audit_detect_machine(),
2239 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2240 },
2241 .syscalls = {
2242 . max = -1,
2243 },
2244 .opts = {
2245 .target = {
2246 .uid = UINT_MAX,
2247 .uses_mmap = true,
2248 },
2249 .user_freq = UINT_MAX,
2250 .user_interval = ULLONG_MAX,
2251 .no_delay = true,
2252 .mmap_pages = 1024,
2253 },
2254 .output = stdout,
2255 .show_comm = true,
2256 };
2257 const char *output_name = NULL;
2258 const char *ev_qualifier_str = NULL;
2259 const struct option trace_options[] = {
2260 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2261 "show the thread COMM next to its id"),
2262 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2263 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2264 "list of events to trace"),
2265 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2266 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2267 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2268 "trace events on existing process id"),
2269 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2270 "trace events on existing thread id"),
2271 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2272 "system-wide collection from all CPUs"),
2273 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2274 "list of cpus to monitor"),
2275 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2276 "child tasks do not inherit counters"),
2277 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2278 "number of mmap data pages",
2279 perf_evlist__parse_mmap_pages),
2280 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2281 "user to profile"),
2282 OPT_CALLBACK(0, "duration", &trace, "float",
2283 "show only events with duration > N.M ms",
2284 trace__set_duration),
2285 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2286 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2287 OPT_BOOLEAN('T', "time", &trace.full_time,
2288 "Show full timestamp, not time relative to first start"),
2289 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2290 "Show only syscall summary with statistics"),
2291 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2292 "Show all syscalls and summary with statistics"),
2293 OPT_END()
2294 };
2295 int err;
2296 char bf[BUFSIZ];
2297
2298 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2299 return trace__record(argc-2, &argv[2]);
2300
2301 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2302
2303 /* summary_only implies summary option, but don't overwrite summary if set */
2304 if (trace.summary_only)
2305 trace.summary = trace.summary_only;
2306
2307 if (output_name != NULL) {
2308 err = trace__open_output(&trace, output_name);
2309 if (err < 0) {
2310 perror("failed to create output file");
2311 goto out;
2312 }
2313 }
2314
2315 if (ev_qualifier_str != NULL) {
2316 const char *s = ev_qualifier_str;
2317
2318 trace.not_ev_qualifier = *s == '!';
2319 if (trace.not_ev_qualifier)
2320 ++s;
2321 trace.ev_qualifier = strlist__new(true, s);
2322 if (trace.ev_qualifier == NULL) {
2323 fputs("Not enough memory to parse event qualifier",
2324 trace.output);
2325 err = -ENOMEM;
2326 goto out_close;
2327 }
2328 }
2329
2330 err = target__validate(&trace.opts.target);
2331 if (err) {
2332 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2333 fprintf(trace.output, "%s", bf);
2334 goto out_close;
2335 }
2336
2337 err = target__parse_uid(&trace.opts.target);
2338 if (err) {
2339 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2340 fprintf(trace.output, "%s", bf);
2341 goto out_close;
2342 }
2343
2344 if (!argc && target__none(&trace.opts.target))
2345 trace.opts.target.system_wide = true;
2346
2347 if (input_name)
2348 err = trace__replay(&trace);
2349 else
2350 err = trace__run(&trace, argc, argv);
2351
2352 out_close:
2353 if (output_name != NULL)
2354 fclose(trace.output);
2355 out:
2356 return err;
2357 }