]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - tools/perf/builtin-trace.c
perf machine: Introduce synthesize_threads method out of open coded equivalent
[mirror_ubuntu-bionic-kernel.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14
15 #include <libaudit.h>
16 #include <stdlib.h>
17 #include <sys/eventfd.h>
18 #include <sys/mman.h>
19 #include <linux/futex.h>
20
/*
 * For older distros: fallback values for the mmap()/madvise() constants
 * decoded further down. Each one is only defined here when the system
 * headers did not already provide it.
 */
#ifndef MAP_STACK
# define MAP_STACK 0x20000
#endif

#ifndef MADV_HWPOISON
# define MADV_HWPOISON 100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE 12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE 13
#endif
37
/*
 * Accessor for one field of a tracepoint's raw payload.
 *
 * @offset is the byte offset of the field inside perf_sample::raw_data.
 * The union holds the reader callback installed by the tp_field__init_*()
 * helpers: @integer returns the field value widened to u64, @pointer
 * returns the address of the field inside the raw data.
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
45
46 #define TP_UINT_FIELD(bits) \
47 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
48 { \
49 return *(u##bits *)(sample->raw_data + field->offset); \
50 }
51
52 TP_UINT_FIELD(8);
53 TP_UINT_FIELD(16);
54 TP_UINT_FIELD(32);
55 TP_UINT_FIELD(64);
56
57 #define TP_UINT_FIELD__SWAPPED(bits) \
58 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
59 { \
60 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
61 return bswap_##bits(value);\
62 }
63
64 TP_UINT_FIELD__SWAPPED(16);
65 TP_UINT_FIELD__SWAPPED(32);
66 TP_UINT_FIELD__SWAPPED(64);
67
68 static int tp_field__init_uint(struct tp_field *field,
69 struct format_field *format_field,
70 bool needs_swap)
71 {
72 field->offset = format_field->offset;
73
74 switch (format_field->size) {
75 case 1:
76 field->integer = tp_field__u8;
77 break;
78 case 2:
79 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
80 break;
81 case 4:
82 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
83 break;
84 case 8:
85 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
86 break;
87 default:
88 return -1;
89 }
90
91 return 0;
92 }
93
/* Return the address of the field inside the sample's raw data. */
static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
{
	return sample->raw_data + field->offset;
}
98
/*
 * Set up @field as a pointer accessor: record the offset taken from
 * @format_field and install tp_field__ptr() as the reader. Always
 * returns 0.
 */
static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
{
	field->offset = format_field->offset;
	field->pointer = tp_field__ptr;
	return 0;
}
105
/*
 * Private data hung off evsel->priv for the raw_syscalls tracepoints.
 *
 * @id is initialised for both directions. @args and @ret share storage:
 * the sys_enter evsel initialises @args, the sys_exit evsel initialises
 * @ret.
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
112
113 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
114 struct tp_field *field,
115 const char *name)
116 {
117 struct format_field *format_field = perf_evsel__field(evsel, name);
118
119 if (format_field == NULL)
120 return -1;
121
122 return tp_field__init_uint(field, format_field, evsel->needs_swap);
123 }
124
/*
 * Initialise the syscall_tp member @name (stored in evsel->priv) as an
 * unsigned integer accessor for the tracepoint field with that same name.
 * Evaluates to the return value of perf_evsel__init_tp_uint_field().
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
128
/*
 * Look up the tracepoint field called @name on @evsel and initialise
 * @field as a pointer accessor for it. Returns -1 when the field does not
 * exist, otherwise the result of tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	if (!fmt)
		return -1;

	return tp_field__init_ptr(field, fmt);
}
140
/*
 * Initialise the syscall_tp member @name (stored in evsel->priv) as a
 * pointer accessor for the tracepoint field with that same name.
 * Evaluates to the return value of perf_evsel__init_tp_ptr_field().
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
144
/*
 * Free the private data attached to @evsel, clear the pointer, then
 * delete the evsel itself.
 */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	free(evsel->priv);
	evsel->priv = NULL;
	perf_evsel__delete(evsel);
}
151
152 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
153 {
154 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
155
156 if (evsel) {
157 evsel->priv = malloc(sizeof(struct syscall_tp));
158
159 if (evsel->priv == NULL)
160 goto out_delete;
161
162 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
163 goto out_delete;
164
165 evsel->handler = handler;
166 }
167
168 return evsel;
169
170 out_delete:
171 perf_evsel__delete_priv(evsel);
172 return NULL;
173 }
174
/*
 * Read the syscall_tp member @name of @evsel from @sample through its
 * integer accessor; evaluates to the u64 the accessor returns.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.integer(&fields->name, sample); })

/*
 * Same as perf_evsel__sc_tp_uint() but through the pointer accessor;
 * evaluates to the address the accessor returns.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = evsel->priv; \
	   fields->name.pointer(&fields->name, sample); })
182
183 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
184 void *sys_enter_handler,
185 void *sys_exit_handler)
186 {
187 int ret = -1;
188 struct perf_evsel *sys_enter, *sys_exit;
189
190 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
191 if (sys_enter == NULL)
192 goto out;
193
194 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
195 goto out_delete_sys_enter;
196
197 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
198 if (sys_exit == NULL)
199 goto out_delete_sys_enter;
200
201 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
202 goto out_delete_sys_exit;
203
204 perf_evlist__add(evlist, sys_enter);
205 perf_evlist__add(evlist, sys_exit);
206
207 ret = 0;
208 out:
209 return ret;
210
211 out_delete_sys_exit:
212 perf_evsel__delete_priv(sys_exit);
213 out_delete_sys_enter:
214 perf_evsel__delete_priv(sys_enter);
215 goto out;
216 }
217
218
/*
 * Context handed to every argument formatter (the SCA_* callbacks).
 *
 * @val:  the argument value to format.
 * @parm: optional per-argument parameter; the strarray formatters expect
 *        a struct strarray here.
 * @idx:  position of this argument in the syscall (open_flags uses
 *        idx + 1 to refer to the following argument).
 * @mask: bitmask a formatter may OR bits into to flag other arguments,
 *        one bit per argument index.
 *        NOTE(review): how @mask, @thread and @trace are consumed is
 *        outside this view - confirm against the caller.
 */
struct syscall_arg {
	unsigned long val;
	struct thread *thread;
	struct trace *trace;
	void *parm;
	u8 idx;
	u8 mask;
};
227
/*
 * Table mapping a contiguous range of integer values to names.
 * entries[i] names the value (offset + i); nr_entries is the number of
 * slots in entries[].
 */
struct strarray {
	int offset;
	int nr_entries;
	const char **entries;
};
233
/*
 * Define strarray__<array>, a struct strarray wrapping the string table
 * <array>, whose first entry names the value 0 (offset is left zero).
 */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}

/*
 * Same as DEFINE_STRARRAY() but the first entry of <array> names the
 * value <off> instead of 0.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
244
245 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
246 const char *intfmt,
247 struct syscall_arg *arg)
248 {
249 struct strarray *sa = arg->parm;
250 int idx = arg->val - sa->offset;
251
252 if (idx < 0 || idx >= sa->nr_entries)
253 return scnprintf(bf, size, intfmt, arg->val);
254
255 return scnprintf(bf, size, "%s", sa->entries[idx]);
256 }
257
/*
 * strarray formatter whose fallback for values without a name is the
 * "%d" format.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
265
/*
 * strarray formatter whose fallback for values without a name is the
 * "%#x" format.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
273
/*
 * File descriptor formatter. Only declared here so that the table and
 * syscall_arg__scnprintf_fd_at() can reference it; the definition is
 * further down the file.
 */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
					struct syscall_arg *arg);

#define SCA_FD syscall_arg__scnprintf_fd
278
279 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
280 struct syscall_arg *arg)
281 {
282 int fd = arg->val;
283
284 if (fd == AT_FDCWD)
285 return scnprintf(bf, size, "CWD");
286
287 return syscall_arg__scnprintf_fd(bf, size, arg);
288 }
289
290 #define SCA_FDAT syscall_arg__scnprintf_fd_at
291
/*
 * Formatter used for the fd argument of close(). Only declared here; the
 * definition is further down the file.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
296
/* Format the argument value as hexadecimal with a 0x prefix ("%#lx"). */
static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
					 struct syscall_arg *arg)
{
	return scnprintf(bf, size, "%#lx", arg->val);
}

#define SCA_HEX syscall_arg__scnprintf_hex
304
305 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
306 struct syscall_arg *arg)
307 {
308 int printed = 0, prot = arg->val;
309
310 if (prot == PROT_NONE)
311 return scnprintf(bf, size, "NONE");
312 #define P_MMAP_PROT(n) \
313 if (prot & PROT_##n) { \
314 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
315 prot &= ~PROT_##n; \
316 }
317
318 P_MMAP_PROT(EXEC);
319 P_MMAP_PROT(READ);
320 P_MMAP_PROT(WRITE);
321 #ifdef PROT_SEM
322 P_MMAP_PROT(SEM);
323 #endif
324 P_MMAP_PROT(GROWSDOWN);
325 P_MMAP_PROT(GROWSUP);
326 #undef P_MMAP_PROT
327
328 if (prot)
329 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
330
331 return printed;
332 }
333
334 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
335
336 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
337 struct syscall_arg *arg)
338 {
339 int printed = 0, flags = arg->val;
340
341 #define P_MMAP_FLAG(n) \
342 if (flags & MAP_##n) { \
343 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
344 flags &= ~MAP_##n; \
345 }
346
347 P_MMAP_FLAG(SHARED);
348 P_MMAP_FLAG(PRIVATE);
349 #ifdef MAP_32BIT
350 P_MMAP_FLAG(32BIT);
351 #endif
352 P_MMAP_FLAG(ANONYMOUS);
353 P_MMAP_FLAG(DENYWRITE);
354 P_MMAP_FLAG(EXECUTABLE);
355 P_MMAP_FLAG(FILE);
356 P_MMAP_FLAG(FIXED);
357 P_MMAP_FLAG(GROWSDOWN);
358 #ifdef MAP_HUGETLB
359 P_MMAP_FLAG(HUGETLB);
360 #endif
361 P_MMAP_FLAG(LOCKED);
362 P_MMAP_FLAG(NONBLOCK);
363 P_MMAP_FLAG(NORESERVE);
364 P_MMAP_FLAG(POPULATE);
365 P_MMAP_FLAG(STACK);
366 #ifdef MAP_UNINITIALIZED
367 P_MMAP_FLAG(UNINITIALIZED);
368 #endif
369 #undef P_MMAP_FLAG
370
371 if (flags)
372 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
373
374 return printed;
375 }
376
377 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
378
379 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
380 struct syscall_arg *arg)
381 {
382 int behavior = arg->val;
383
384 switch (behavior) {
385 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
386 P_MADV_BHV(NORMAL);
387 P_MADV_BHV(RANDOM);
388 P_MADV_BHV(SEQUENTIAL);
389 P_MADV_BHV(WILLNEED);
390 P_MADV_BHV(DONTNEED);
391 P_MADV_BHV(REMOVE);
392 P_MADV_BHV(DONTFORK);
393 P_MADV_BHV(DOFORK);
394 P_MADV_BHV(HWPOISON);
395 #ifdef MADV_SOFT_OFFLINE
396 P_MADV_BHV(SOFT_OFFLINE);
397 #endif
398 P_MADV_BHV(MERGEABLE);
399 P_MADV_BHV(UNMERGEABLE);
400 #ifdef MADV_HUGEPAGE
401 P_MADV_BHV(HUGEPAGE);
402 #endif
403 #ifdef MADV_NOHUGEPAGE
404 P_MADV_BHV(NOHUGEPAGE);
405 #endif
406 #ifdef MADV_DONTDUMP
407 P_MADV_BHV(DONTDUMP);
408 #endif
409 #ifdef MADV_DODUMP
410 P_MADV_BHV(DODUMP);
411 #endif
412 #undef P_MADV_PHV
413 default: break;
414 }
415
416 return scnprintf(bf, size, "%#x", behavior);
417 }
418
419 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
420
421 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
422 struct syscall_arg *arg)
423 {
424 int printed = 0, op = arg->val;
425
426 if (op == 0)
427 return scnprintf(bf, size, "NONE");
428 #define P_CMD(cmd) \
429 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
430 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
431 op &= ~LOCK_##cmd; \
432 }
433
434 P_CMD(SH);
435 P_CMD(EX);
436 P_CMD(NB);
437 P_CMD(UN);
438 P_CMD(MAND);
439 P_CMD(RW);
440 P_CMD(READ);
441 P_CMD(WRITE);
442 #undef P_OP
443
444 if (op)
445 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
446
447 return printed;
448 }
449
450 #define SCA_FLOCK syscall_arg__scnprintf_flock
451
452 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
453 {
454 enum syscall_futex_args {
455 SCF_UADDR = (1 << 0),
456 SCF_OP = (1 << 1),
457 SCF_VAL = (1 << 2),
458 SCF_TIMEOUT = (1 << 3),
459 SCF_UADDR2 = (1 << 4),
460 SCF_VAL3 = (1 << 5),
461 };
462 int op = arg->val;
463 int cmd = op & FUTEX_CMD_MASK;
464 size_t printed = 0;
465
466 switch (cmd) {
467 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
468 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
469 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
470 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
471 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
472 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
473 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
474 P_FUTEX_OP(WAKE_OP); break;
475 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
476 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
477 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
478 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
479 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
480 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
481 default: printed = scnprintf(bf, size, "%#x", cmd); break;
482 }
483
484 if (op & FUTEX_PRIVATE_FLAG)
485 printed += scnprintf(bf + printed, size - printed, "|PRIV");
486
487 if (op & FUTEX_CLOCK_REALTIME)
488 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
489
490 return printed;
491 }
492
493 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
494
/* Names for the epoll_ctl() op argument; the first entry names value 1. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);

/* Names for the "which" argument of getitimer()/setitimer(). */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers);

/*
 * Names for the lseek() whence argument. "DATA" and "HOLE" are only
 * present when the headers define SEEK_DATA / SEEK_HOLE.
 */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences);

/* Names for the fcntl() cmd argument, indexed by command value. */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
	"F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
	"F_GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds);

/* Names for the resource argument of getrlimit()/setrlimit()/prlimit64(). */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources);

/* Names for the "how" argument of rt_sigprocmask(). */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow);

/* Names for the clk_id argument of clock_gettime(). */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
};
static DEFINE_STRARRAY(clockid);
534
/*
 * Names for the socket()/socketpair() family argument, indexed directly
 * by the AF_* value (AF_UNSPEC == 0 ... AF_VSOCK == 40).
 *
 * The list must not skip any value: index 28 (AF_MPLS) used to be
 * missing, which shifted every family from CAN onwards down by one
 * (AF_CAN printed as "TIPC", AF_BLUETOOTH as "IUCV", ...).
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
/* strarray wrapper for socket_families[]; the first entry names value 0. */
static DEFINE_STRARRAY(socket_families);
544
545 #ifndef SOCK_TYPE_MASK
546 #define SOCK_TYPE_MASK 0xf
547 #endif
548
549 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
550 struct syscall_arg *arg)
551 {
552 size_t printed;
553 int type = arg->val,
554 flags = type & ~SOCK_TYPE_MASK;
555
556 type &= SOCK_TYPE_MASK;
557 /*
558 * Can't use a strarray, MIPS may override for ABI reasons.
559 */
560 switch (type) {
561 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
562 P_SK_TYPE(STREAM);
563 P_SK_TYPE(DGRAM);
564 P_SK_TYPE(RAW);
565 P_SK_TYPE(RDM);
566 P_SK_TYPE(SEQPACKET);
567 P_SK_TYPE(DCCP);
568 P_SK_TYPE(PACKET);
569 #undef P_SK_TYPE
570 default:
571 printed = scnprintf(bf, size, "%#x", type);
572 }
573
574 #define P_SK_FLAG(n) \
575 if (flags & SOCK_##n) { \
576 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
577 flags &= ~SOCK_##n; \
578 }
579
580 P_SK_FLAG(CLOEXEC);
581 P_SK_FLAG(NONBLOCK);
582 #undef P_SK_FLAG
583
584 if (flags)
585 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
586
587 return printed;
588 }
589
590 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
591
592 #ifndef MSG_PROBE
593 #define MSG_PROBE 0x10
594 #endif
595 #ifndef MSG_WAITFORONE
596 #define MSG_WAITFORONE 0x10000
597 #endif
598 #ifndef MSG_SENDPAGE_NOTLAST
599 #define MSG_SENDPAGE_NOTLAST 0x20000
600 #endif
601 #ifndef MSG_FASTOPEN
602 #define MSG_FASTOPEN 0x20000000
603 #endif
604
605 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
606 struct syscall_arg *arg)
607 {
608 int printed = 0, flags = arg->val;
609
610 if (flags == 0)
611 return scnprintf(bf, size, "NONE");
612 #define P_MSG_FLAG(n) \
613 if (flags & MSG_##n) { \
614 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
615 flags &= ~MSG_##n; \
616 }
617
618 P_MSG_FLAG(OOB);
619 P_MSG_FLAG(PEEK);
620 P_MSG_FLAG(DONTROUTE);
621 P_MSG_FLAG(TRYHARD);
622 P_MSG_FLAG(CTRUNC);
623 P_MSG_FLAG(PROBE);
624 P_MSG_FLAG(TRUNC);
625 P_MSG_FLAG(DONTWAIT);
626 P_MSG_FLAG(EOR);
627 P_MSG_FLAG(WAITALL);
628 P_MSG_FLAG(FIN);
629 P_MSG_FLAG(SYN);
630 P_MSG_FLAG(CONFIRM);
631 P_MSG_FLAG(RST);
632 P_MSG_FLAG(ERRQUEUE);
633 P_MSG_FLAG(NOSIGNAL);
634 P_MSG_FLAG(MORE);
635 P_MSG_FLAG(WAITFORONE);
636 P_MSG_FLAG(SENDPAGE_NOTLAST);
637 P_MSG_FLAG(FASTOPEN);
638 P_MSG_FLAG(CMSG_CLOEXEC);
639 #undef P_MSG_FLAG
640
641 if (flags)
642 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
643
644 return printed;
645 }
646
647 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
648
649 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
650 struct syscall_arg *arg)
651 {
652 size_t printed = 0;
653 int mode = arg->val;
654
655 if (mode == F_OK) /* 0 */
656 return scnprintf(bf, size, "F");
657 #define P_MODE(n) \
658 if (mode & n##_OK) { \
659 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
660 mode &= ~n##_OK; \
661 }
662
663 P_MODE(R);
664 P_MODE(W);
665 P_MODE(X);
666 #undef P_MODE
667
668 if (mode)
669 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
670
671 return printed;
672 }
673
674 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
675
676 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
677 struct syscall_arg *arg)
678 {
679 int printed = 0, flags = arg->val;
680
681 if (!(flags & O_CREAT))
682 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
683
684 if (flags == 0)
685 return scnprintf(bf, size, "RDONLY");
686 #define P_FLAG(n) \
687 if (flags & O_##n) { \
688 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
689 flags &= ~O_##n; \
690 }
691
692 P_FLAG(APPEND);
693 P_FLAG(ASYNC);
694 P_FLAG(CLOEXEC);
695 P_FLAG(CREAT);
696 P_FLAG(DIRECT);
697 P_FLAG(DIRECTORY);
698 P_FLAG(EXCL);
699 P_FLAG(LARGEFILE);
700 P_FLAG(NOATIME);
701 P_FLAG(NOCTTY);
702 #ifdef O_NONBLOCK
703 P_FLAG(NONBLOCK);
704 #elif O_NDELAY
705 P_FLAG(NDELAY);
706 #endif
707 #ifdef O_PATH
708 P_FLAG(PATH);
709 #endif
710 P_FLAG(RDWR);
711 #ifdef O_DSYNC
712 if ((flags & O_SYNC) == O_SYNC)
713 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
714 else {
715 P_FLAG(DSYNC);
716 }
717 #else
718 P_FLAG(SYNC);
719 #endif
720 P_FLAG(TRUNC);
721 P_FLAG(WRONLY);
722 #undef P_FLAG
723
724 if (flags)
725 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
726
727 return printed;
728 }
729
730 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
731
732 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
733 struct syscall_arg *arg)
734 {
735 int printed = 0, flags = arg->val;
736
737 if (flags == 0)
738 return scnprintf(bf, size, "NONE");
739 #define P_FLAG(n) \
740 if (flags & EFD_##n) { \
741 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
742 flags &= ~EFD_##n; \
743 }
744
745 P_FLAG(SEMAPHORE);
746 P_FLAG(CLOEXEC);
747 P_FLAG(NONBLOCK);
748 #undef P_FLAG
749
750 if (flags)
751 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
752
753 return printed;
754 }
755
756 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
757
758 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
759 struct syscall_arg *arg)
760 {
761 int printed = 0, flags = arg->val;
762
763 #define P_FLAG(n) \
764 if (flags & O_##n) { \
765 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
766 flags &= ~O_##n; \
767 }
768
769 P_FLAG(CLOEXEC);
770 P_FLAG(NONBLOCK);
771 #undef P_FLAG
772
773 if (flags)
774 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
775
776 return printed;
777 }
778
779 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
780
781 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
782 {
783 int sig = arg->val;
784
785 switch (sig) {
786 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
787 P_SIGNUM(HUP);
788 P_SIGNUM(INT);
789 P_SIGNUM(QUIT);
790 P_SIGNUM(ILL);
791 P_SIGNUM(TRAP);
792 P_SIGNUM(ABRT);
793 P_SIGNUM(BUS);
794 P_SIGNUM(FPE);
795 P_SIGNUM(KILL);
796 P_SIGNUM(USR1);
797 P_SIGNUM(SEGV);
798 P_SIGNUM(USR2);
799 P_SIGNUM(PIPE);
800 P_SIGNUM(ALRM);
801 P_SIGNUM(TERM);
802 P_SIGNUM(STKFLT);
803 P_SIGNUM(CHLD);
804 P_SIGNUM(CONT);
805 P_SIGNUM(STOP);
806 P_SIGNUM(TSTP);
807 P_SIGNUM(TTIN);
808 P_SIGNUM(TTOU);
809 P_SIGNUM(URG);
810 P_SIGNUM(XCPU);
811 P_SIGNUM(XFSZ);
812 P_SIGNUM(VTALRM);
813 P_SIGNUM(PROF);
814 P_SIGNUM(WINCH);
815 P_SIGNUM(IO);
816 P_SIGNUM(PWR);
817 P_SIGNUM(SYS);
818 default: break;
819 }
820
821 return scnprintf(bf, size, "%#x", sig);
822 }
823
824 #define SCA_SIGNUM syscall_arg__scnprintf_signum
825
#define TCGETS		0x5401

/*
 * Names of the terminal ioctl commands, indexed by (cmd - TCGETS).
 *
 * Entries follow each other in command-number order; wherever the
 * numbering has a gap a designated initializer re-synchronises the
 * index. Those designators are written as "<cmd value> - TCGETS": the
 * previous bare 0x27/0x50/0x60 were the commands' low bytes, one more
 * than the array index, so everything from TIOCSBRK on was printed under
 * its neighbour's name (e.g. FIOCLEX as "FIONCLEX"). The skipped slots
 * are NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	[0x5440 - TCGETS] = "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
845
/* strarray wrapper for tioctls[]; the first entry names cmd 0x5401. */
static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);

/*
 * Shorthand for a syscall_fmts[] entry: format argument number @arg with
 * SCA_STRARRAY, using strarray__<array> as its parameter. @name is not
 * used by the expansion; it only documents the argument at the use site.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
851
852 static struct syscall_fmt {
853 const char *name;
854 const char *alias;
855 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
856 void *arg_parm[6];
857 bool errmsg;
858 bool timeout;
859 bool hexret;
860 } syscall_fmts[] = {
861 { .name = "access", .errmsg = true,
862 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
863 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
864 { .name = "brk", .hexret = true,
865 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
866 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
867 { .name = "close", .errmsg = true,
868 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
869 { .name = "connect", .errmsg = true, },
870 { .name = "dup", .errmsg = true,
871 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
872 { .name = "dup2", .errmsg = true,
873 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
874 { .name = "dup3", .errmsg = true,
875 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
876 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
877 { .name = "eventfd2", .errmsg = true,
878 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
879 { .name = "faccessat", .errmsg = true,
880 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
881 { .name = "fadvise64", .errmsg = true,
882 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
883 { .name = "fallocate", .errmsg = true,
884 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
885 { .name = "fchdir", .errmsg = true,
886 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
887 { .name = "fchmod", .errmsg = true,
888 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
889 { .name = "fchmodat", .errmsg = true,
890 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
891 { .name = "fchown", .errmsg = true,
892 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
893 { .name = "fchownat", .errmsg = true,
894 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
895 { .name = "fcntl", .errmsg = true,
896 .arg_scnprintf = { [0] = SCA_FD, /* fd */
897 [1] = SCA_STRARRAY, /* cmd */ },
898 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
899 { .name = "fdatasync", .errmsg = true,
900 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
901 { .name = "flock", .errmsg = true,
902 .arg_scnprintf = { [0] = SCA_FD, /* fd */
903 [1] = SCA_FLOCK, /* cmd */ }, },
904 { .name = "fsetxattr", .errmsg = true,
905 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
906 { .name = "fstat", .errmsg = true, .alias = "newfstat",
907 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
908 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
909 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
910 { .name = "fstatfs", .errmsg = true,
911 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
912 { .name = "fsync", .errmsg = true,
913 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
914 { .name = "ftruncate", .errmsg = true,
915 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
916 { .name = "futex", .errmsg = true,
917 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
918 { .name = "futimesat", .errmsg = true,
919 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
920 { .name = "getdents", .errmsg = true,
921 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
922 { .name = "getdents64", .errmsg = true,
923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
925 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
926 { .name = "ioctl", .errmsg = true,
927 .arg_scnprintf = { [0] = SCA_FD, /* fd */
928 [1] = SCA_STRHEXARRAY, /* cmd */
929 [2] = SCA_HEX, /* arg */ },
930 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
931 { .name = "kill", .errmsg = true,
932 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
933 { .name = "linkat", .errmsg = true,
934 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
935 { .name = "lseek", .errmsg = true,
936 .arg_scnprintf = { [0] = SCA_FD, /* fd */
937 [2] = SCA_STRARRAY, /* whence */ },
938 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
939 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
940 { .name = "madvise", .errmsg = true,
941 .arg_scnprintf = { [0] = SCA_HEX, /* start */
942 [2] = SCA_MADV_BHV, /* behavior */ }, },
943 { .name = "mkdirat", .errmsg = true,
944 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
945 { .name = "mknodat", .errmsg = true,
946 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
947 { .name = "mlock", .errmsg = true,
948 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
949 { .name = "mlockall", .errmsg = true,
950 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
951 { .name = "mmap", .hexret = true,
952 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
953 [2] = SCA_MMAP_PROT, /* prot */
954 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
955 { .name = "mprotect", .errmsg = true,
956 .arg_scnprintf = { [0] = SCA_HEX, /* start */
957 [2] = SCA_MMAP_PROT, /* prot */ }, },
958 { .name = "mremap", .hexret = true,
959 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
960 [4] = SCA_HEX, /* new_addr */ }, },
961 { .name = "munlock", .errmsg = true,
962 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
963 { .name = "munmap", .errmsg = true,
964 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
965 { .name = "name_to_handle_at", .errmsg = true,
966 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
967 { .name = "newfstatat", .errmsg = true,
968 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
969 { .name = "open", .errmsg = true,
970 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
971 { .name = "open_by_handle_at", .errmsg = true,
972 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
973 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
974 { .name = "openat", .errmsg = true,
975 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
976 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
977 { .name = "pipe2", .errmsg = true,
978 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
979 { .name = "poll", .errmsg = true, .timeout = true, },
980 { .name = "ppoll", .errmsg = true, .timeout = true, },
981 { .name = "pread", .errmsg = true, .alias = "pread64",
982 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
983 { .name = "preadv", .errmsg = true, .alias = "pread",
984 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
985 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
986 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
987 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
988 { .name = "pwritev", .errmsg = true,
989 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
990 { .name = "read", .errmsg = true,
991 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
992 { .name = "readlinkat", .errmsg = true,
993 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
994 { .name = "readv", .errmsg = true,
995 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
996 { .name = "recvfrom", .errmsg = true,
997 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
998 { .name = "recvmmsg", .errmsg = true,
999 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1000 { .name = "recvmsg", .errmsg = true,
1001 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1002 { .name = "renameat", .errmsg = true,
1003 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1004 { .name = "rt_sigaction", .errmsg = true,
1005 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1006 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1007 { .name = "rt_sigqueueinfo", .errmsg = true,
1008 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1009 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1010 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1011 { .name = "select", .errmsg = true, .timeout = true, },
1012 { .name = "sendmmsg", .errmsg = true,
1013 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1014 { .name = "sendmsg", .errmsg = true,
1015 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1016 { .name = "sendto", .errmsg = true,
1017 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1018 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1019 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1020 { .name = "shutdown", .errmsg = true,
1021 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1022 { .name = "socket", .errmsg = true,
1023 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1024 [1] = SCA_SK_TYPE, /* type */ },
1025 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1026 { .name = "socketpair", .errmsg = true,
1027 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1028 [1] = SCA_SK_TYPE, /* type */ },
1029 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1030 { .name = "stat", .errmsg = true, .alias = "newstat", },
1031 { .name = "symlinkat", .errmsg = true,
1032 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1033 { .name = "tgkill", .errmsg = true,
1034 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1035 { .name = "tkill", .errmsg = true,
1036 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1037 { .name = "uname", .errmsg = true, .alias = "newuname", },
1038 { .name = "unlinkat", .errmsg = true,
1039 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1040 { .name = "utimensat", .errmsg = true,
1041 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1042 { .name = "write", .errmsg = true,
1043 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1044 { .name = "writev", .errmsg = true,
1045 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1046 };
1047
1048 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1049 {
1050 const struct syscall_fmt *fmt = fmtp;
1051 return strcmp(name, fmt->name);
1052 }
1053
1054 static struct syscall_fmt *syscall_fmt__find(const char *name)
1055 {
1056 const int nmemb = ARRAY_SIZE(syscall_fmts);
1057 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1058 }
1059
1060 struct syscall {
1061 struct event_format *tp_format;
1062 const char *name;
1063 bool filtered;
1064 struct syscall_fmt *fmt;
1065 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1066 void **arg_parm;
1067 };
1068
1069 static size_t fprintf_duration(unsigned long t, FILE *fp)
1070 {
1071 double duration = (double)t / NSEC_PER_MSEC;
1072 size_t printed = fprintf(fp, "(");
1073
1074 if (duration >= 1.0)
1075 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1076 else if (duration >= 0.01)
1077 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1078 else
1079 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1080 return printed + fprintf(fp, "): ");
1081 }
1082
1083 struct thread_trace {
1084 u64 entry_time;
1085 u64 exit_time;
1086 bool entry_pending;
1087 unsigned long nr_events;
1088 char *entry_str;
1089 double runtime_ms;
1090 struct {
1091 int max;
1092 char **table;
1093 } paths;
1094
1095 struct intlist *syscall_stats;
1096 };
1097
1098 static struct thread_trace *thread_trace__new(void)
1099 {
1100 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1101
1102 if (ttrace)
1103 ttrace->paths.max = -1;
1104
1105 ttrace->syscall_stats = intlist__new(NULL);
1106
1107 return ttrace;
1108 }
1109
1110 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1111 {
1112 struct thread_trace *ttrace;
1113
1114 if (thread == NULL)
1115 goto fail;
1116
1117 if (thread->priv == NULL)
1118 thread->priv = thread_trace__new();
1119
1120 if (thread->priv == NULL)
1121 goto fail;
1122
1123 ttrace = thread->priv;
1124 ++ttrace->nr_events;
1125
1126 return ttrace;
1127 fail:
1128 color_fprintf(fp, PERF_COLOR_RED,
1129 "WARNING: not enough memory, dropping samples!\n");
1130 return NULL;
1131 }
1132
1133 struct trace {
1134 struct perf_tool tool;
1135 struct {
1136 int machine;
1137 int open_id;
1138 } audit;
1139 struct {
1140 int max;
1141 struct syscall *table;
1142 } syscalls;
1143 struct perf_record_opts opts;
1144 struct machine *host;
1145 u64 base_time;
1146 bool full_time;
1147 FILE *output;
1148 unsigned long nr_events;
1149 struct strlist *ev_qualifier;
1150 bool not_ev_qualifier;
1151 bool live;
1152 const char *last_vfs_getname;
1153 struct intlist *tid_list;
1154 struct intlist *pid_list;
1155 bool sched;
1156 bool multiple_threads;
1157 bool summary;
1158 bool show_comm;
1159 bool show_tool_stats;
1160 double duration_filter;
1161 double runtime_ms;
1162 struct {
1163 u64 vfs_getname, proc_getname;
1164 } stats;
1165 };
1166
1167 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1168 {
1169 struct thread_trace *ttrace = thread->priv;
1170
1171 if (fd > ttrace->paths.max) {
1172 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1173
1174 if (npath == NULL)
1175 return -1;
1176
1177 if (ttrace->paths.max != -1) {
1178 memset(npath + ttrace->paths.max + 1, 0,
1179 (fd - ttrace->paths.max) * sizeof(char *));
1180 } else {
1181 memset(npath, 0, (fd + 1) * sizeof(char *));
1182 }
1183
1184 ttrace->paths.table = npath;
1185 ttrace->paths.max = fd;
1186 }
1187
1188 ttrace->paths.table[fd] = strdup(pathname);
1189
1190 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1191 }
1192
1193 static int thread__read_fd_path(struct thread *thread, int fd)
1194 {
1195 char linkname[PATH_MAX], pathname[PATH_MAX];
1196 struct stat st;
1197 int ret;
1198
1199 if (thread->pid_ == thread->tid) {
1200 scnprintf(linkname, sizeof(linkname),
1201 "/proc/%d/fd/%d", thread->pid_, fd);
1202 } else {
1203 scnprintf(linkname, sizeof(linkname),
1204 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1205 }
1206
1207 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1208 return -1;
1209
1210 ret = readlink(linkname, pathname, sizeof(pathname));
1211
1212 if (ret < 0 || ret > st.st_size)
1213 return -1;
1214
1215 pathname[ret] = '\0';
1216 return trace__set_fd_pathname(thread, fd, pathname);
1217 }
1218
1219 static const char *thread__fd_path(struct thread *thread, int fd,
1220 struct trace *trace)
1221 {
1222 struct thread_trace *ttrace = thread->priv;
1223
1224 if (ttrace == NULL)
1225 return NULL;
1226
1227 if (fd < 0)
1228 return NULL;
1229
1230 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1231 if (!trace->live)
1232 return NULL;
1233 ++trace->stats.proc_getname;
1234 if (thread__read_fd_path(thread, fd)) {
1235 return NULL;
1236 }
1237
1238 return ttrace->paths.table[fd];
1239 }
1240
1241 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1242 struct syscall_arg *arg)
1243 {
1244 int fd = arg->val;
1245 size_t printed = scnprintf(bf, size, "%d", fd);
1246 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1247
1248 if (path)
1249 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1250
1251 return printed;
1252 }
1253
1254 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1255 struct syscall_arg *arg)
1256 {
1257 int fd = arg->val;
1258 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1259 struct thread_trace *ttrace = arg->thread->priv;
1260
1261 if (ttrace && fd >= 0 && fd <= ttrace->paths.max) {
1262 free(ttrace->paths.table[fd]);
1263 ttrace->paths.table[fd] = NULL;
1264 }
1265
1266 return printed;
1267 }
1268
1269 static bool trace__filter_duration(struct trace *trace, double t)
1270 {
1271 return t < (trace->duration_filter * NSEC_PER_MSEC);
1272 }
1273
1274 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1275 {
1276 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1277
1278 return fprintf(fp, "%10.3f ", ts);
1279 }
1280
/*
 * Flags written from sig_handler() and read by the event loop.
 *
 * They are volatile sig_atomic_t, the object type the C standard permits a
 * signal handler to write.
 */
static volatile sig_atomic_t done = 0;
static volatile sig_atomic_t interrupted = 0;

/*
 * Signal handler: any handled signal requests termination, and SIGINT is
 * additionally recorded as an interruption.
 */
static void sig_handler(int sig)
{
	done = 1;
	interrupted = sig == SIGINT;
}
1289
1290 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1291 u64 duration, u64 tstamp, FILE *fp)
1292 {
1293 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1294 printed += fprintf_duration(duration, fp);
1295
1296 if (trace->multiple_threads) {
1297 if (trace->show_comm)
1298 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1299 printed += fprintf(fp, "%d ", thread->tid);
1300 }
1301
1302 return printed;
1303 }
1304
1305 static int trace__process_event(struct trace *trace, struct machine *machine,
1306 union perf_event *event, struct perf_sample *sample)
1307 {
1308 int ret = 0;
1309
1310 switch (event->header.type) {
1311 case PERF_RECORD_LOST:
1312 color_fprintf(trace->output, PERF_COLOR_RED,
1313 "LOST %" PRIu64 " events!\n", event->lost.lost);
1314 ret = machine__process_lost_event(machine, event, sample);
1315 default:
1316 ret = machine__process_event(machine, event, sample);
1317 break;
1318 }
1319
1320 return ret;
1321 }
1322
1323 static int trace__tool_process(struct perf_tool *tool,
1324 union perf_event *event,
1325 struct perf_sample *sample,
1326 struct machine *machine)
1327 {
1328 struct trace *trace = container_of(tool, struct trace, tool);
1329 return trace__process_event(trace, machine, event, sample);
1330 }
1331
1332 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1333 {
1334 int err = symbol__init();
1335
1336 if (err)
1337 return err;
1338
1339 trace->host = machine__new_host();
1340 if (trace->host == NULL)
1341 return -ENOMEM;
1342
1343 err = machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1344 evlist->threads, trace__tool_process, false);
1345 if (err)
1346 symbol__exit();
1347
1348 return err;
1349 }
1350
1351 static int syscall__set_arg_fmts(struct syscall *sc)
1352 {
1353 struct format_field *field;
1354 int idx = 0;
1355
1356 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1357 if (sc->arg_scnprintf == NULL)
1358 return -1;
1359
1360 if (sc->fmt)
1361 sc->arg_parm = sc->fmt->arg_parm;
1362
1363 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1364 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1365 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1366 else if (field->flags & FIELD_IS_POINTER)
1367 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1368 ++idx;
1369 }
1370
1371 return 0;
1372 }
1373
1374 static int trace__read_syscall_info(struct trace *trace, int id)
1375 {
1376 char tp_name[128];
1377 struct syscall *sc;
1378 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1379
1380 if (name == NULL)
1381 return -1;
1382
1383 if (id > trace->syscalls.max) {
1384 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1385
1386 if (nsyscalls == NULL)
1387 return -1;
1388
1389 if (trace->syscalls.max != -1) {
1390 memset(nsyscalls + trace->syscalls.max + 1, 0,
1391 (id - trace->syscalls.max) * sizeof(*sc));
1392 } else {
1393 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1394 }
1395
1396 trace->syscalls.table = nsyscalls;
1397 trace->syscalls.max = id;
1398 }
1399
1400 sc = trace->syscalls.table + id;
1401 sc->name = name;
1402
1403 if (trace->ev_qualifier) {
1404 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1405
1406 if (!(in ^ trace->not_ev_qualifier)) {
1407 sc->filtered = true;
1408 /*
1409 * No need to do read tracepoint information since this will be
1410 * filtered out.
1411 */
1412 return 0;
1413 }
1414 }
1415
1416 sc->fmt = syscall_fmt__find(sc->name);
1417
1418 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1419 sc->tp_format = event_format__new("syscalls", tp_name);
1420
1421 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1422 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1423 sc->tp_format = event_format__new("syscalls", tp_name);
1424 }
1425
1426 if (sc->tp_format == NULL)
1427 return -1;
1428
1429 return syscall__set_arg_fmts(sc);
1430 }
1431
1432 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1433 unsigned long *args, struct trace *trace,
1434 struct thread *thread)
1435 {
1436 size_t printed = 0;
1437
1438 if (sc->tp_format != NULL) {
1439 struct format_field *field;
1440 u8 bit = 1;
1441 struct syscall_arg arg = {
1442 .idx = 0,
1443 .mask = 0,
1444 .trace = trace,
1445 .thread = thread,
1446 };
1447
1448 for (field = sc->tp_format->format.fields->next; field;
1449 field = field->next, ++arg.idx, bit <<= 1) {
1450 if (arg.mask & bit)
1451 continue;
1452 /*
1453 * Suppress this argument if its value is zero and
1454 * and we don't have a string associated in an
1455 * strarray for it.
1456 */
1457 if (args[arg.idx] == 0 &&
1458 !(sc->arg_scnprintf &&
1459 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1460 sc->arg_parm[arg.idx]))
1461 continue;
1462
1463 printed += scnprintf(bf + printed, size - printed,
1464 "%s%s: ", printed ? ", " : "", field->name);
1465 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1466 arg.val = args[arg.idx];
1467 if (sc->arg_parm)
1468 arg.parm = sc->arg_parm[arg.idx];
1469 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1470 size - printed, &arg);
1471 } else {
1472 printed += scnprintf(bf + printed, size - printed,
1473 "%ld", args[arg.idx]);
1474 }
1475 }
1476 } else {
1477 int i = 0;
1478
1479 while (i < 6) {
1480 printed += scnprintf(bf + printed, size - printed,
1481 "%sarg%d: %ld",
1482 printed ? ", " : "", i, args[i]);
1483 ++i;
1484 }
1485 }
1486
1487 return printed;
1488 }
1489
/* Signature of the per-tracepoint sample handlers stored in evsel->handler. */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  struct perf_sample *sample);
1492
1493 static struct syscall *trace__syscall_info(struct trace *trace,
1494 struct perf_evsel *evsel, int id)
1495 {
1496
1497 if (id < 0) {
1498
1499 /*
1500 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1501 * before that, leaving at a higher verbosity level till that is
1502 * explained. Reproduced with plain ftrace with:
1503 *
1504 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1505 * grep "NR -1 " /t/trace_pipe
1506 *
1507 * After generating some load on the machine.
1508 */
1509 if (verbose > 1) {
1510 static u64 n;
1511 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1512 id, perf_evsel__name(evsel), ++n);
1513 }
1514 return NULL;
1515 }
1516
1517 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1518 trace__read_syscall_info(trace, id))
1519 goto out_cant_read;
1520
1521 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1522 goto out_cant_read;
1523
1524 return &trace->syscalls.table[id];
1525
1526 out_cant_read:
1527 if (verbose) {
1528 fprintf(trace->output, "Problems reading syscall %d", id);
1529 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1530 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1531 fputs(" information\n", trace->output);
1532 }
1533 return NULL;
1534 }
1535
1536 static void thread__update_stats(struct thread_trace *ttrace,
1537 int id, struct perf_sample *sample)
1538 {
1539 struct int_node *inode;
1540 struct stats *stats;
1541 u64 duration = 0;
1542
1543 inode = intlist__findnew(ttrace->syscall_stats, id);
1544 if (inode == NULL)
1545 return;
1546
1547 stats = inode->priv;
1548 if (stats == NULL) {
1549 stats = malloc(sizeof(struct stats));
1550 if (stats == NULL)
1551 return;
1552 init_stats(stats);
1553 inode->priv = stats;
1554 }
1555
1556 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1557 duration = sample->time - ttrace->entry_time;
1558
1559 update_stats(stats, duration);
1560 }
1561
1562 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1563 struct perf_sample *sample)
1564 {
1565 char *msg;
1566 void *args;
1567 size_t printed = 0;
1568 struct thread *thread;
1569 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1570 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1571 struct thread_trace *ttrace;
1572
1573 if (sc == NULL)
1574 return -1;
1575
1576 if (sc->filtered)
1577 return 0;
1578
1579 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1580 ttrace = thread__trace(thread, trace->output);
1581 if (ttrace == NULL)
1582 return -1;
1583
1584 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1585 ttrace = thread->priv;
1586
1587 if (ttrace->entry_str == NULL) {
1588 ttrace->entry_str = malloc(1024);
1589 if (!ttrace->entry_str)
1590 return -1;
1591 }
1592
1593 ttrace->entry_time = sample->time;
1594 msg = ttrace->entry_str;
1595 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1596
1597 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1598 args, trace, thread);
1599
1600 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1601 if (!trace->duration_filter) {
1602 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1603 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1604 }
1605 } else
1606 ttrace->entry_pending = true;
1607
1608 return 0;
1609 }
1610
1611 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1612 struct perf_sample *sample)
1613 {
1614 int ret;
1615 u64 duration = 0;
1616 struct thread *thread;
1617 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1618 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1619 struct thread_trace *ttrace;
1620
1621 if (sc == NULL)
1622 return -1;
1623
1624 if (sc->filtered)
1625 return 0;
1626
1627 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1628 ttrace = thread__trace(thread, trace->output);
1629 if (ttrace == NULL)
1630 return -1;
1631
1632 if (trace->summary)
1633 thread__update_stats(ttrace, id, sample);
1634
1635 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1636
1637 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1638 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1639 trace->last_vfs_getname = NULL;
1640 ++trace->stats.vfs_getname;
1641 }
1642
1643 ttrace = thread->priv;
1644
1645 ttrace->exit_time = sample->time;
1646
1647 if (ttrace->entry_time) {
1648 duration = sample->time - ttrace->entry_time;
1649 if (trace__filter_duration(trace, duration))
1650 goto out;
1651 } else if (trace->duration_filter)
1652 goto out;
1653
1654 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1655
1656 if (ttrace->entry_pending) {
1657 fprintf(trace->output, "%-70s", ttrace->entry_str);
1658 } else {
1659 fprintf(trace->output, " ... [");
1660 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1661 fprintf(trace->output, "]: %s()", sc->name);
1662 }
1663
1664 if (sc->fmt == NULL) {
1665 signed_print:
1666 fprintf(trace->output, ") = %d", ret);
1667 } else if (ret < 0 && sc->fmt->errmsg) {
1668 char bf[256];
1669 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1670 *e = audit_errno_to_name(-ret);
1671
1672 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1673 } else if (ret == 0 && sc->fmt->timeout)
1674 fprintf(trace->output, ") = 0 Timeout");
1675 else if (sc->fmt->hexret)
1676 fprintf(trace->output, ") = %#x", ret);
1677 else
1678 goto signed_print;
1679
1680 fputc('\n', trace->output);
1681 out:
1682 ttrace->entry_pending = false;
1683
1684 return 0;
1685 }
1686
1687 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1688 struct perf_sample *sample)
1689 {
1690 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1691 return 0;
1692 }
1693
1694 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1695 struct perf_sample *sample)
1696 {
1697 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1698 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1699 struct thread *thread = machine__findnew_thread(trace->host,
1700 sample->pid,
1701 sample->tid);
1702 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1703
1704 if (ttrace == NULL)
1705 goto out_dump;
1706
1707 ttrace->runtime_ms += runtime_ms;
1708 trace->runtime_ms += runtime_ms;
1709 return 0;
1710
1711 out_dump:
1712 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1713 evsel->name,
1714 perf_evsel__strval(evsel, sample, "comm"),
1715 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1716 runtime,
1717 perf_evsel__intval(evsel, sample, "vruntime"));
1718 return 0;
1719 }
1720
1721 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1722 {
1723 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1724 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1725 return false;
1726
1727 if (trace->pid_list || trace->tid_list)
1728 return true;
1729
1730 return false;
1731 }
1732
1733 static int trace__process_sample(struct perf_tool *tool,
1734 union perf_event *event __maybe_unused,
1735 struct perf_sample *sample,
1736 struct perf_evsel *evsel,
1737 struct machine *machine __maybe_unused)
1738 {
1739 struct trace *trace = container_of(tool, struct trace, tool);
1740 int err = 0;
1741
1742 tracepoint_handler handler = evsel->handler;
1743
1744 if (skip_sample(trace, sample))
1745 return 0;
1746
1747 if (!trace->full_time && trace->base_time == 0)
1748 trace->base_time = sample->time;
1749
1750 if (handler)
1751 handler(trace, evsel, sample);
1752
1753 return err;
1754 }
1755
1756 static bool
1757 perf_session__has_tp(struct perf_session *session, const char *name)
1758 {
1759 struct perf_evsel *evsel;
1760
1761 evsel = perf_evlist__find_tracepoint_by_name(session->evlist, name);
1762
1763 return evsel != NULL;
1764 }
1765
1766 static int parse_target_str(struct trace *trace)
1767 {
1768 if (trace->opts.target.pid) {
1769 trace->pid_list = intlist__new(trace->opts.target.pid);
1770 if (trace->pid_list == NULL) {
1771 pr_err("Error parsing process id string\n");
1772 return -EINVAL;
1773 }
1774 }
1775
1776 if (trace->opts.target.tid) {
1777 trace->tid_list = intlist__new(trace->opts.target.tid);
1778 if (trace->tid_list == NULL) {
1779 pr_err("Error parsing thread id string\n");
1780 return -EINVAL;
1781 }
1782 }
1783
1784 return 0;
1785 }
1786
/*
 * 'perf trace record': run cmd_record() with a fixed set of options that
 * select the raw_syscalls tracepoints, followed by the caller's arguments.
 *
 * Returns -ENOMEM when the argument vector cannot be allocated, otherwise
 * whatever cmd_record() returns.
 */
static int trace__record(int argc, const char **argv)
{
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e", "raw_syscalls:sys_enter,raw_syscalls:sys_exit",
	};
	const unsigned int nr_fixed = ARRAY_SIZE(record_args);
	const unsigned int rec_argc = nr_fixed + argc;
	const char **rec_argv;
	unsigned int i;

	/* one extra zeroed slot keeps the vector NULL-terminated */
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < rec_argc; i++)
		rec_argv[i] = i < nr_fixed ? record_args[i] : argv[i - nr_fixed];

	return cmd_record(rec_argc, rec_argv, NULL);
}
1813
1814 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1815
1816 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1817 {
1818 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1819 if (evsel == NULL)
1820 return;
1821
1822 if (perf_evsel__field(evsel, "pathname") == NULL) {
1823 perf_evsel__delete(evsel);
1824 return;
1825 }
1826
1827 evsel->handler = trace__vfs_getname;
1828 perf_evlist__add(evlist, evsel);
1829 }
1830
1831 static int trace__run(struct trace *trace, int argc, const char **argv)
1832 {
1833 struct perf_evlist *evlist = perf_evlist__new();
1834 struct perf_evsel *evsel;
1835 int err = -1, i;
1836 unsigned long before;
1837 const bool forks = argc > 0;
1838
1839 trace->live = true;
1840
1841 if (evlist == NULL) {
1842 fprintf(trace->output, "Not enough memory to run!\n");
1843 goto out;
1844 }
1845
1846 if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
1847 goto out_error_tp;
1848
1849 perf_evlist__add_vfs_getname(evlist);
1850
1851 if (trace->sched &&
1852 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
1853 trace__sched_stat_runtime))
1854 goto out_error_tp;
1855
1856 err = perf_evlist__create_maps(evlist, &trace->opts.target);
1857 if (err < 0) {
1858 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
1859 goto out_delete_evlist;
1860 }
1861
1862 err = trace__symbols_init(trace, evlist);
1863 if (err < 0) {
1864 fprintf(trace->output, "Problems initializing symbol libraries!\n");
1865 goto out_delete_maps;
1866 }
1867
1868 perf_evlist__config(evlist, &trace->opts);
1869
1870 signal(SIGCHLD, sig_handler);
1871 signal(SIGINT, sig_handler);
1872
1873 if (forks) {
1874 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
1875 argv, false, false);
1876 if (err < 0) {
1877 fprintf(trace->output, "Couldn't run the workload!\n");
1878 goto out_delete_maps;
1879 }
1880 }
1881
1882 err = perf_evlist__open(evlist);
1883 if (err < 0)
1884 goto out_error_open;
1885
1886 err = perf_evlist__mmap(evlist, UINT_MAX, false);
1887 if (err < 0) {
1888 fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
1889 goto out_close_evlist;
1890 }
1891
1892 perf_evlist__enable(evlist);
1893
1894 if (forks)
1895 perf_evlist__start_workload(evlist);
1896
1897 trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
1898 again:
1899 before = trace->nr_events;
1900
1901 for (i = 0; i < evlist->nr_mmaps; i++) {
1902 union perf_event *event;
1903
1904 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
1905 const u32 type = event->header.type;
1906 tracepoint_handler handler;
1907 struct perf_sample sample;
1908
1909 ++trace->nr_events;
1910
1911 err = perf_evlist__parse_sample(evlist, event, &sample);
1912 if (err) {
1913 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
1914 goto next_event;
1915 }
1916
1917 if (!trace->full_time && trace->base_time == 0)
1918 trace->base_time = sample.time;
1919
1920 if (type != PERF_RECORD_SAMPLE) {
1921 trace__process_event(trace, trace->host, event, &sample);
1922 continue;
1923 }
1924
1925 evsel = perf_evlist__id2evsel(evlist, sample.id);
1926 if (evsel == NULL) {
1927 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
1928 goto next_event;
1929 }
1930
1931 if (sample.raw_data == NULL) {
1932 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
1933 perf_evsel__name(evsel), sample.tid,
1934 sample.cpu, sample.raw_size);
1935 goto next_event;
1936 }
1937
1938 handler = evsel->handler;
1939 handler(trace, evsel, &sample);
1940 next_event:
1941 perf_evlist__mmap_consume(evlist, i);
1942
1943 if (interrupted)
1944 goto out_disable;
1945 }
1946 }
1947
1948 if (trace->nr_events == before) {
1949 int timeout = done ? 100 : -1;
1950
1951 if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
1952 goto again;
1953 } else {
1954 goto again;
1955 }
1956
1957 out_disable:
1958 perf_evlist__disable(evlist);
1959
1960 if (!err) {
1961 if (trace->summary)
1962 trace__fprintf_thread_summary(trace, trace->output);
1963
1964 if (trace->show_tool_stats) {
1965 fprintf(trace->output, "Stats:\n "
1966 " vfs_getname : %" PRIu64 "\n"
1967 " proc_getname: %" PRIu64 "\n",
1968 trace->stats.vfs_getname,
1969 trace->stats.proc_getname);
1970 }
1971 }
1972
1973 perf_evlist__munmap(evlist);
1974 out_close_evlist:
1975 perf_evlist__close(evlist);
1976 out_delete_maps:
1977 perf_evlist__delete_maps(evlist);
1978 out_delete_evlist:
1979 perf_evlist__delete(evlist);
1980 out:
1981 trace->live = false;
1982 return err;
1983 {
1984 char errbuf[BUFSIZ];
1985
1986 out_error_tp:
1987 perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
1988 goto out_error;
1989
1990 out_error_open:
1991 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
1992
1993 out_error:
1994 fprintf(trace->output, "%s\n", errbuf);
1995 goto out_delete_evlist;
1996 }
1997 }
1998
1999 static int trace__replay(struct trace *trace)
2000 {
2001 const struct perf_evsel_str_handler handlers[] = {
2002 { "raw_syscalls:sys_enter", trace__sys_enter, },
2003 { "raw_syscalls:sys_exit", trace__sys_exit, },
2004 { "probe:vfs_getname", trace__vfs_getname, },
2005 };
2006 struct perf_data_file file = {
2007 .path = input_name,
2008 .mode = PERF_DATA_MODE_READ,
2009 };
2010 struct perf_session *session;
2011 int err = -1;
2012
2013 trace->tool.sample = trace__process_sample;
2014 trace->tool.mmap = perf_event__process_mmap;
2015 trace->tool.mmap2 = perf_event__process_mmap2;
2016 trace->tool.comm = perf_event__process_comm;
2017 trace->tool.exit = perf_event__process_exit;
2018 trace->tool.fork = perf_event__process_fork;
2019 trace->tool.attr = perf_event__process_attr;
2020 trace->tool.tracing_data = perf_event__process_tracing_data;
2021 trace->tool.build_id = perf_event__process_build_id;
2022
2023 trace->tool.ordered_samples = true;
2024 trace->tool.ordering_requires_timestamps = true;
2025
2026 /* add tid to output */
2027 trace->multiple_threads = true;
2028
2029 if (symbol__init() < 0)
2030 return -1;
2031
2032 session = perf_session__new(&file, false, &trace->tool);
2033 if (session == NULL)
2034 return -ENOMEM;
2035
2036 trace->host = &session->machines.host;
2037
2038 err = perf_session__set_tracepoints_handlers(session, handlers);
2039 if (err)
2040 goto out;
2041
2042 if (!perf_session__has_tp(session, "raw_syscalls:sys_enter")) {
2043 pr_err("Data file does not have raw_syscalls:sys_enter events\n");
2044 goto out;
2045 }
2046
2047 if (!perf_session__has_tp(session, "raw_syscalls:sys_exit")) {
2048 pr_err("Data file does not have raw_syscalls:sys_exit events\n");
2049 goto out;
2050 }
2051
2052 err = parse_target_str(trace);
2053 if (err != 0)
2054 goto out;
2055
2056 setup_pager();
2057
2058 err = perf_session__process_events(session, &trace->tool);
2059 if (err)
2060 pr_err("Failed to process events, error %d", err);
2061
2062 else if (trace->summary)
2063 trace__fprintf_thread_summary(trace, trace->output);
2064
2065 out:
2066 perf_session__delete(session);
2067
2068 return err;
2069 }
2070
/*
 * Print the banner and column headings of the per-thread summary to @fp.
 * Returns the number of characters printed.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp,
		       "\n _____________________________________________________________________________\n"
		       " __) Summary of events (__\n\n"
		       " [ task - pid ] [ events ] [ ratio ] [ runtime ]\n"
		       " syscall count min max avg stddev\n"
		       " msec msec msec %%\n"
		       " _____________________________________________________________________________\n\n");
}
2084
2085 static size_t thread__dump_stats(struct thread_trace *ttrace,
2086 struct trace *trace, FILE *fp)
2087 {
2088 struct stats *stats;
2089 size_t printed = 0;
2090 struct syscall *sc;
2091 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2092
2093 if (inode == NULL)
2094 return 0;
2095
2096 printed += fprintf(fp, "\n");
2097
2098 /* each int_node is a syscall */
2099 while (inode) {
2100 stats = inode->priv;
2101 if (stats) {
2102 double min = (double)(stats->min) / NSEC_PER_MSEC;
2103 double max = (double)(stats->max) / NSEC_PER_MSEC;
2104 double avg = avg_stats(stats);
2105 double pct;
2106 u64 n = (u64) stats->n;
2107
2108 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2109 avg /= NSEC_PER_MSEC;
2110
2111 sc = &trace->syscalls.table[inode->i];
2112 printed += fprintf(fp, "%24s %14s : ", "", sc->name);
2113 printed += fprintf(fp, "%5" PRIu64 " %8.3f %8.3f",
2114 n, min, max);
2115 printed += fprintf(fp, " %8.3f %6.2f\n", avg, pct);
2116 }
2117
2118 inode = intlist__next(inode);
2119 }
2120
2121 printed += fprintf(fp, "\n\n");
2122
2123 return printed;
2124 }
2125
/*
 * Context handed to the per-thread summary callback through
 * machine__for_each_thread().
 */
struct summary_data {
	FILE	     *fp;	/* where the summary is written */
	struct trace *trace;	/* the trace the summary is about */
	size_t	     printed;	/* running count of characters printed */
};
2132
2133 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2134 {
2135 struct summary_data *data = priv;
2136 FILE *fp = data->fp;
2137 size_t printed = data->printed;
2138 struct trace *trace = data->trace;
2139 struct thread_trace *ttrace = thread->priv;
2140 const char *color;
2141 double ratio;
2142
2143 if (ttrace == NULL)
2144 return 0;
2145
2146 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2147
2148 color = PERF_COLOR_NORMAL;
2149 if (ratio > 50.0)
2150 color = PERF_COLOR_RED;
2151 else if (ratio > 25.0)
2152 color = PERF_COLOR_GREEN;
2153 else if (ratio > 5.0)
2154 color = PERF_COLOR_YELLOW;
2155
2156 printed += color_fprintf(fp, color, "%20s", thread__comm_str(thread));
2157 printed += fprintf(fp, " - %-5d :%11lu [", thread->tid, ttrace->nr_events);
2158 printed += color_fprintf(fp, color, "%5.1f%%", ratio);
2159 printed += fprintf(fp, " ] %10.3f ms\n", ttrace->runtime_ms);
2160 printed += thread__dump_stats(ttrace, trace, fp);
2161
2162 data->printed += printed;
2163
2164 return 0;
2165 }
2166
2167 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2168 {
2169 struct summary_data data = {
2170 .fp = fp,
2171 .trace = trace
2172 };
2173 data.printed = trace__fprintf_threads_header(fp);
2174
2175 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2176
2177 return data.printed;
2178 }
2179
2180 static int trace__set_duration(const struct option *opt, const char *str,
2181 int unset __maybe_unused)
2182 {
2183 struct trace *trace = opt->value;
2184
2185 trace->duration_filter = atof(str);
2186 return 0;
2187 }
2188
2189 static int trace__open_output(struct trace *trace, const char *filename)
2190 {
2191 struct stat st;
2192
2193 if (!stat(filename, &st) && st.st_size) {
2194 char oldname[PATH_MAX];
2195
2196 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2197 unlink(oldname);
2198 rename(filename, oldname);
2199 }
2200
2201 trace->output = fopen(filename, "w");
2202
2203 return trace->output == NULL ? -errno : 0;
2204 }
2205
2206 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2207 {
2208 const char * const trace_usage[] = {
2209 "perf trace [<options>] [<command>]",
2210 "perf trace [<options>] -- <command> [<options>]",
2211 "perf trace record [<options>] [<command>]",
2212 "perf trace record [<options>] -- <command> [<options>]",
2213 NULL
2214 };
2215 struct trace trace = {
2216 .audit = {
2217 .machine = audit_detect_machine(),
2218 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2219 },
2220 .syscalls = {
2221 . max = -1,
2222 },
2223 .opts = {
2224 .target = {
2225 .uid = UINT_MAX,
2226 .uses_mmap = true,
2227 },
2228 .user_freq = UINT_MAX,
2229 .user_interval = ULLONG_MAX,
2230 .no_delay = true,
2231 .mmap_pages = 1024,
2232 },
2233 .output = stdout,
2234 .show_comm = true,
2235 };
2236 const char *output_name = NULL;
2237 const char *ev_qualifier_str = NULL;
2238 const struct option trace_options[] = {
2239 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2240 "show the thread COMM next to its id"),
2241 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2242 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2243 "list of events to trace"),
2244 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2245 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2246 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2247 "trace events on existing process id"),
2248 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2249 "trace events on existing thread id"),
2250 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2251 "system-wide collection from all CPUs"),
2252 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2253 "list of cpus to monitor"),
2254 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2255 "child tasks do not inherit counters"),
2256 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2257 "number of mmap data pages",
2258 perf_evlist__parse_mmap_pages),
2259 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2260 "user to profile"),
2261 OPT_CALLBACK(0, "duration", &trace, "float",
2262 "show only events with duration > N.M ms",
2263 trace__set_duration),
2264 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2265 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2266 OPT_BOOLEAN('T', "time", &trace.full_time,
2267 "Show full timestamp, not time relative to first start"),
2268 OPT_BOOLEAN(0, "summary", &trace.summary,
2269 "Show syscall summary with statistics"),
2270 OPT_END()
2271 };
2272 int err;
2273 char bf[BUFSIZ];
2274
2275 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2276 return trace__record(argc-2, &argv[2]);
2277
2278 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2279
2280 if (output_name != NULL) {
2281 err = trace__open_output(&trace, output_name);
2282 if (err < 0) {
2283 perror("failed to create output file");
2284 goto out;
2285 }
2286 }
2287
2288 if (ev_qualifier_str != NULL) {
2289 const char *s = ev_qualifier_str;
2290
2291 trace.not_ev_qualifier = *s == '!';
2292 if (trace.not_ev_qualifier)
2293 ++s;
2294 trace.ev_qualifier = strlist__new(true, s);
2295 if (trace.ev_qualifier == NULL) {
2296 fputs("Not enough memory to parse event qualifier",
2297 trace.output);
2298 err = -ENOMEM;
2299 goto out_close;
2300 }
2301 }
2302
2303 err = perf_target__validate(&trace.opts.target);
2304 if (err) {
2305 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2306 fprintf(trace.output, "%s", bf);
2307 goto out_close;
2308 }
2309
2310 err = perf_target__parse_uid(&trace.opts.target);
2311 if (err) {
2312 perf_target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2313 fprintf(trace.output, "%s", bf);
2314 goto out_close;
2315 }
2316
2317 if (!argc && perf_target__none(&trace.opts.target))
2318 trace.opts.target.system_wide = true;
2319
2320 if (input_name)
2321 err = trace__replay(&trace);
2322 else
2323 err = trace__run(&trace, argc, argv);
2324
2325 out_close:
2326 if (output_name != NULL)
2327 fclose(trace.output);
2328 out:
2329 return err;
2330 }