]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blob - tools/perf/builtin-trace.c
perf tools: Introduce zfree
[mirror_ubuntu-artful-kernel.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16
17 #include <libaudit.h>
18 #include <stdlib.h>
19 #include <sys/eventfd.h>
20 #include <sys/mman.h>
21 #include <linux/futex.h>
22
23 /* For older distros: */
24 #ifndef MAP_STACK
25 # define MAP_STACK 0x20000
26 #endif
27
28 #ifndef MADV_HWPOISON
29 # define MADV_HWPOISON 100
30 #endif
31
32 #ifndef MADV_MERGEABLE
33 # define MADV_MERGEABLE 12
34 #endif
35
36 #ifndef MADV_UNMERGEABLE
37 # define MADV_UNMERGEABLE 13
38 #endif
39
40 struct tp_field {
41 int offset;
42 union {
43 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
44 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
45 };
46 };
47
48 #define TP_UINT_FIELD(bits) \
49 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
50 { \
51 return *(u##bits *)(sample->raw_data + field->offset); \
52 }
53
54 TP_UINT_FIELD(8);
55 TP_UINT_FIELD(16);
56 TP_UINT_FIELD(32);
57 TP_UINT_FIELD(64);
58
59 #define TP_UINT_FIELD__SWAPPED(bits) \
60 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
61 { \
62 u##bits value = *(u##bits *)(sample->raw_data + field->offset); \
63 return bswap_##bits(value);\
64 }
65
66 TP_UINT_FIELD__SWAPPED(16);
67 TP_UINT_FIELD__SWAPPED(32);
68 TP_UINT_FIELD__SWAPPED(64);
69
70 static int tp_field__init_uint(struct tp_field *field,
71 struct format_field *format_field,
72 bool needs_swap)
73 {
74 field->offset = format_field->offset;
75
76 switch (format_field->size) {
77 case 1:
78 field->integer = tp_field__u8;
79 break;
80 case 2:
81 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
82 break;
83 case 4:
84 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
85 break;
86 case 8:
87 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
88 break;
89 default:
90 return -1;
91 }
92
93 return 0;
94 }
95
96 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
97 {
98 return sample->raw_data + field->offset;
99 }
100
101 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
102 {
103 field->offset = format_field->offset;
104 field->pointer = tp_field__ptr;
105 return 0;
106 }
107
108 struct syscall_tp {
109 struct tp_field id;
110 union {
111 struct tp_field args, ret;
112 };
113 };
114
115 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
116 struct tp_field *field,
117 const char *name)
118 {
119 struct format_field *format_field = perf_evsel__field(evsel, name);
120
121 if (format_field == NULL)
122 return -1;
123
124 return tp_field__init_uint(field, format_field, evsel->needs_swap);
125 }
126
127 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
128 ({ struct syscall_tp *sc = evsel->priv;\
129 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
130
/*
 * Find the tracepoint field called @name in @evsel and set @field up to
 * return a pointer to it inside the raw sample payload.
 *
 * Returns 0 on success, -1 when the field is not found.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt != NULL ? tp_field__init_ptr(field, fmt) : -1;
}

/*
 * Convenience wrapper: initialize the member of the struct syscall_tp stored
 * in evsel->priv whose name equals the tracepoint field name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
146
147 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
148 {
149 zfree(&evsel->priv);
150 perf_evsel__delete(evsel);
151 }
152
153 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
154 {
155 evsel->priv = malloc(sizeof(struct syscall_tp));
156 if (evsel->priv != NULL) {
157 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
158 goto out_delete;
159
160 evsel->handler = handler;
161 return 0;
162 }
163
164 return -ENOMEM;
165
166 out_delete:
167 zfree(&evsel->priv);
168 return -ENOENT;
169 }
170
/*
 * Create the evsel for the "raw_syscalls:<direction>" tracepoint, falling
 * back to "syscalls:<direction>" as used by older kernels (e.g. RHEL6), and
 * prepare it with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the syscall_tp setup failed (the evsel is destroyed in that case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler) != 0) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
190
/*
 * Read member 'name' of the struct syscall_tp stored in evsel->priv from
 * 'sample', through the integer reader installed for that member.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = evsel->priv; \
	   sc_tp->name.integer(&sc_tp->name, sample); })

/* Same as above, but through the pointer reader installed for the member. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = evsel->priv; \
	   sc_tp->name.pointer(&sc_tp->name, sample); })
198
199 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
200 void *sys_enter_handler,
201 void *sys_exit_handler)
202 {
203 int ret = -1;
204 struct perf_evsel *sys_enter, *sys_exit;
205
206 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
207 if (sys_enter == NULL)
208 goto out;
209
210 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
211 goto out_delete_sys_enter;
212
213 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
214 if (sys_exit == NULL)
215 goto out_delete_sys_enter;
216
217 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
218 goto out_delete_sys_exit;
219
220 perf_evlist__add(evlist, sys_enter);
221 perf_evlist__add(evlist, sys_exit);
222
223 ret = 0;
224 out:
225 return ret;
226
227 out_delete_sys_exit:
228 perf_evsel__delete_priv(sys_exit);
229 out_delete_sys_enter:
230 perf_evsel__delete_priv(sys_enter);
231 goto out;
232 }
233
234
235 struct syscall_arg {
236 unsigned long val;
237 struct thread *thread;
238 struct trace *trace;
239 void *parm;
240 u8 idx;
241 u8 mask;
242 };
243
/*
 * Table mapping small integers to names.
 *
 * entries[i] is the name for value (offset + i); nr_entries is the number of
 * slots in entries[].
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char  **entries;
};

/* Define strarray__<array> over the string table 'array', values start at 0. */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same, but the first slot of 'array' corresponds to the value 'off'. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
260
261 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
262 const char *intfmt,
263 struct syscall_arg *arg)
264 {
265 struct strarray *sa = arg->parm;
266 int idx = arg->val - sa->offset;
267
268 if (idx < 0 || idx >= sa->nr_entries)
269 return scnprintf(bf, size, intfmt, arg->val);
270
271 return scnprintf(bf, size, "%s", sa->entries[idx]);
272 }
273
/*
 * strarray formatter whose fallback for values without a name is decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
281
/*
 * strarray formatter whose fallback for values without a name is hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
289
290 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
291 struct syscall_arg *arg);
292
293 #define SCA_FD syscall_arg__scnprintf_fd
294
295 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
296 struct syscall_arg *arg)
297 {
298 int fd = arg->val;
299
300 if (fd == AT_FDCWD)
301 return scnprintf(bf, size, "CWD");
302
303 return syscall_arg__scnprintf_fd(bf, size, arg);
304 }
305
306 #define SCA_FDAT syscall_arg__scnprintf_fd_at
307
308 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
309 struct syscall_arg *arg);
310
311 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
312
313 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
314 struct syscall_arg *arg)
315 {
316 return scnprintf(bf, size, "%#lx", arg->val);
317 }
318
319 #define SCA_HEX syscall_arg__scnprintf_hex
320
321 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
322 struct syscall_arg *arg)
323 {
324 int printed = 0, prot = arg->val;
325
326 if (prot == PROT_NONE)
327 return scnprintf(bf, size, "NONE");
328 #define P_MMAP_PROT(n) \
329 if (prot & PROT_##n) { \
330 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
331 prot &= ~PROT_##n; \
332 }
333
334 P_MMAP_PROT(EXEC);
335 P_MMAP_PROT(READ);
336 P_MMAP_PROT(WRITE);
337 #ifdef PROT_SEM
338 P_MMAP_PROT(SEM);
339 #endif
340 P_MMAP_PROT(GROWSDOWN);
341 P_MMAP_PROT(GROWSUP);
342 #undef P_MMAP_PROT
343
344 if (prot)
345 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
346
347 return printed;
348 }
349
350 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
351
352 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
353 struct syscall_arg *arg)
354 {
355 int printed = 0, flags = arg->val;
356
357 #define P_MMAP_FLAG(n) \
358 if (flags & MAP_##n) { \
359 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
360 flags &= ~MAP_##n; \
361 }
362
363 P_MMAP_FLAG(SHARED);
364 P_MMAP_FLAG(PRIVATE);
365 #ifdef MAP_32BIT
366 P_MMAP_FLAG(32BIT);
367 #endif
368 P_MMAP_FLAG(ANONYMOUS);
369 P_MMAP_FLAG(DENYWRITE);
370 P_MMAP_FLAG(EXECUTABLE);
371 P_MMAP_FLAG(FILE);
372 P_MMAP_FLAG(FIXED);
373 P_MMAP_FLAG(GROWSDOWN);
374 #ifdef MAP_HUGETLB
375 P_MMAP_FLAG(HUGETLB);
376 #endif
377 P_MMAP_FLAG(LOCKED);
378 P_MMAP_FLAG(NONBLOCK);
379 P_MMAP_FLAG(NORESERVE);
380 P_MMAP_FLAG(POPULATE);
381 P_MMAP_FLAG(STACK);
382 #ifdef MAP_UNINITIALIZED
383 P_MMAP_FLAG(UNINITIALIZED);
384 #endif
385 #undef P_MMAP_FLAG
386
387 if (flags)
388 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
389
390 return printed;
391 }
392
393 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
394
395 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
396 struct syscall_arg *arg)
397 {
398 int behavior = arg->val;
399
400 switch (behavior) {
401 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
402 P_MADV_BHV(NORMAL);
403 P_MADV_BHV(RANDOM);
404 P_MADV_BHV(SEQUENTIAL);
405 P_MADV_BHV(WILLNEED);
406 P_MADV_BHV(DONTNEED);
407 P_MADV_BHV(REMOVE);
408 P_MADV_BHV(DONTFORK);
409 P_MADV_BHV(DOFORK);
410 P_MADV_BHV(HWPOISON);
411 #ifdef MADV_SOFT_OFFLINE
412 P_MADV_BHV(SOFT_OFFLINE);
413 #endif
414 P_MADV_BHV(MERGEABLE);
415 P_MADV_BHV(UNMERGEABLE);
416 #ifdef MADV_HUGEPAGE
417 P_MADV_BHV(HUGEPAGE);
418 #endif
419 #ifdef MADV_NOHUGEPAGE
420 P_MADV_BHV(NOHUGEPAGE);
421 #endif
422 #ifdef MADV_DONTDUMP
423 P_MADV_BHV(DONTDUMP);
424 #endif
425 #ifdef MADV_DODUMP
426 P_MADV_BHV(DODUMP);
427 #endif
428 #undef P_MADV_PHV
429 default: break;
430 }
431
432 return scnprintf(bf, size, "%#x", behavior);
433 }
434
435 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
436
437 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
438 struct syscall_arg *arg)
439 {
440 int printed = 0, op = arg->val;
441
442 if (op == 0)
443 return scnprintf(bf, size, "NONE");
444 #define P_CMD(cmd) \
445 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
446 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
447 op &= ~LOCK_##cmd; \
448 }
449
450 P_CMD(SH);
451 P_CMD(EX);
452 P_CMD(NB);
453 P_CMD(UN);
454 P_CMD(MAND);
455 P_CMD(RW);
456 P_CMD(READ);
457 P_CMD(WRITE);
458 #undef P_OP
459
460 if (op)
461 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
462
463 return printed;
464 }
465
466 #define SCA_FLOCK syscall_arg__scnprintf_flock
467
468 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
469 {
470 enum syscall_futex_args {
471 SCF_UADDR = (1 << 0),
472 SCF_OP = (1 << 1),
473 SCF_VAL = (1 << 2),
474 SCF_TIMEOUT = (1 << 3),
475 SCF_UADDR2 = (1 << 4),
476 SCF_VAL3 = (1 << 5),
477 };
478 int op = arg->val;
479 int cmd = op & FUTEX_CMD_MASK;
480 size_t printed = 0;
481
482 switch (cmd) {
483 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
484 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
485 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
486 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
487 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
488 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
489 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
490 P_FUTEX_OP(WAKE_OP); break;
491 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
492 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
493 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
494 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
495 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
496 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
497 default: printed = scnprintf(bf, size, "%#x", cmd); break;
498 }
499
500 if (op & FUTEX_PRIVATE_FLAG)
501 printed += scnprintf(bf + printed, size - printed, "|PRIV");
502
503 if (op & FUTEX_CLOCK_REALTIME)
504 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
505
506 return printed;
507 }
508
509 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
510
511 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
512 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
513
514 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
515 static DEFINE_STRARRAY(itimers);
516
517 static const char *whences[] = { "SET", "CUR", "END",
518 #ifdef SEEK_DATA
519 "DATA",
520 #endif
521 #ifdef SEEK_HOLE
522 "HOLE",
523 #endif
524 };
525 static DEFINE_STRARRAY(whences);
526
527 static const char *fcntl_cmds[] = {
528 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
529 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
530 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
531 "F_GETOWNER_UIDS",
532 };
533 static DEFINE_STRARRAY(fcntl_cmds);
534
535 static const char *rlimit_resources[] = {
536 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
537 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
538 "RTTIME",
539 };
540 static DEFINE_STRARRAY(rlimit_resources);
541
542 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
543 static DEFINE_STRARRAY(sighow);
544
545 static const char *clockid[] = {
546 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
547 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
548 };
549 static DEFINE_STRARRAY(clockid);
550
/*
 * Address family names, indexed by AF_* value (AF_UNSPEC == 0).
 *
 * The table is positional, so no value may be skipped: slot 28 (AF_MPLS in
 * current kernels) has to be present, otherwise "CAN" (AF_CAN == 29) and
 * every family after it would be reported under the name of the next one.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
559 static DEFINE_STRARRAY(socket_families);
560
561 #ifndef SOCK_TYPE_MASK
562 #define SOCK_TYPE_MASK 0xf
563 #endif
564
565 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
566 struct syscall_arg *arg)
567 {
568 size_t printed;
569 int type = arg->val,
570 flags = type & ~SOCK_TYPE_MASK;
571
572 type &= SOCK_TYPE_MASK;
573 /*
574 * Can't use a strarray, MIPS may override for ABI reasons.
575 */
576 switch (type) {
577 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
578 P_SK_TYPE(STREAM);
579 P_SK_TYPE(DGRAM);
580 P_SK_TYPE(RAW);
581 P_SK_TYPE(RDM);
582 P_SK_TYPE(SEQPACKET);
583 P_SK_TYPE(DCCP);
584 P_SK_TYPE(PACKET);
585 #undef P_SK_TYPE
586 default:
587 printed = scnprintf(bf, size, "%#x", type);
588 }
589
590 #define P_SK_FLAG(n) \
591 if (flags & SOCK_##n) { \
592 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
593 flags &= ~SOCK_##n; \
594 }
595
596 P_SK_FLAG(CLOEXEC);
597 P_SK_FLAG(NONBLOCK);
598 #undef P_SK_FLAG
599
600 if (flags)
601 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
602
603 return printed;
604 }
605
606 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
607
608 #ifndef MSG_PROBE
609 #define MSG_PROBE 0x10
610 #endif
611 #ifndef MSG_WAITFORONE
612 #define MSG_WAITFORONE 0x10000
613 #endif
614 #ifndef MSG_SENDPAGE_NOTLAST
615 #define MSG_SENDPAGE_NOTLAST 0x20000
616 #endif
617 #ifndef MSG_FASTOPEN
618 #define MSG_FASTOPEN 0x20000000
619 #endif
620
621 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
622 struct syscall_arg *arg)
623 {
624 int printed = 0, flags = arg->val;
625
626 if (flags == 0)
627 return scnprintf(bf, size, "NONE");
628 #define P_MSG_FLAG(n) \
629 if (flags & MSG_##n) { \
630 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
631 flags &= ~MSG_##n; \
632 }
633
634 P_MSG_FLAG(OOB);
635 P_MSG_FLAG(PEEK);
636 P_MSG_FLAG(DONTROUTE);
637 P_MSG_FLAG(TRYHARD);
638 P_MSG_FLAG(CTRUNC);
639 P_MSG_FLAG(PROBE);
640 P_MSG_FLAG(TRUNC);
641 P_MSG_FLAG(DONTWAIT);
642 P_MSG_FLAG(EOR);
643 P_MSG_FLAG(WAITALL);
644 P_MSG_FLAG(FIN);
645 P_MSG_FLAG(SYN);
646 P_MSG_FLAG(CONFIRM);
647 P_MSG_FLAG(RST);
648 P_MSG_FLAG(ERRQUEUE);
649 P_MSG_FLAG(NOSIGNAL);
650 P_MSG_FLAG(MORE);
651 P_MSG_FLAG(WAITFORONE);
652 P_MSG_FLAG(SENDPAGE_NOTLAST);
653 P_MSG_FLAG(FASTOPEN);
654 P_MSG_FLAG(CMSG_CLOEXEC);
655 #undef P_MSG_FLAG
656
657 if (flags)
658 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
659
660 return printed;
661 }
662
663 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
664
665 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
666 struct syscall_arg *arg)
667 {
668 size_t printed = 0;
669 int mode = arg->val;
670
671 if (mode == F_OK) /* 0 */
672 return scnprintf(bf, size, "F");
673 #define P_MODE(n) \
674 if (mode & n##_OK) { \
675 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
676 mode &= ~n##_OK; \
677 }
678
679 P_MODE(R);
680 P_MODE(W);
681 P_MODE(X);
682 #undef P_MODE
683
684 if (mode)
685 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
686
687 return printed;
688 }
689
690 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
691
692 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
693 struct syscall_arg *arg)
694 {
695 int printed = 0, flags = arg->val;
696
697 if (!(flags & O_CREAT))
698 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
699
700 if (flags == 0)
701 return scnprintf(bf, size, "RDONLY");
702 #define P_FLAG(n) \
703 if (flags & O_##n) { \
704 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
705 flags &= ~O_##n; \
706 }
707
708 P_FLAG(APPEND);
709 P_FLAG(ASYNC);
710 P_FLAG(CLOEXEC);
711 P_FLAG(CREAT);
712 P_FLAG(DIRECT);
713 P_FLAG(DIRECTORY);
714 P_FLAG(EXCL);
715 P_FLAG(LARGEFILE);
716 P_FLAG(NOATIME);
717 P_FLAG(NOCTTY);
718 #ifdef O_NONBLOCK
719 P_FLAG(NONBLOCK);
720 #elif O_NDELAY
721 P_FLAG(NDELAY);
722 #endif
723 #ifdef O_PATH
724 P_FLAG(PATH);
725 #endif
726 P_FLAG(RDWR);
727 #ifdef O_DSYNC
728 if ((flags & O_SYNC) == O_SYNC)
729 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
730 else {
731 P_FLAG(DSYNC);
732 }
733 #else
734 P_FLAG(SYNC);
735 #endif
736 P_FLAG(TRUNC);
737 P_FLAG(WRONLY);
738 #undef P_FLAG
739
740 if (flags)
741 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
742
743 return printed;
744 }
745
746 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
747
748 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
749 struct syscall_arg *arg)
750 {
751 int printed = 0, flags = arg->val;
752
753 if (flags == 0)
754 return scnprintf(bf, size, "NONE");
755 #define P_FLAG(n) \
756 if (flags & EFD_##n) { \
757 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
758 flags &= ~EFD_##n; \
759 }
760
761 P_FLAG(SEMAPHORE);
762 P_FLAG(CLOEXEC);
763 P_FLAG(NONBLOCK);
764 #undef P_FLAG
765
766 if (flags)
767 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
768
769 return printed;
770 }
771
772 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
773
774 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
775 struct syscall_arg *arg)
776 {
777 int printed = 0, flags = arg->val;
778
779 #define P_FLAG(n) \
780 if (flags & O_##n) { \
781 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
782 flags &= ~O_##n; \
783 }
784
785 P_FLAG(CLOEXEC);
786 P_FLAG(NONBLOCK);
787 #undef P_FLAG
788
789 if (flags)
790 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
791
792 return printed;
793 }
794
795 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
796
797 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
798 {
799 int sig = arg->val;
800
801 switch (sig) {
802 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
803 P_SIGNUM(HUP);
804 P_SIGNUM(INT);
805 P_SIGNUM(QUIT);
806 P_SIGNUM(ILL);
807 P_SIGNUM(TRAP);
808 P_SIGNUM(ABRT);
809 P_SIGNUM(BUS);
810 P_SIGNUM(FPE);
811 P_SIGNUM(KILL);
812 P_SIGNUM(USR1);
813 P_SIGNUM(SEGV);
814 P_SIGNUM(USR2);
815 P_SIGNUM(PIPE);
816 P_SIGNUM(ALRM);
817 P_SIGNUM(TERM);
818 P_SIGNUM(STKFLT);
819 P_SIGNUM(CHLD);
820 P_SIGNUM(CONT);
821 P_SIGNUM(STOP);
822 P_SIGNUM(TSTP);
823 P_SIGNUM(TTIN);
824 P_SIGNUM(TTOU);
825 P_SIGNUM(URG);
826 P_SIGNUM(XCPU);
827 P_SIGNUM(XFSZ);
828 P_SIGNUM(VTALRM);
829 P_SIGNUM(PROF);
830 P_SIGNUM(WINCH);
831 P_SIGNUM(IO);
832 P_SIGNUM(PWR);
833 P_SIGNUM(SYS);
834 default: break;
835 }
836
837 return scnprintf(bf, size, "%#x", sig);
838 }
839
840 #define SCA_SIGNUM syscall_arg__scnprintf_signum
841
#define TCGETS		0x5401

/*
 * Names of the terminal ioctl requests, used with an offset of TCGETS: slot i
 * names request (TCGETS + i).
 *
 * Where the request numbers jump, the designated index must therefore be
 * "request - TCGETS", not the low byte of the request: TIOCSBRK is 0x5427,
 * FIONCLEX is 0x5450 and FIOQSIZE is 0x5460.  Using the low byte would shift
 * every name after a jump onto the next request number.  Slots in the gaps
 * are left NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX",
	"FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
861
/* strarray over tioctls[]: slot 0 is the TCGETS request (0x5401). */
static DEFINE_STRARRAY_OFFSET(tioctls, TCGETS);

/*
 * Initializer fragment for a struct syscall_fmt entry: format argument number
 * 'arg' with the strarray formatter over strarray__<array>.  'name' is not
 * used in the expansion; it only documents the argument at the call site.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
867
868 static struct syscall_fmt {
869 const char *name;
870 const char *alias;
871 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
872 void *arg_parm[6];
873 bool errmsg;
874 bool timeout;
875 bool hexret;
876 } syscall_fmts[] = {
877 { .name = "access", .errmsg = true,
878 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
879 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
880 { .name = "brk", .hexret = true,
881 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
882 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
883 { .name = "close", .errmsg = true,
884 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
885 { .name = "connect", .errmsg = true, },
886 { .name = "dup", .errmsg = true,
887 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
888 { .name = "dup2", .errmsg = true,
889 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
890 { .name = "dup3", .errmsg = true,
891 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
892 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
893 { .name = "eventfd2", .errmsg = true,
894 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
895 { .name = "faccessat", .errmsg = true,
896 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
897 { .name = "fadvise64", .errmsg = true,
898 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
899 { .name = "fallocate", .errmsg = true,
900 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
901 { .name = "fchdir", .errmsg = true,
902 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
903 { .name = "fchmod", .errmsg = true,
904 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
905 { .name = "fchmodat", .errmsg = true,
906 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
907 { .name = "fchown", .errmsg = true,
908 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
909 { .name = "fchownat", .errmsg = true,
910 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
911 { .name = "fcntl", .errmsg = true,
912 .arg_scnprintf = { [0] = SCA_FD, /* fd */
913 [1] = SCA_STRARRAY, /* cmd */ },
914 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
915 { .name = "fdatasync", .errmsg = true,
916 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
917 { .name = "flock", .errmsg = true,
918 .arg_scnprintf = { [0] = SCA_FD, /* fd */
919 [1] = SCA_FLOCK, /* cmd */ }, },
920 { .name = "fsetxattr", .errmsg = true,
921 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
922 { .name = "fstat", .errmsg = true, .alias = "newfstat",
923 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
924 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
925 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
926 { .name = "fstatfs", .errmsg = true,
927 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
928 { .name = "fsync", .errmsg = true,
929 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
930 { .name = "ftruncate", .errmsg = true,
931 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
932 { .name = "futex", .errmsg = true,
933 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
934 { .name = "futimesat", .errmsg = true,
935 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
936 { .name = "getdents", .errmsg = true,
937 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
938 { .name = "getdents64", .errmsg = true,
939 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
940 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
941 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
942 { .name = "ioctl", .errmsg = true,
943 .arg_scnprintf = { [0] = SCA_FD, /* fd */
944 [1] = SCA_STRHEXARRAY, /* cmd */
945 [2] = SCA_HEX, /* arg */ },
946 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
947 { .name = "kill", .errmsg = true,
948 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
949 { .name = "linkat", .errmsg = true,
950 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
951 { .name = "lseek", .errmsg = true,
952 .arg_scnprintf = { [0] = SCA_FD, /* fd */
953 [2] = SCA_STRARRAY, /* whence */ },
954 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
955 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
956 { .name = "madvise", .errmsg = true,
957 .arg_scnprintf = { [0] = SCA_HEX, /* start */
958 [2] = SCA_MADV_BHV, /* behavior */ }, },
959 { .name = "mkdirat", .errmsg = true,
960 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
961 { .name = "mknodat", .errmsg = true,
962 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
963 { .name = "mlock", .errmsg = true,
964 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
965 { .name = "mlockall", .errmsg = true,
966 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
967 { .name = "mmap", .hexret = true,
968 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
969 [2] = SCA_MMAP_PROT, /* prot */
970 [3] = SCA_MMAP_FLAGS, /* flags */
971 [4] = SCA_FD, /* fd */ }, },
972 { .name = "mprotect", .errmsg = true,
973 .arg_scnprintf = { [0] = SCA_HEX, /* start */
974 [2] = SCA_MMAP_PROT, /* prot */ }, },
975 { .name = "mremap", .hexret = true,
976 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
977 [4] = SCA_HEX, /* new_addr */ }, },
978 { .name = "munlock", .errmsg = true,
979 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
980 { .name = "munmap", .errmsg = true,
981 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
982 { .name = "name_to_handle_at", .errmsg = true,
983 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
984 { .name = "newfstatat", .errmsg = true,
985 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
986 { .name = "open", .errmsg = true,
987 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
988 { .name = "open_by_handle_at", .errmsg = true,
989 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
990 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
991 { .name = "openat", .errmsg = true,
992 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
993 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
994 { .name = "pipe2", .errmsg = true,
995 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
996 { .name = "poll", .errmsg = true, .timeout = true, },
997 { .name = "ppoll", .errmsg = true, .timeout = true, },
998 { .name = "pread", .errmsg = true, .alias = "pread64",
999 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1000 { .name = "preadv", .errmsg = true, .alias = "pread",
1001 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1002 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1003 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1004 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1005 { .name = "pwritev", .errmsg = true,
1006 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1007 { .name = "read", .errmsg = true,
1008 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1009 { .name = "readlinkat", .errmsg = true,
1010 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1011 { .name = "readv", .errmsg = true,
1012 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1013 { .name = "recvfrom", .errmsg = true,
1014 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1015 { .name = "recvmmsg", .errmsg = true,
1016 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1017 { .name = "recvmsg", .errmsg = true,
1018 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1019 { .name = "renameat", .errmsg = true,
1020 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1021 { .name = "rt_sigaction", .errmsg = true,
1022 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1023 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1024 { .name = "rt_sigqueueinfo", .errmsg = true,
1025 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1026 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1027 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1028 { .name = "select", .errmsg = true, .timeout = true, },
1029 { .name = "sendmmsg", .errmsg = true,
1030 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1031 { .name = "sendmsg", .errmsg = true,
1032 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1033 { .name = "sendto", .errmsg = true,
1034 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1035 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1036 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1037 { .name = "shutdown", .errmsg = true,
1038 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1039 { .name = "socket", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1041 [1] = SCA_SK_TYPE, /* type */ },
1042 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1043 { .name = "socketpair", .errmsg = true,
1044 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1045 [1] = SCA_SK_TYPE, /* type */ },
1046 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1047 { .name = "stat", .errmsg = true, .alias = "newstat", },
1048 { .name = "symlinkat", .errmsg = true,
1049 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1050 { .name = "tgkill", .errmsg = true,
1051 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1052 { .name = "tkill", .errmsg = true,
1053 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1054 { .name = "uname", .errmsg = true, .alias = "newuname", },
1055 { .name = "unlinkat", .errmsg = true,
1056 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1057 { .name = "utimensat", .errmsg = true,
1058 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1059 { .name = "write", .errmsg = true,
1060 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061 { .name = "writev", .errmsg = true,
1062 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1063 };
1064
1065 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1066 {
1067 const struct syscall_fmt *fmt = fmtp;
1068 return strcmp(name, fmt->name);
1069 }
1070
1071 static struct syscall_fmt *syscall_fmt__find(const char *name)
1072 {
1073 const int nmemb = ARRAY_SIZE(syscall_fmts);
1074 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1075 }
1076
/*
 * Per-syscall information, stored in trace->syscalls.table indexed by the
 * syscall id and filled in by trace__read_syscall_info().
 */
struct syscall {
	/* format of the syscalls:sys_enter_<name> tracepoint, used to walk the args */
	struct event_format *tp_format;
	/* syscall name as returned by audit_syscall_to_name() */
	const char	    *name;
	/* true when the event qualifier excludes this syscall from the output */
	bool		    filtered;
	/* matching syscall_fmts entry, NULL when there is none */
	struct syscall_fmt  *fmt;
	/* per-argument formatters, set up by syscall__set_arg_fmts() */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per-argument formatter parameters, taken from fmt->arg_parm */
	void		    **arg_parm;
};
1085
1086 static size_t fprintf_duration(unsigned long t, FILE *fp)
1087 {
1088 double duration = (double)t / NSEC_PER_MSEC;
1089 size_t printed = fprintf(fp, "(");
1090
1091 if (duration >= 1.0)
1092 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1093 else if (duration >= 0.01)
1094 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1095 else
1096 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1097 return printed + fprintf(fp, "): ");
1098 }
1099
/* Per-thread tracing state, hung off thread->priv by thread__trace(). */
struct thread_trace {
	/* sample time of the last sys_enter seen for this thread */
	u64		  entry_time;
	/* sample time of the last sys_exit seen for this thread */
	u64		  exit_time;
	/* set at sys_enter, cleared at sys_exit: entry_str is waiting to be printed */
	bool		  entry_pending;
	/* bumped on every thread__trace() call for this thread */
	unsigned long	  nr_events;
	/* buffer holding the formatted "name(args" text built at sys_enter */
	char		  *entry_str;
	/* runtime accumulated from sched_stat_runtime samples, in msec */
	double		  runtime_ms;
	/* fd -> pathname cache: table[fd] for 0 <= fd <= max, max == -1 when empty */
	struct {
		int	  max;
		char	  **table;
	} paths;

	/* keyed by syscall id; each node's ->priv is a struct stats of durations */
	struct intlist *syscall_stats;
};
1114
1115 static struct thread_trace *thread_trace__new(void)
1116 {
1117 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1118
1119 if (ttrace)
1120 ttrace->paths.max = -1;
1121
1122 ttrace->syscall_stats = intlist__new(NULL);
1123
1124 return ttrace;
1125 }
1126
1127 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1128 {
1129 struct thread_trace *ttrace;
1130
1131 if (thread == NULL)
1132 goto fail;
1133
1134 if (thread->priv == NULL)
1135 thread->priv = thread_trace__new();
1136
1137 if (thread->priv == NULL)
1138 goto fail;
1139
1140 ttrace = thread->priv;
1141 ++ttrace->nr_events;
1142
1143 return ttrace;
1144 fail:
1145 color_fprintf(fp, PERF_COLOR_RED,
1146 "WARNING: not enough memory, dropping samples!\n");
1147 return NULL;
1148 }
1149
/* Global state of one 'perf trace' invocation. */
struct trace {
	/* perf_tool callbacks; handlers recover the struct trace via container_of() */
	struct perf_tool	tool;
	struct {
		/* machine type handed to audit_syscall_to_name() */
		int		machine;
		/* syscall id compared at sys_exit to record the path of a new fd */
		int		open_id;
	} audit;
	/* syscall id -> struct syscall table; max is the highest valid index, -1 when empty */
	struct {
		int		max;
		struct syscall  *table;
	} syscalls;
	struct record_opts	opts;
	/* machine used to look up / create threads for samples */
	struct machine		*host;
	/* time of the first sample seen; subtracted from printed timestamps */
	u64			base_time;
	/* when set, base_time is never latched and stays at its initial value */
	bool			full_time;
	/* where all trace output goes */
	FILE			*output;
	unsigned long		nr_events;
	/* syscall names used to filter; not_ev_qualifier inverts the match */
	struct strlist		*ev_qualifier;
	bool			not_ev_qualifier;
	/* true while trace__run() is active; allows reading fd paths from /proc */
	bool 			live;
	/* pathname from the most recent probe:vfs_getname sample, consumed at open exit */
	const char 		*last_vfs_getname;
	/* pid/tid filters built by parse_target_str() for replay */
	struct intlist		*tid_list;
	struct intlist		*pid_list;
	/* also trace sched:sched_stat_runtime */
	bool			sched;
	/* prefix each line with the tid (and comm when show_comm is set) */
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
	bool			show_comm;
	bool			show_tool_stats;
	/* syscalls shorter than this many msec are not printed */
	double			duration_filter;
	/* sum of all sched_stat_runtime runtimes, in msec */
	double			runtime_ms;
	/* how often fd paths came from vfs_getname vs. a /proc lookup */
	struct {
		u64		vfs_getname, proc_getname;
	} stats;
};
1184
1185 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1186 {
1187 struct thread_trace *ttrace = thread->priv;
1188
1189 if (fd > ttrace->paths.max) {
1190 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1191
1192 if (npath == NULL)
1193 return -1;
1194
1195 if (ttrace->paths.max != -1) {
1196 memset(npath + ttrace->paths.max + 1, 0,
1197 (fd - ttrace->paths.max) * sizeof(char *));
1198 } else {
1199 memset(npath, 0, (fd + 1) * sizeof(char *));
1200 }
1201
1202 ttrace->paths.table = npath;
1203 ttrace->paths.max = fd;
1204 }
1205
1206 ttrace->paths.table[fd] = strdup(pathname);
1207
1208 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1209 }
1210
1211 static int thread__read_fd_path(struct thread *thread, int fd)
1212 {
1213 char linkname[PATH_MAX], pathname[PATH_MAX];
1214 struct stat st;
1215 int ret;
1216
1217 if (thread->pid_ == thread->tid) {
1218 scnprintf(linkname, sizeof(linkname),
1219 "/proc/%d/fd/%d", thread->pid_, fd);
1220 } else {
1221 scnprintf(linkname, sizeof(linkname),
1222 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1223 }
1224
1225 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1226 return -1;
1227
1228 ret = readlink(linkname, pathname, sizeof(pathname));
1229
1230 if (ret < 0 || ret > st.st_size)
1231 return -1;
1232
1233 pathname[ret] = '\0';
1234 return trace__set_fd_pathname(thread, fd, pathname);
1235 }
1236
1237 static const char *thread__fd_path(struct thread *thread, int fd,
1238 struct trace *trace)
1239 {
1240 struct thread_trace *ttrace = thread->priv;
1241
1242 if (ttrace == NULL)
1243 return NULL;
1244
1245 if (fd < 0)
1246 return NULL;
1247
1248 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL))
1249 if (!trace->live)
1250 return NULL;
1251 ++trace->stats.proc_getname;
1252 if (thread__read_fd_path(thread, fd)) {
1253 return NULL;
1254 }
1255
1256 return ttrace->paths.table[fd];
1257 }
1258
1259 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1260 struct syscall_arg *arg)
1261 {
1262 int fd = arg->val;
1263 size_t printed = scnprintf(bf, size, "%d", fd);
1264 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1265
1266 if (path)
1267 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1268
1269 return printed;
1270 }
1271
1272 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1273 struct syscall_arg *arg)
1274 {
1275 int fd = arg->val;
1276 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1277 struct thread_trace *ttrace = arg->thread->priv;
1278
1279 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1280 zfree(&ttrace->paths.table[fd]);
1281
1282 return printed;
1283 }
1284
1285 static bool trace__filter_duration(struct trace *trace, double t)
1286 {
1287 return t < (trace->duration_filter * NSEC_PER_MSEC);
1288 }
1289
1290 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1291 {
1292 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1293
1294 return fprintf(fp, "%10.3f ", ts);
1295 }
1296
/*
 * Flags set from sig_handler() and polled by the event loop.  They are
 * volatile sig_atomic_t so that writes made inside the signal handler are
 * well defined and the polling loop always re-reads them.
 */
static volatile sig_atomic_t done = 0;
static volatile sig_atomic_t interrupted = 0;

/*
 * Signal handler: any handled signal requests termination via 'done';
 * 'interrupted' additionally records whether that signal was SIGINT.
 */
static void sig_handler(int sig)
{
	done = 1;
	interrupted = sig == SIGINT;
}
1305
1306 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1307 u64 duration, u64 tstamp, FILE *fp)
1308 {
1309 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1310 printed += fprintf_duration(duration, fp);
1311
1312 if (trace->multiple_threads) {
1313 if (trace->show_comm)
1314 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1315 printed += fprintf(fp, "%d ", thread->tid);
1316 }
1317
1318 return printed;
1319 }
1320
1321 static int trace__process_event(struct trace *trace, struct machine *machine,
1322 union perf_event *event, struct perf_sample *sample)
1323 {
1324 int ret = 0;
1325
1326 switch (event->header.type) {
1327 case PERF_RECORD_LOST:
1328 color_fprintf(trace->output, PERF_COLOR_RED,
1329 "LOST %" PRIu64 " events!\n", event->lost.lost);
1330 ret = machine__process_lost_event(machine, event, sample);
1331 default:
1332 ret = machine__process_event(machine, event, sample);
1333 break;
1334 }
1335
1336 return ret;
1337 }
1338
1339 static int trace__tool_process(struct perf_tool *tool,
1340 union perf_event *event,
1341 struct perf_sample *sample,
1342 struct machine *machine)
1343 {
1344 struct trace *trace = container_of(tool, struct trace, tool);
1345 return trace__process_event(trace, machine, event, sample);
1346 }
1347
1348 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1349 {
1350 int err = symbol__init();
1351
1352 if (err)
1353 return err;
1354
1355 trace->host = machine__new_host();
1356 if (trace->host == NULL)
1357 return -ENOMEM;
1358
1359 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1360 evlist->threads, trace__tool_process, false);
1361 if (err)
1362 symbol__exit();
1363
1364 return err;
1365 }
1366
1367 static int syscall__set_arg_fmts(struct syscall *sc)
1368 {
1369 struct format_field *field;
1370 int idx = 0;
1371
1372 sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *));
1373 if (sc->arg_scnprintf == NULL)
1374 return -1;
1375
1376 if (sc->fmt)
1377 sc->arg_parm = sc->fmt->arg_parm;
1378
1379 for (field = sc->tp_format->format.fields->next; field; field = field->next) {
1380 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1381 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1382 else if (field->flags & FIELD_IS_POINTER)
1383 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1384 ++idx;
1385 }
1386
1387 return 0;
1388 }
1389
1390 static int trace__read_syscall_info(struct trace *trace, int id)
1391 {
1392 char tp_name[128];
1393 struct syscall *sc;
1394 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1395
1396 if (name == NULL)
1397 return -1;
1398
1399 if (id > trace->syscalls.max) {
1400 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1401
1402 if (nsyscalls == NULL)
1403 return -1;
1404
1405 if (trace->syscalls.max != -1) {
1406 memset(nsyscalls + trace->syscalls.max + 1, 0,
1407 (id - trace->syscalls.max) * sizeof(*sc));
1408 } else {
1409 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1410 }
1411
1412 trace->syscalls.table = nsyscalls;
1413 trace->syscalls.max = id;
1414 }
1415
1416 sc = trace->syscalls.table + id;
1417 sc->name = name;
1418
1419 if (trace->ev_qualifier) {
1420 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1421
1422 if (!(in ^ trace->not_ev_qualifier)) {
1423 sc->filtered = true;
1424 /*
1425 * No need to do read tracepoint information since this will be
1426 * filtered out.
1427 */
1428 return 0;
1429 }
1430 }
1431
1432 sc->fmt = syscall_fmt__find(sc->name);
1433
1434 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1435 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1436
1437 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1438 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1439 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1440 }
1441
1442 if (sc->tp_format == NULL)
1443 return -1;
1444
1445 return syscall__set_arg_fmts(sc);
1446 }
1447
1448 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1449 unsigned long *args, struct trace *trace,
1450 struct thread *thread)
1451 {
1452 size_t printed = 0;
1453
1454 if (sc->tp_format != NULL) {
1455 struct format_field *field;
1456 u8 bit = 1;
1457 struct syscall_arg arg = {
1458 .idx = 0,
1459 .mask = 0,
1460 .trace = trace,
1461 .thread = thread,
1462 };
1463
1464 for (field = sc->tp_format->format.fields->next; field;
1465 field = field->next, ++arg.idx, bit <<= 1) {
1466 if (arg.mask & bit)
1467 continue;
1468 /*
1469 * Suppress this argument if its value is zero and
1470 * and we don't have a string associated in an
1471 * strarray for it.
1472 */
1473 if (args[arg.idx] == 0 &&
1474 !(sc->arg_scnprintf &&
1475 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1476 sc->arg_parm[arg.idx]))
1477 continue;
1478
1479 printed += scnprintf(bf + printed, size - printed,
1480 "%s%s: ", printed ? ", " : "", field->name);
1481 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1482 arg.val = args[arg.idx];
1483 if (sc->arg_parm)
1484 arg.parm = sc->arg_parm[arg.idx];
1485 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1486 size - printed, &arg);
1487 } else {
1488 printed += scnprintf(bf + printed, size - printed,
1489 "%ld", args[arg.idx]);
1490 }
1491 }
1492 } else {
1493 int i = 0;
1494
1495 while (i < 6) {
1496 printed += scnprintf(bf + printed, size - printed,
1497 "%sarg%d: %ld",
1498 printed ? ", " : "", i, args[i]);
1499 ++i;
1500 }
1501 }
1502
1503 return printed;
1504 }
1505
/*
 * Signature of the per-tracepoint sample handlers stored in evsel->handler
 * (trace__sys_enter, trace__sys_exit, trace__vfs_getname, ...).
 */
typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
				  struct perf_sample *sample);
1508
1509 static struct syscall *trace__syscall_info(struct trace *trace,
1510 struct perf_evsel *evsel, int id)
1511 {
1512
1513 if (id < 0) {
1514
1515 /*
1516 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1517 * before that, leaving at a higher verbosity level till that is
1518 * explained. Reproduced with plain ftrace with:
1519 *
1520 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1521 * grep "NR -1 " /t/trace_pipe
1522 *
1523 * After generating some load on the machine.
1524 */
1525 if (verbose > 1) {
1526 static u64 n;
1527 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1528 id, perf_evsel__name(evsel), ++n);
1529 }
1530 return NULL;
1531 }
1532
1533 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1534 trace__read_syscall_info(trace, id))
1535 goto out_cant_read;
1536
1537 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1538 goto out_cant_read;
1539
1540 return &trace->syscalls.table[id];
1541
1542 out_cant_read:
1543 if (verbose) {
1544 fprintf(trace->output, "Problems reading syscall %d", id);
1545 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1546 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1547 fputs(" information\n", trace->output);
1548 }
1549 return NULL;
1550 }
1551
1552 static void thread__update_stats(struct thread_trace *ttrace,
1553 int id, struct perf_sample *sample)
1554 {
1555 struct int_node *inode;
1556 struct stats *stats;
1557 u64 duration = 0;
1558
1559 inode = intlist__findnew(ttrace->syscall_stats, id);
1560 if (inode == NULL)
1561 return;
1562
1563 stats = inode->priv;
1564 if (stats == NULL) {
1565 stats = malloc(sizeof(struct stats));
1566 if (stats == NULL)
1567 return;
1568 init_stats(stats);
1569 inode->priv = stats;
1570 }
1571
1572 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1573 duration = sample->time - ttrace->entry_time;
1574
1575 update_stats(stats, duration);
1576 }
1577
1578 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1579 struct perf_sample *sample)
1580 {
1581 char *msg;
1582 void *args;
1583 size_t printed = 0;
1584 struct thread *thread;
1585 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1586 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1587 struct thread_trace *ttrace;
1588
1589 if (sc == NULL)
1590 return -1;
1591
1592 if (sc->filtered)
1593 return 0;
1594
1595 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1596 ttrace = thread__trace(thread, trace->output);
1597 if (ttrace == NULL)
1598 return -1;
1599
1600 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1601 ttrace = thread->priv;
1602
1603 if (ttrace->entry_str == NULL) {
1604 ttrace->entry_str = malloc(1024);
1605 if (!ttrace->entry_str)
1606 return -1;
1607 }
1608
1609 ttrace->entry_time = sample->time;
1610 msg = ttrace->entry_str;
1611 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1612
1613 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1614 args, trace, thread);
1615
1616 if (!strcmp(sc->name, "exit_group") || !strcmp(sc->name, "exit")) {
1617 if (!trace->duration_filter && !trace->summary_only) {
1618 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1619 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1620 }
1621 } else
1622 ttrace->entry_pending = true;
1623
1624 return 0;
1625 }
1626
1627 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1628 struct perf_sample *sample)
1629 {
1630 int ret;
1631 u64 duration = 0;
1632 struct thread *thread;
1633 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1634 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1635 struct thread_trace *ttrace;
1636
1637 if (sc == NULL)
1638 return -1;
1639
1640 if (sc->filtered)
1641 return 0;
1642
1643 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1644 ttrace = thread__trace(thread, trace->output);
1645 if (ttrace == NULL)
1646 return -1;
1647
1648 if (trace->summary)
1649 thread__update_stats(ttrace, id, sample);
1650
1651 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1652
1653 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1654 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1655 trace->last_vfs_getname = NULL;
1656 ++trace->stats.vfs_getname;
1657 }
1658
1659 ttrace = thread->priv;
1660
1661 ttrace->exit_time = sample->time;
1662
1663 if (ttrace->entry_time) {
1664 duration = sample->time - ttrace->entry_time;
1665 if (trace__filter_duration(trace, duration))
1666 goto out;
1667 } else if (trace->duration_filter)
1668 goto out;
1669
1670 if (trace->summary_only)
1671 goto out;
1672
1673 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1674
1675 if (ttrace->entry_pending) {
1676 fprintf(trace->output, "%-70s", ttrace->entry_str);
1677 } else {
1678 fprintf(trace->output, " ... [");
1679 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1680 fprintf(trace->output, "]: %s()", sc->name);
1681 }
1682
1683 if (sc->fmt == NULL) {
1684 signed_print:
1685 fprintf(trace->output, ") = %d", ret);
1686 } else if (ret < 0 && sc->fmt->errmsg) {
1687 char bf[256];
1688 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1689 *e = audit_errno_to_name(-ret);
1690
1691 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1692 } else if (ret == 0 && sc->fmt->timeout)
1693 fprintf(trace->output, ") = 0 Timeout");
1694 else if (sc->fmt->hexret)
1695 fprintf(trace->output, ") = %#x", ret);
1696 else
1697 goto signed_print;
1698
1699 fputc('\n', trace->output);
1700 out:
1701 ttrace->entry_pending = false;
1702
1703 return 0;
1704 }
1705
1706 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1707 struct perf_sample *sample)
1708 {
1709 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1710 return 0;
1711 }
1712
1713 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1714 struct perf_sample *sample)
1715 {
1716 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1717 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1718 struct thread *thread = machine__findnew_thread(trace->host,
1719 sample->pid,
1720 sample->tid);
1721 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1722
1723 if (ttrace == NULL)
1724 goto out_dump;
1725
1726 ttrace->runtime_ms += runtime_ms;
1727 trace->runtime_ms += runtime_ms;
1728 return 0;
1729
1730 out_dump:
1731 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1732 evsel->name,
1733 perf_evsel__strval(evsel, sample, "comm"),
1734 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1735 runtime,
1736 perf_evsel__intval(evsel, sample, "vruntime"));
1737 return 0;
1738 }
1739
1740 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1741 {
1742 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1743 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1744 return false;
1745
1746 if (trace->pid_list || trace->tid_list)
1747 return true;
1748
1749 return false;
1750 }
1751
1752 static int trace__process_sample(struct perf_tool *tool,
1753 union perf_event *event __maybe_unused,
1754 struct perf_sample *sample,
1755 struct perf_evsel *evsel,
1756 struct machine *machine __maybe_unused)
1757 {
1758 struct trace *trace = container_of(tool, struct trace, tool);
1759 int err = 0;
1760
1761 tracepoint_handler handler = evsel->handler;
1762
1763 if (skip_sample(trace, sample))
1764 return 0;
1765
1766 if (!trace->full_time && trace->base_time == 0)
1767 trace->base_time = sample->time;
1768
1769 if (handler) {
1770 ++trace->nr_events;
1771 handler(trace, evsel, sample);
1772 }
1773
1774 return err;
1775 }
1776
1777 static int parse_target_str(struct trace *trace)
1778 {
1779 if (trace->opts.target.pid) {
1780 trace->pid_list = intlist__new(trace->opts.target.pid);
1781 if (trace->pid_list == NULL) {
1782 pr_err("Error parsing process id string\n");
1783 return -EINVAL;
1784 }
1785 }
1786
1787 if (trace->opts.target.tid) {
1788 trace->tid_list = intlist__new(trace->opts.target.tid);
1789 if (trace->tid_list == NULL) {
1790 pr_err("Error parsing thread id string\n");
1791 return -EINVAL;
1792 }
1793 }
1794
1795 return 0;
1796 }
1797
/*
 * Implement 'perf trace record': build an argv for 'perf record' that
 * selects the syscall enter/exit tracepoints, append the user's own
 * arguments and hand the result to cmd_record().
 *
 * Returns cmd_record()'s result, -ENOMEM when the argv can't be allocated
 * or -1 when neither the raw_syscalls nor the syscalls tracepoints exist.
 */
static int trace__record(int argc, const char **argv)
{
	unsigned int rec_argc, i, j;
	const char **rec_argv;
	const char * const record_args[] = {
		"record",
		"-R",
		"-m", "1024",
		"-c", "1",
		"-e",
	};

	/* +1 is for the event string below */
	rec_argc = ARRAY_SIZE(record_args) + 1 + argc;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));

	if (rec_argv == NULL)
		return -ENOMEM;

	for (i = 0; i < ARRAY_SIZE(record_args); i++)
		rec_argv[i] = record_args[i];

	/* event string may be different for older kernels - e.g., RHEL6 */
	if (is_valid_tracepoint("raw_syscalls:sys_enter"))
		rec_argv[i] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
	else if (is_valid_tracepoint("syscalls:sys_enter"))
		rec_argv[i] = "syscalls:sys_enter,syscalls:sys_exit";
	else {
		pr_err("Neither raw_syscalls nor syscalls events exist.\n");
		/* nothing will use the argv: don't leak it */
		free(rec_argv);
		return -1;
	}
	i++;

	for (j = 0; j < (unsigned int)argc; j++, i++)
		rec_argv[i] = argv[j];

	return cmd_record(i, rec_argv, NULL);
}
1836
1837 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1838
1839 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
1840 {
1841 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
1842 if (evsel == NULL)
1843 return;
1844
1845 if (perf_evsel__field(evsel, "pathname") == NULL) {
1846 perf_evsel__delete(evsel);
1847 return;
1848 }
1849
1850 evsel->handler = trace__vfs_getname;
1851 perf_evlist__add(evlist, evsel);
1852 }
1853
/*
 * Live tracing: set up the syscall (and optionally vfs_getname and
 * sched_stat_runtime) tracepoints, optionally start the workload given in
 * @argv, then read events from the mmap'ed ring buffers and dispatch them
 * until interrupted or until polling finds nothing more to read.
 *
 * Returns the last error code recorded in 'err' (0 when everything went
 * fine); setup failures are reported on trace->output.
 */
static int trace__run(struct trace *trace, int argc, const char **argv)
{
	struct perf_evlist *evlist = perf_evlist__new();
	struct perf_evsel *evsel;
	int err = -1, i;
	unsigned long before;
	/* a command line was given: we fork and trace that workload */
	const bool forks = argc > 0;

	trace->live = true;

	if (evlist == NULL) {
		fprintf(trace->output, "Not enough memory to run!\n");
		goto out;
	}

	if (perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit))
		goto out_error_tp;

	/* optional, failure to add it is not an error */
	perf_evlist__add_vfs_getname(evlist);

	if (trace->sched &&
		perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
				trace__sched_stat_runtime))
		goto out_error_tp;

	err = perf_evlist__create_maps(evlist, &trace->opts.target);
	if (err < 0) {
		fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
		goto out_delete_evlist;
	}

	err = trace__symbols_init(trace, evlist);
	if (err < 0) {
		fprintf(trace->output, "Problems initializing symbol libraries!\n");
		goto out_delete_maps;
	}

	perf_evlist__config(evlist, &trace->opts);

	/* both signals set 'done'; SIGINT additionally sets 'interrupted' */
	signal(SIGCHLD, sig_handler);
	signal(SIGINT, sig_handler);

	if (forks) {
		err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
						    argv, false, false);
		if (err < 0) {
			fprintf(trace->output, "Couldn't run the workload!\n");
			goto out_delete_maps;
		}
	}

	err = perf_evlist__open(evlist);
	if (err < 0)
		goto out_error_open;

	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
	if (err < 0) {
		fprintf(trace->output, "Couldn't mmap the events: %s\n", strerror(errno));
		goto out_close_evlist;
	}

	perf_evlist__enable(evlist);

	if (forks)
		perf_evlist__start_workload(evlist);

	/* show the tid column when more than one thread may show up in the output */
	trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1;
again:
	/* used below to tell whether this pass consumed any event */
	before = trace->nr_events;

	for (i = 0; i < evlist->nr_mmaps; i++) {
		union perf_event *event;

		while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
			const u32 type = event->header.type;
			tracepoint_handler handler;
			struct perf_sample sample;

			++trace->nr_events;

			err = perf_evlist__parse_sample(evlist, event, &sample);
			if (err) {
				fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
				goto next_event;
			}

			if (!trace->full_time && trace->base_time == 0)
				trace->base_time = sample.time;

			/*
			 * NOTE(review): this 'continue' bypasses the
			 * perf_evlist__mmap_consume() call and the 'interrupted'
			 * check at next_event for non-sample events - confirm
			 * that is intended.
			 */
			if (type != PERF_RECORD_SAMPLE) {
				trace__process_event(trace, trace->host, event, &sample);
				continue;
			}

			evsel = perf_evlist__id2evsel(evlist, sample.id);
			if (evsel == NULL) {
				fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id);
				goto next_event;
			}

			if (sample.raw_data == NULL) {
				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
				       perf_evsel__name(evsel), sample.tid,
				       sample.cpu, sample.raw_size);
				goto next_event;
			}

			handler = evsel->handler;
			handler(trace, evsel, &sample);
next_event:
			perf_evlist__mmap_consume(evlist, i);

			if (interrupted)
				goto out_disable;
		}
	}

	if (trace->nr_events == before) {
		/*
		 * Nothing was read in this pass: wait for more events.  Once
		 * 'done' is set only wait 100ms for stragglers; a poll() that
		 * reports nothing then ends the loop.
		 */
		int timeout = done ? 100 : -1;

		if (poll(evlist->pollfd, evlist->nr_fds, timeout) > 0)
			goto again;
	} else {
		goto again;
	}

out_disable:
	perf_evlist__disable(evlist);

	/* 'err' still holds the result of the last perf_evlist__parse_sample() */
	if (!err) {
		if (trace->summary)
			trace__fprintf_thread_summary(trace, trace->output);

		if (trace->show_tool_stats) {
			fprintf(trace->output, "Stats:\n "
					       " vfs_getname : %" PRIu64 "\n"
					       " proc_getname: %" PRIu64 "\n",
				trace->stats.vfs_getname,
				trace->stats.proc_getname);
		}
	}

	/* teardown, in reverse order of setup; error paths jump into this chain */
	perf_evlist__munmap(evlist);
out_close_evlist:
	perf_evlist__close(evlist);
out_delete_maps:
	perf_evlist__delete_maps(evlist);
out_delete_evlist:
	perf_evlist__delete(evlist);
out:
	trace->live = false;
	return err;
/*
 * Error reporting tail, only reachable via goto: turn errno into a message
 * for the tracepoint setup / open failures, then join the teardown chain.
 */
{
	char errbuf[BUFSIZ];

out_error_tp:
	perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
	goto out_error;

out_error_open:
	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));

out_error:
	fprintf(trace->output, "%s\n", errbuf);
	goto out_delete_evlist;
}
}
2021
2022 static int trace__replay(struct trace *trace)
2023 {
2024 const struct perf_evsel_str_handler handlers[] = {
2025 { "probe:vfs_getname", trace__vfs_getname, },
2026 };
2027 struct perf_data_file file = {
2028 .path = input_name,
2029 .mode = PERF_DATA_MODE_READ,
2030 };
2031 struct perf_session *session;
2032 struct perf_evsel *evsel;
2033 int err = -1;
2034
2035 trace->tool.sample = trace__process_sample;
2036 trace->tool.mmap = perf_event__process_mmap;
2037 trace->tool.mmap2 = perf_event__process_mmap2;
2038 trace->tool.comm = perf_event__process_comm;
2039 trace->tool.exit = perf_event__process_exit;
2040 trace->tool.fork = perf_event__process_fork;
2041 trace->tool.attr = perf_event__process_attr;
2042 trace->tool.tracing_data = perf_event__process_tracing_data;
2043 trace->tool.build_id = perf_event__process_build_id;
2044
2045 trace->tool.ordered_samples = true;
2046 trace->tool.ordering_requires_timestamps = true;
2047
2048 /* add tid to output */
2049 trace->multiple_threads = true;
2050
2051 if (symbol__init() < 0)
2052 return -1;
2053
2054 session = perf_session__new(&file, false, &trace->tool);
2055 if (session == NULL)
2056 return -ENOMEM;
2057
2058 trace->host = &session->machines.host;
2059
2060 err = perf_session__set_tracepoints_handlers(session, handlers);
2061 if (err)
2062 goto out;
2063
2064 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2065 "raw_syscalls:sys_enter");
2066 /* older kernels have syscalls tp versus raw_syscalls */
2067 if (evsel == NULL)
2068 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2069 "syscalls:sys_enter");
2070 if (evsel == NULL) {
2071 pr_err("Data file does not have raw_syscalls:sys_enter event\n");
2072 goto out;
2073 }
2074
2075 if (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2076 perf_evsel__init_sc_tp_ptr_field(evsel, args)) {
2077 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2078 goto out;
2079 }
2080
2081 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2082 "raw_syscalls:sys_exit");
2083 if (evsel == NULL)
2084 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2085 "syscalls:sys_exit");
2086 if (evsel == NULL) {
2087 pr_err("Data file does not have raw_syscalls:sys_exit event\n");
2088 goto out;
2089 }
2090
2091 if (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2092 perf_evsel__init_sc_tp_uint_field(evsel, ret)) {
2093 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2094 goto out;
2095 }
2096
2097 err = parse_target_str(trace);
2098 if (err != 0)
2099 goto out;
2100
2101 setup_pager();
2102
2103 err = perf_session__process_events(session, &trace->tool);
2104 if (err)
2105 pr_err("Failed to process events, error %d", err);
2106
2107 else if (trace->summary)
2108 trace__fprintf_thread_summary(trace, trace->output);
2109
2110 out:
2111 perf_session__delete(session);
2112
2113 return err;
2114 }
2115
/* Print the heading of the per-thread summary to @fp; returns fprintf()'s result. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2124
2125 static size_t thread__dump_stats(struct thread_trace *ttrace,
2126 struct trace *trace, FILE *fp)
2127 {
2128 struct stats *stats;
2129 size_t printed = 0;
2130 struct syscall *sc;
2131 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2132
2133 if (inode == NULL)
2134 return 0;
2135
2136 printed += fprintf(fp, "\n");
2137
2138 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2139 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2140 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2141
2142 /* each int_node is a syscall */
2143 while (inode) {
2144 stats = inode->priv;
2145 if (stats) {
2146 double min = (double)(stats->min) / NSEC_PER_MSEC;
2147 double max = (double)(stats->max) / NSEC_PER_MSEC;
2148 double avg = avg_stats(stats);
2149 double pct;
2150 u64 n = (u64) stats->n;
2151
2152 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2153 avg /= NSEC_PER_MSEC;
2154
2155 sc = &trace->syscalls.table[inode->i];
2156 printed += fprintf(fp, " %-15s", sc->name);
2157 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2158 n, min, avg);
2159 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2160 }
2161
2162 inode = intlist__next(inode);
2163 }
2164
2165 printed += fprintf(fp, "\n\n");
2166
2167 return printed;
2168 }
2169
/*
 * Context handed to the per-thread summary callback through its
 * opaque priv pointer.
 */
struct summary_data {
	FILE *fp;		/* stream the summary is written to */
	struct trace *trace;	/* trace session being summarized */
	size_t printed;		/* running total of fprintf() results */
};
2176
2177 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2178 {
2179 struct summary_data *data = priv;
2180 FILE *fp = data->fp;
2181 size_t printed = data->printed;
2182 struct trace *trace = data->trace;
2183 struct thread_trace *ttrace = thread->priv;
2184 double ratio;
2185
2186 if (ttrace == NULL)
2187 return 0;
2188
2189 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2190
2191 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2192 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2193 printed += fprintf(fp, "%.1f%%", ratio);
2194 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2195 printed += thread__dump_stats(ttrace, trace, fp);
2196
2197 data->printed += printed;
2198
2199 return 0;
2200 }
2201
2202 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2203 {
2204 struct summary_data data = {
2205 .fp = fp,
2206 .trace = trace
2207 };
2208 data.printed = trace__fprintf_threads_header(fp);
2209
2210 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2211
2212 return data.printed;
2213 }
2214
2215 static int trace__set_duration(const struct option *opt, const char *str,
2216 int unset __maybe_unused)
2217 {
2218 struct trace *trace = opt->value;
2219
2220 trace->duration_filter = atof(str);
2221 return 0;
2222 }
2223
2224 static int trace__open_output(struct trace *trace, const char *filename)
2225 {
2226 struct stat st;
2227
2228 if (!stat(filename, &st) && st.st_size) {
2229 char oldname[PATH_MAX];
2230
2231 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2232 unlink(oldname);
2233 rename(filename, oldname);
2234 }
2235
2236 trace->output = fopen(filename, "w");
2237
2238 return trace->output == NULL ? -errno : 0;
2239 }
2240
2241 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2242 {
2243 const char * const trace_usage[] = {
2244 "perf trace [<options>] [<command>]",
2245 "perf trace [<options>] -- <command> [<options>]",
2246 "perf trace record [<options>] [<command>]",
2247 "perf trace record [<options>] -- <command> [<options>]",
2248 NULL
2249 };
2250 struct trace trace = {
2251 .audit = {
2252 .machine = audit_detect_machine(),
2253 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2254 },
2255 .syscalls = {
2256 . max = -1,
2257 },
2258 .opts = {
2259 .target = {
2260 .uid = UINT_MAX,
2261 .uses_mmap = true,
2262 },
2263 .user_freq = UINT_MAX,
2264 .user_interval = ULLONG_MAX,
2265 .no_delay = true,
2266 .mmap_pages = 1024,
2267 },
2268 .output = stdout,
2269 .show_comm = true,
2270 };
2271 const char *output_name = NULL;
2272 const char *ev_qualifier_str = NULL;
2273 const struct option trace_options[] = {
2274 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2275 "show the thread COMM next to its id"),
2276 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2277 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2278 "list of events to trace"),
2279 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2280 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2281 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2282 "trace events on existing process id"),
2283 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2284 "trace events on existing thread id"),
2285 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2286 "system-wide collection from all CPUs"),
2287 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2288 "list of cpus to monitor"),
2289 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2290 "child tasks do not inherit counters"),
2291 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2292 "number of mmap data pages",
2293 perf_evlist__parse_mmap_pages),
2294 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2295 "user to profile"),
2296 OPT_CALLBACK(0, "duration", &trace, "float",
2297 "show only events with duration > N.M ms",
2298 trace__set_duration),
2299 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2300 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2301 OPT_BOOLEAN('T', "time", &trace.full_time,
2302 "Show full timestamp, not time relative to first start"),
2303 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2304 "Show only syscall summary with statistics"),
2305 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2306 "Show all syscalls and summary with statistics"),
2307 OPT_END()
2308 };
2309 int err;
2310 char bf[BUFSIZ];
2311
2312 if ((argc > 1) && (strcmp(argv[1], "record") == 0))
2313 return trace__record(argc-2, &argv[2]);
2314
2315 argc = parse_options(argc, argv, trace_options, trace_usage, 0);
2316
2317 /* summary_only implies summary option, but don't overwrite summary if set */
2318 if (trace.summary_only)
2319 trace.summary = trace.summary_only;
2320
2321 if (output_name != NULL) {
2322 err = trace__open_output(&trace, output_name);
2323 if (err < 0) {
2324 perror("failed to create output file");
2325 goto out;
2326 }
2327 }
2328
2329 if (ev_qualifier_str != NULL) {
2330 const char *s = ev_qualifier_str;
2331
2332 trace.not_ev_qualifier = *s == '!';
2333 if (trace.not_ev_qualifier)
2334 ++s;
2335 trace.ev_qualifier = strlist__new(true, s);
2336 if (trace.ev_qualifier == NULL) {
2337 fputs("Not enough memory to parse event qualifier",
2338 trace.output);
2339 err = -ENOMEM;
2340 goto out_close;
2341 }
2342 }
2343
2344 err = target__validate(&trace.opts.target);
2345 if (err) {
2346 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2347 fprintf(trace.output, "%s", bf);
2348 goto out_close;
2349 }
2350
2351 err = target__parse_uid(&trace.opts.target);
2352 if (err) {
2353 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2354 fprintf(trace.output, "%s", bf);
2355 goto out_close;
2356 }
2357
2358 if (!argc && target__none(&trace.opts.target))
2359 trace.opts.target.system_wide = true;
2360
2361 if (input_name)
2362 err = trace__replay(&trace);
2363 else
2364 err = trace__run(&trace, argc, argv);
2365
2366 out_close:
2367 if (output_name != NULL)
2368 fclose(trace.output);
2369 out:
2370 return err;
2371 }