]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blob - tools/perf/builtin-trace.c
Merge tag 'pinctrl-v4.1-1' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw...
[mirror_ubuntu-zesty-kernel.git] / tools / perf / builtin-trace.c
1 #include <traceevent/event-parse.h>
2 #include "builtin.h"
3 #include "util/color.h"
4 #include "util/debug.h"
5 #include "util/evlist.h"
6 #include "util/machine.h"
7 #include "util/session.h"
8 #include "util/thread.h"
9 #include "util/parse-options.h"
10 #include "util/strlist.h"
11 #include "util/intlist.h"
12 #include "util/thread_map.h"
13 #include "util/stat.h"
14 #include "trace-event.h"
15 #include "util/parse-events.h"
16
17 #include <libaudit.h>
18 #include <stdlib.h>
19 #include <sys/eventfd.h>
20 #include <sys/mman.h>
21 #include <linux/futex.h>
22
/*
 * For older distros: fallback values for constants that the installed
 * headers may not provide yet, so that the beautifiers below still build.
 */
/* Referenced by the mmap flags beautifier (P_MMAP_FLAG(STACK)). */
#ifndef MAP_STACK
# define MAP_STACK 0x20000
#endif

/* The next three are referenced by the madvise behavior beautifier. */
#ifndef MADV_HWPOISON
# define MADV_HWPOISON 100
#endif

#ifndef MADV_MERGEABLE
# define MADV_MERGEABLE 12
#endif

#ifndef MADV_UNMERGEABLE
# define MADV_UNMERGEABLE 13
#endif

/* Referenced by the eventfd flags beautifier. */
#ifndef EFD_SEMAPHORE
# define EFD_SEMAPHORE 1
#endif
43
/*
 * Accessor for one field of a tracepoint sample's raw payload.
 *
 * @offset:  byte offset of the field inside sample->raw_data.
 * @integer: reader installed by tp_field__init_uint(); returns the field
 *           value widened to u64.
 * @pointer: reader installed by tp_field__init_ptr(); returns the address
 *           of the field inside the payload.
 *
 * The two callbacks share storage, so only the one that was installed by the
 * matching init helper may be called.
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
51
52 #define TP_UINT_FIELD(bits) \
53 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
54 { \
55 u##bits value; \
56 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
57 return value; \
58 }
59
60 TP_UINT_FIELD(8);
61 TP_UINT_FIELD(16);
62 TP_UINT_FIELD(32);
63 TP_UINT_FIELD(64);
64
65 #define TP_UINT_FIELD__SWAPPED(bits) \
66 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
67 { \
68 u##bits value; \
69 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
70 return bswap_##bits(value);\
71 }
72
73 TP_UINT_FIELD__SWAPPED(16);
74 TP_UINT_FIELD__SWAPPED(32);
75 TP_UINT_FIELD__SWAPPED(64);
76
77 static int tp_field__init_uint(struct tp_field *field,
78 struct format_field *format_field,
79 bool needs_swap)
80 {
81 field->offset = format_field->offset;
82
83 switch (format_field->size) {
84 case 1:
85 field->integer = tp_field__u8;
86 break;
87 case 2:
88 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
89 break;
90 case 4:
91 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
92 break;
93 case 8:
94 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
95 break;
96 default:
97 return -1;
98 }
99
100 return 0;
101 }
102
103 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
104 {
105 return sample->raw_data + field->offset;
106 }
107
108 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
109 {
110 field->offset = format_field->offset;
111 field->pointer = tp_field__ptr;
112 return 0;
113 }
114
115 struct syscall_tp {
116 struct tp_field id;
117 union {
118 struct tp_field args, ret;
119 };
120 };
121
122 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
123 struct tp_field *field,
124 const char *name)
125 {
126 struct format_field *format_field = perf_evsel__field(evsel, name);
127
128 if (format_field == NULL)
129 return -1;
130
131 return tp_field__init_uint(field, format_field, evsel->needs_swap);
132 }
133
/*
 * Initialise member @name of the struct syscall_tp hanging off evsel->priv
 * as an unsigned integer reader for the tracepoint field of the same name.
 * Evaluates to the return value of perf_evsel__init_tp_uint_field().
 *
 * @evsel is parenthesised so that any pointer expression can be passed; note
 * that it is still evaluated more than once.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
137
/*
 * Look up the tracepoint format field called @name on @evsel and initialise
 * @field as a pointer reader for it.
 *
 * Returns 0 on success, -1 if the field does not exist.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
149
/*
 * Initialise member @name of the struct syscall_tp hanging off evsel->priv
 * as a pointer reader for the tracepoint field of the same name.
 * Evaluates to the return value of perf_evsel__init_tp_ptr_field().
 *
 * @evsel is parenthesised so that any pointer expression can be passed; note
 * that it is still evaluated more than once.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
153
/*
 * Destroy an evsel created by perf_evsel__syscall_newtp(): release the
 * private data attached to evsel->priv, then delete the evsel itself.
 */
static void perf_evsel__delete_priv(struct perf_evsel *evsel)
{
	/* priv must go first: the evsel is gone after perf_evsel__delete(). */
	zfree(&evsel->priv);
	perf_evsel__delete(evsel);
}
159
160 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
161 {
162 evsel->priv = malloc(sizeof(struct syscall_tp));
163 if (evsel->priv != NULL) {
164 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
165 goto out_delete;
166
167 evsel->handler = handler;
168 return 0;
169 }
170
171 return -ENOMEM;
172
173 out_delete:
174 zfree(&evsel->priv);
175 return -ENOENT;
176 }
177
/*
 * Create the evsel for the "raw_syscalls:<direction>" tracepoint, falling
 * back to "syscalls:<direction>" when the former is not available, and
 * prepare it with perf_evsel__init_syscall_tp().
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the initialisation failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (evsel == NULL)
		evsel = perf_evsel__newtp("syscalls", direction);

	if (evsel == NULL)
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
197
/*
 * Read member @name of the evsel's struct syscall_tp from @sample through
 * its integer reader. The member must have been set up with
 * perf_evsel__init_sc_tp_uint_field(). @evsel is parenthesised so that any
 * pointer expression can be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })

/*
 * Same as above for members set up with perf_evsel__init_sc_tp_ptr_field():
 * evaluates to a pointer into the sample's raw payload.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
205
206 static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist,
207 void *sys_enter_handler,
208 void *sys_exit_handler)
209 {
210 int ret = -1;
211 struct perf_evsel *sys_enter, *sys_exit;
212
213 sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler);
214 if (sys_enter == NULL)
215 goto out;
216
217 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
218 goto out_delete_sys_enter;
219
220 sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler);
221 if (sys_exit == NULL)
222 goto out_delete_sys_enter;
223
224 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
225 goto out_delete_sys_exit;
226
227 perf_evlist__add(evlist, sys_enter);
228 perf_evlist__add(evlist, sys_exit);
229
230 ret = 0;
231 out:
232 return ret;
233
234 out_delete_sys_exit:
235 perf_evsel__delete_priv(sys_exit);
236 out_delete_sys_enter:
237 perf_evsel__delete_priv(sys_enter);
238 goto out;
239 }
240
241
/*
 * Context handed to every syscall argument beautifier
 * (the arg_scnprintf callbacks in struct syscall_fmt).
 */
struct syscall_arg {
	unsigned long val;	/* raw value of the argument being formatted */
	struct thread *thread;	/* NOTE(review): presumably the thread that issued the syscall - confirm */
	struct trace *trace;	/* NOTE(review): presumably the owning 'perf trace' session - confirm */
	void *parm;		/* per-argument parameter from arg_parm[], e.g. a struct strarray */
	u8 idx;			/* position of this argument in the syscall's argument list */
	u8 mask;		/* one bit per argument index; beautifiers set bits for
				 * arguments that are not meaningful for this call
				 * (presumably suppressing their output - confirm) */
};
250
/*
 * Table mapping small integers to names.
 *
 * entries[i] is the name for the value (i + offset); nr_entries is the
 * number of slots in entries[].
 */
struct strarray {
	int offset;
	int nr_entries;
	const char **entries;
};

/*
 * Define 'struct strarray strarray__<array>' describing the string table
 * <array>, whose first slot corresponds to the value 0. Storage class
 * (e.g. 'static') is supplied by the user in front of the macro.
 */
#define DEFINE_STRARRAY(array) \
	struct strarray strarray__##array = { \
		.offset     = 0, \
		.nr_entries = ARRAY_SIZE(array), \
		.entries    = array, \
	}

/* Same, for tables whose first slot corresponds to the value 'off'. */
#define DEFINE_STRARRAY_OFFSET(array, off) \
	struct strarray strarray__##array = { \
		.offset     = off, \
		.nr_entries = ARRAY_SIZE(array), \
		.entries    = array, \
	}
267
268 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
269 const char *intfmt,
270 struct syscall_arg *arg)
271 {
272 struct strarray *sa = arg->parm;
273 int idx = arg->val - sa->offset;
274
275 if (idx < 0 || idx >= sa->nr_entries)
276 return scnprintf(bf, size, intfmt, arg->val);
277
278 return scnprintf(bf, size, "%s", sa->entries[idx]);
279 }
280
/*
 * strarray beautifier: print the name found in the table passed via
 * arg->parm, or the value in decimal when the table has no name for it.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char decimal_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, decimal_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
288
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 * gets rewritten to support all arches.
 *
 * Same as the strarray beautifier, but values without a name are printed in
 * hexadecimal ("%#x") instead of decimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char hex_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, hex_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
302
303 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
304 struct syscall_arg *arg);
305
306 #define SCA_FD syscall_arg__scnprintf_fd
307
308 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
309 struct syscall_arg *arg)
310 {
311 int fd = arg->val;
312
313 if (fd == AT_FDCWD)
314 return scnprintf(bf, size, "CWD");
315
316 return syscall_arg__scnprintf_fd(bf, size, arg);
317 }
318
319 #define SCA_FDAT syscall_arg__scnprintf_fd_at
320
/*
 * Beautifier used for the fd argument of close() in syscall_fmts[].
 * Forward declaration only; the definition is not in this part of the file.
 */
static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
					      struct syscall_arg *arg);

#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
325
326 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
327 struct syscall_arg *arg)
328 {
329 return scnprintf(bf, size, "%#lx", arg->val);
330 }
331
332 #define SCA_HEX syscall_arg__scnprintf_hex
333
334 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
335 struct syscall_arg *arg)
336 {
337 int printed = 0, prot = arg->val;
338
339 if (prot == PROT_NONE)
340 return scnprintf(bf, size, "NONE");
341 #define P_MMAP_PROT(n) \
342 if (prot & PROT_##n) { \
343 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
344 prot &= ~PROT_##n; \
345 }
346
347 P_MMAP_PROT(EXEC);
348 P_MMAP_PROT(READ);
349 P_MMAP_PROT(WRITE);
350 #ifdef PROT_SEM
351 P_MMAP_PROT(SEM);
352 #endif
353 P_MMAP_PROT(GROWSDOWN);
354 P_MMAP_PROT(GROWSUP);
355 #undef P_MMAP_PROT
356
357 if (prot)
358 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
359
360 return printed;
361 }
362
363 #define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
364
365 static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
366 struct syscall_arg *arg)
367 {
368 int printed = 0, flags = arg->val;
369
370 #define P_MMAP_FLAG(n) \
371 if (flags & MAP_##n) { \
372 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
373 flags &= ~MAP_##n; \
374 }
375
376 P_MMAP_FLAG(SHARED);
377 P_MMAP_FLAG(PRIVATE);
378 #ifdef MAP_32BIT
379 P_MMAP_FLAG(32BIT);
380 #endif
381 P_MMAP_FLAG(ANONYMOUS);
382 P_MMAP_FLAG(DENYWRITE);
383 P_MMAP_FLAG(EXECUTABLE);
384 P_MMAP_FLAG(FILE);
385 P_MMAP_FLAG(FIXED);
386 P_MMAP_FLAG(GROWSDOWN);
387 #ifdef MAP_HUGETLB
388 P_MMAP_FLAG(HUGETLB);
389 #endif
390 P_MMAP_FLAG(LOCKED);
391 P_MMAP_FLAG(NONBLOCK);
392 P_MMAP_FLAG(NORESERVE);
393 P_MMAP_FLAG(POPULATE);
394 P_MMAP_FLAG(STACK);
395 #ifdef MAP_UNINITIALIZED
396 P_MMAP_FLAG(UNINITIALIZED);
397 #endif
398 #undef P_MMAP_FLAG
399
400 if (flags)
401 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
402
403 return printed;
404 }
405
406 #define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
407
408 static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
409 struct syscall_arg *arg)
410 {
411 int printed = 0, flags = arg->val;
412
413 #define P_MREMAP_FLAG(n) \
414 if (flags & MREMAP_##n) { \
415 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
416 flags &= ~MREMAP_##n; \
417 }
418
419 P_MREMAP_FLAG(MAYMOVE);
420 #ifdef MREMAP_FIXED
421 P_MREMAP_FLAG(FIXED);
422 #endif
423 #undef P_MREMAP_FLAG
424
425 if (flags)
426 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
427
428 return printed;
429 }
430
431 #define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
432
433 static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
434 struct syscall_arg *arg)
435 {
436 int behavior = arg->val;
437
438 switch (behavior) {
439 #define P_MADV_BHV(n) case MADV_##n: return scnprintf(bf, size, #n)
440 P_MADV_BHV(NORMAL);
441 P_MADV_BHV(RANDOM);
442 P_MADV_BHV(SEQUENTIAL);
443 P_MADV_BHV(WILLNEED);
444 P_MADV_BHV(DONTNEED);
445 P_MADV_BHV(REMOVE);
446 P_MADV_BHV(DONTFORK);
447 P_MADV_BHV(DOFORK);
448 P_MADV_BHV(HWPOISON);
449 #ifdef MADV_SOFT_OFFLINE
450 P_MADV_BHV(SOFT_OFFLINE);
451 #endif
452 P_MADV_BHV(MERGEABLE);
453 P_MADV_BHV(UNMERGEABLE);
454 #ifdef MADV_HUGEPAGE
455 P_MADV_BHV(HUGEPAGE);
456 #endif
457 #ifdef MADV_NOHUGEPAGE
458 P_MADV_BHV(NOHUGEPAGE);
459 #endif
460 #ifdef MADV_DONTDUMP
461 P_MADV_BHV(DONTDUMP);
462 #endif
463 #ifdef MADV_DODUMP
464 P_MADV_BHV(DODUMP);
465 #endif
466 #undef P_MADV_PHV
467 default: break;
468 }
469
470 return scnprintf(bf, size, "%#x", behavior);
471 }
472
473 #define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
474
475 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
476 struct syscall_arg *arg)
477 {
478 int printed = 0, op = arg->val;
479
480 if (op == 0)
481 return scnprintf(bf, size, "NONE");
482 #define P_CMD(cmd) \
483 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
484 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
485 op &= ~LOCK_##cmd; \
486 }
487
488 P_CMD(SH);
489 P_CMD(EX);
490 P_CMD(NB);
491 P_CMD(UN);
492 P_CMD(MAND);
493 P_CMD(RW);
494 P_CMD(READ);
495 P_CMD(WRITE);
496 #undef P_OP
497
498 if (op)
499 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
500
501 return printed;
502 }
503
504 #define SCA_FLOCK syscall_arg__scnprintf_flock
505
506 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
507 {
508 enum syscall_futex_args {
509 SCF_UADDR = (1 << 0),
510 SCF_OP = (1 << 1),
511 SCF_VAL = (1 << 2),
512 SCF_TIMEOUT = (1 << 3),
513 SCF_UADDR2 = (1 << 4),
514 SCF_VAL3 = (1 << 5),
515 };
516 int op = arg->val;
517 int cmd = op & FUTEX_CMD_MASK;
518 size_t printed = 0;
519
520 switch (cmd) {
521 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
522 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
523 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
524 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
525 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
526 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
527 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
528 P_FUTEX_OP(WAKE_OP); break;
529 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
530 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
531 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
532 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
533 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
534 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
535 default: printed = scnprintf(bf, size, "%#x", cmd); break;
536 }
537
538 if (op & FUTEX_PRIVATE_FLAG)
539 printed += scnprintf(bf + printed, size - printed, "|PRIV");
540
541 if (op & FUTEX_CLOCK_REALTIME)
542 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
543
544 return printed;
545 }
546
547 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
548
549 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
550 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
551
552 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
553 static DEFINE_STRARRAY(itimers);
554
555 static const char *whences[] = { "SET", "CUR", "END",
556 #ifdef SEEK_DATA
557 "DATA",
558 #endif
559 #ifdef SEEK_HOLE
560 "HOLE",
561 #endif
562 };
563 static DEFINE_STRARRAY(whences);
564
565 static const char *fcntl_cmds[] = {
566 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
567 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
568 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
569 "F_GETOWNER_UIDS",
570 };
571 static DEFINE_STRARRAY(fcntl_cmds);
572
573 static const char *rlimit_resources[] = {
574 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
575 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
576 "RTTIME",
577 };
578 static DEFINE_STRARRAY(rlimit_resources);
579
580 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
581 static DEFINE_STRARRAY(sighow);
582
583 static const char *clockid[] = {
584 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
585 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE",
586 };
587 static DEFINE_STRARRAY(clockid);
588
/*
 * Names for the 'family' argument of socket, indexed by the numeric address
 * family. The table must stay dense and in numeric order: "MPLS" (28) was
 * missing between "IB" (27) and "CAN" (29), which shifted "CAN" and every
 * later name down by one.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
/* strarray descriptor for socket_families[]; slot 0 names the value 0. */
static DEFINE_STRARRAY(socket_families);
598
599 #ifndef SOCK_TYPE_MASK
600 #define SOCK_TYPE_MASK 0xf
601 #endif
602
603 static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size,
604 struct syscall_arg *arg)
605 {
606 size_t printed;
607 int type = arg->val,
608 flags = type & ~SOCK_TYPE_MASK;
609
610 type &= SOCK_TYPE_MASK;
611 /*
612 * Can't use a strarray, MIPS may override for ABI reasons.
613 */
614 switch (type) {
615 #define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
616 P_SK_TYPE(STREAM);
617 P_SK_TYPE(DGRAM);
618 P_SK_TYPE(RAW);
619 P_SK_TYPE(RDM);
620 P_SK_TYPE(SEQPACKET);
621 P_SK_TYPE(DCCP);
622 P_SK_TYPE(PACKET);
623 #undef P_SK_TYPE
624 default:
625 printed = scnprintf(bf, size, "%#x", type);
626 }
627
628 #define P_SK_FLAG(n) \
629 if (flags & SOCK_##n) { \
630 printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
631 flags &= ~SOCK_##n; \
632 }
633
634 P_SK_FLAG(CLOEXEC);
635 P_SK_FLAG(NONBLOCK);
636 #undef P_SK_FLAG
637
638 if (flags)
639 printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
640
641 return printed;
642 }
643
644 #define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
645
646 #ifndef MSG_PROBE
647 #define MSG_PROBE 0x10
648 #endif
649 #ifndef MSG_WAITFORONE
650 #define MSG_WAITFORONE 0x10000
651 #endif
652 #ifndef MSG_SENDPAGE_NOTLAST
653 #define MSG_SENDPAGE_NOTLAST 0x20000
654 #endif
655 #ifndef MSG_FASTOPEN
656 #define MSG_FASTOPEN 0x20000000
657 #endif
658
659 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
660 struct syscall_arg *arg)
661 {
662 int printed = 0, flags = arg->val;
663
664 if (flags == 0)
665 return scnprintf(bf, size, "NONE");
666 #define P_MSG_FLAG(n) \
667 if (flags & MSG_##n) { \
668 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
669 flags &= ~MSG_##n; \
670 }
671
672 P_MSG_FLAG(OOB);
673 P_MSG_FLAG(PEEK);
674 P_MSG_FLAG(DONTROUTE);
675 P_MSG_FLAG(TRYHARD);
676 P_MSG_FLAG(CTRUNC);
677 P_MSG_FLAG(PROBE);
678 P_MSG_FLAG(TRUNC);
679 P_MSG_FLAG(DONTWAIT);
680 P_MSG_FLAG(EOR);
681 P_MSG_FLAG(WAITALL);
682 P_MSG_FLAG(FIN);
683 P_MSG_FLAG(SYN);
684 P_MSG_FLAG(CONFIRM);
685 P_MSG_FLAG(RST);
686 P_MSG_FLAG(ERRQUEUE);
687 P_MSG_FLAG(NOSIGNAL);
688 P_MSG_FLAG(MORE);
689 P_MSG_FLAG(WAITFORONE);
690 P_MSG_FLAG(SENDPAGE_NOTLAST);
691 P_MSG_FLAG(FASTOPEN);
692 P_MSG_FLAG(CMSG_CLOEXEC);
693 #undef P_MSG_FLAG
694
695 if (flags)
696 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
697
698 return printed;
699 }
700
701 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
702
703 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
704 struct syscall_arg *arg)
705 {
706 size_t printed = 0;
707 int mode = arg->val;
708
709 if (mode == F_OK) /* 0 */
710 return scnprintf(bf, size, "F");
711 #define P_MODE(n) \
712 if (mode & n##_OK) { \
713 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
714 mode &= ~n##_OK; \
715 }
716
717 P_MODE(R);
718 P_MODE(W);
719 P_MODE(X);
720 #undef P_MODE
721
722 if (mode)
723 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
724
725 return printed;
726 }
727
728 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
729
730 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
731 struct syscall_arg *arg)
732 {
733 int printed = 0, flags = arg->val;
734
735 if (!(flags & O_CREAT))
736 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
737
738 if (flags == 0)
739 return scnprintf(bf, size, "RDONLY");
740 #define P_FLAG(n) \
741 if (flags & O_##n) { \
742 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
743 flags &= ~O_##n; \
744 }
745
746 P_FLAG(APPEND);
747 P_FLAG(ASYNC);
748 P_FLAG(CLOEXEC);
749 P_FLAG(CREAT);
750 P_FLAG(DIRECT);
751 P_FLAG(DIRECTORY);
752 P_FLAG(EXCL);
753 P_FLAG(LARGEFILE);
754 P_FLAG(NOATIME);
755 P_FLAG(NOCTTY);
756 #ifdef O_NONBLOCK
757 P_FLAG(NONBLOCK);
758 #elif O_NDELAY
759 P_FLAG(NDELAY);
760 #endif
761 #ifdef O_PATH
762 P_FLAG(PATH);
763 #endif
764 P_FLAG(RDWR);
765 #ifdef O_DSYNC
766 if ((flags & O_SYNC) == O_SYNC)
767 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
768 else {
769 P_FLAG(DSYNC);
770 }
771 #else
772 P_FLAG(SYNC);
773 #endif
774 P_FLAG(TRUNC);
775 P_FLAG(WRONLY);
776 #undef P_FLAG
777
778 if (flags)
779 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
780
781 return printed;
782 }
783
784 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
785
786 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
787 struct syscall_arg *arg)
788 {
789 int printed = 0, flags = arg->val;
790
791 if (flags == 0)
792 return scnprintf(bf, size, "NONE");
793 #define P_FLAG(n) \
794 if (flags & EFD_##n) { \
795 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
796 flags &= ~EFD_##n; \
797 }
798
799 P_FLAG(SEMAPHORE);
800 P_FLAG(CLOEXEC);
801 P_FLAG(NONBLOCK);
802 #undef P_FLAG
803
804 if (flags)
805 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
806
807 return printed;
808 }
809
810 #define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
811
812 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
813 struct syscall_arg *arg)
814 {
815 int printed = 0, flags = arg->val;
816
817 #define P_FLAG(n) \
818 if (flags & O_##n) { \
819 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
820 flags &= ~O_##n; \
821 }
822
823 P_FLAG(CLOEXEC);
824 P_FLAG(NONBLOCK);
825 #undef P_FLAG
826
827 if (flags)
828 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
829
830 return printed;
831 }
832
833 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
834
835 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
836 {
837 int sig = arg->val;
838
839 switch (sig) {
840 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
841 P_SIGNUM(HUP);
842 P_SIGNUM(INT);
843 P_SIGNUM(QUIT);
844 P_SIGNUM(ILL);
845 P_SIGNUM(TRAP);
846 P_SIGNUM(ABRT);
847 P_SIGNUM(BUS);
848 P_SIGNUM(FPE);
849 P_SIGNUM(KILL);
850 P_SIGNUM(USR1);
851 P_SIGNUM(SEGV);
852 P_SIGNUM(USR2);
853 P_SIGNUM(PIPE);
854 P_SIGNUM(ALRM);
855 P_SIGNUM(TERM);
856 P_SIGNUM(CHLD);
857 P_SIGNUM(CONT);
858 P_SIGNUM(STOP);
859 P_SIGNUM(TSTP);
860 P_SIGNUM(TTIN);
861 P_SIGNUM(TTOU);
862 P_SIGNUM(URG);
863 P_SIGNUM(XCPU);
864 P_SIGNUM(XFSZ);
865 P_SIGNUM(VTALRM);
866 P_SIGNUM(PROF);
867 P_SIGNUM(WINCH);
868 P_SIGNUM(IO);
869 P_SIGNUM(PWR);
870 P_SIGNUM(SYS);
871 #ifdef SIGEMT
872 P_SIGNUM(EMT);
873 #endif
874 #ifdef SIGSTKFLT
875 P_SIGNUM(STKFLT);
876 #endif
877 #ifdef SIGSWI
878 P_SIGNUM(SWI);
879 #endif
880 default: break;
881 }
882
883 return scnprintf(bf, size, "%#x", sig);
884 }
885
886 #define SCA_SIGNUM syscall_arg__scnprintf_signum
887
888 #if defined(__i386__) || defined(__x86_64__)
889 /*
890 * FIXME: Make this available to all arches.
891 */
/* First tty ioctl request number; slot 0 of tioctls[] describes it. */
#define TCGETS 0x5401

/*
 * Names of the tty ioctl requests, indexed by (request - TCGETS).
 *
 * The table has holes, so the designators below are written as
 * "<request> - TCGETS" to stay in step with the 0x5401 offset the table is
 * registered with. The previous plain [0x27]/[0x50]/[0x60] designators
 * ignored that offset and put every name after the first hole one slot too
 * high. "TIOCGEXCL" is request number 0x40, not the successor of TIOCGPTLCK.
 * Unnamed slots are NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x5427 - TCGETS] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	[0x5440 - TCGETS] = "TIOCGEXCL",
	[0x5450 - TCGETS] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x5460 - TCGETS] = "FIOQSIZE",
};
911
/* strarray descriptor for tioctls[]; slot 0 names request 0x5401 (TCGETS). */
static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
913 #endif /* defined(__i386__) || defined(__x86_64__) */
914
/*
 * Shorthand for struct syscall_fmt initialisers: format argument number
 * 'arg' with the strarray beautifier, using the table strarray__<array>.
 * 'name' is not expanded anywhere; it only labels the argument at the point
 * of use.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
918
919 static struct syscall_fmt {
920 const char *name;
921 const char *alias;
922 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
923 void *arg_parm[6];
924 bool errmsg;
925 bool timeout;
926 bool hexret;
927 } syscall_fmts[] = {
928 { .name = "access", .errmsg = true,
929 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
930 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
931 { .name = "brk", .hexret = true,
932 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
933 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
934 { .name = "close", .errmsg = true,
935 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
936 { .name = "connect", .errmsg = true, },
937 { .name = "dup", .errmsg = true,
938 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
939 { .name = "dup2", .errmsg = true,
940 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
941 { .name = "dup3", .errmsg = true,
942 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
943 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
944 { .name = "eventfd2", .errmsg = true,
945 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
946 { .name = "faccessat", .errmsg = true,
947 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
948 { .name = "fadvise64", .errmsg = true,
949 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
950 { .name = "fallocate", .errmsg = true,
951 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
952 { .name = "fchdir", .errmsg = true,
953 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
954 { .name = "fchmod", .errmsg = true,
955 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
956 { .name = "fchmodat", .errmsg = true,
957 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
958 { .name = "fchown", .errmsg = true,
959 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
960 { .name = "fchownat", .errmsg = true,
961 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
962 { .name = "fcntl", .errmsg = true,
963 .arg_scnprintf = { [0] = SCA_FD, /* fd */
964 [1] = SCA_STRARRAY, /* cmd */ },
965 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
966 { .name = "fdatasync", .errmsg = true,
967 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
968 { .name = "flock", .errmsg = true,
969 .arg_scnprintf = { [0] = SCA_FD, /* fd */
970 [1] = SCA_FLOCK, /* cmd */ }, },
971 { .name = "fsetxattr", .errmsg = true,
972 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
973 { .name = "fstat", .errmsg = true, .alias = "newfstat",
974 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
975 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
976 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
977 { .name = "fstatfs", .errmsg = true,
978 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
979 { .name = "fsync", .errmsg = true,
980 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
981 { .name = "ftruncate", .errmsg = true,
982 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
983 { .name = "futex", .errmsg = true,
984 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
985 { .name = "futimesat", .errmsg = true,
986 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
987 { .name = "getdents", .errmsg = true,
988 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
989 { .name = "getdents64", .errmsg = true,
990 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
991 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
992 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
993 { .name = "ioctl", .errmsg = true,
994 .arg_scnprintf = { [0] = SCA_FD, /* fd */
995 #if defined(__i386__) || defined(__x86_64__)
996 /*
997 * FIXME: Make this available to all arches.
998 */
999 [1] = SCA_STRHEXARRAY, /* cmd */
1000 [2] = SCA_HEX, /* arg */ },
1001 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1002 #else
1003 [2] = SCA_HEX, /* arg */ }, },
1004 #endif
1005 { .name = "kill", .errmsg = true,
1006 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1007 { .name = "linkat", .errmsg = true,
1008 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1009 { .name = "lseek", .errmsg = true,
1010 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1011 [2] = SCA_STRARRAY, /* whence */ },
1012 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1013 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
1014 { .name = "madvise", .errmsg = true,
1015 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1016 [2] = SCA_MADV_BHV, /* behavior */ }, },
1017 { .name = "mkdirat", .errmsg = true,
1018 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1019 { .name = "mknodat", .errmsg = true,
1020 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1021 { .name = "mlock", .errmsg = true,
1022 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1023 { .name = "mlockall", .errmsg = true,
1024 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1025 { .name = "mmap", .hexret = true,
1026 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1027 [2] = SCA_MMAP_PROT, /* prot */
1028 [3] = SCA_MMAP_FLAGS, /* flags */
1029 [4] = SCA_FD, /* fd */ }, },
1030 { .name = "mprotect", .errmsg = true,
1031 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1032 [2] = SCA_MMAP_PROT, /* prot */ }, },
1033 { .name = "mremap", .hexret = true,
1034 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1035 [3] = SCA_MREMAP_FLAGS, /* flags */
1036 [4] = SCA_HEX, /* new_addr */ }, },
1037 { .name = "munlock", .errmsg = true,
1038 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1039 { .name = "munmap", .errmsg = true,
1040 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1041 { .name = "name_to_handle_at", .errmsg = true,
1042 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1043 { .name = "newfstatat", .errmsg = true,
1044 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1045 { .name = "open", .errmsg = true,
1046 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1047 { .name = "open_by_handle_at", .errmsg = true,
1048 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1049 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1050 { .name = "openat", .errmsg = true,
1051 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1052 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1053 { .name = "pipe2", .errmsg = true,
1054 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1055 { .name = "poll", .errmsg = true, .timeout = true, },
1056 { .name = "ppoll", .errmsg = true, .timeout = true, },
1057 { .name = "pread", .errmsg = true, .alias = "pread64",
1058 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1059 { .name = "preadv", .errmsg = true, .alias = "pread",
1060 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1061 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1062 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1063 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1064 { .name = "pwritev", .errmsg = true,
1065 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1066 { .name = "read", .errmsg = true,
1067 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1068 { .name = "readlinkat", .errmsg = true,
1069 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1070 { .name = "readv", .errmsg = true,
1071 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1072 { .name = "recvfrom", .errmsg = true,
1073 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1074 { .name = "recvmmsg", .errmsg = true,
1075 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1076 { .name = "recvmsg", .errmsg = true,
1077 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1078 { .name = "renameat", .errmsg = true,
1079 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1080 { .name = "rt_sigaction", .errmsg = true,
1081 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1082 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1083 { .name = "rt_sigqueueinfo", .errmsg = true,
1084 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1085 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1086 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1087 { .name = "select", .errmsg = true, .timeout = true, },
1088 { .name = "sendmmsg", .errmsg = true,
1089 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1090 { .name = "sendmsg", .errmsg = true,
1091 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
1092 { .name = "sendto", .errmsg = true,
1093 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
1094 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1095 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1096 { .name = "shutdown", .errmsg = true,
1097 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1098 { .name = "socket", .errmsg = true,
1099 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1100 [1] = SCA_SK_TYPE, /* type */ },
1101 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1102 { .name = "socketpair", .errmsg = true,
1103 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1104 [1] = SCA_SK_TYPE, /* type */ },
1105 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1106 { .name = "stat", .errmsg = true, .alias = "newstat", },
1107 { .name = "symlinkat", .errmsg = true,
1108 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1109 { .name = "tgkill", .errmsg = true,
1110 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1111 { .name = "tkill", .errmsg = true,
1112 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1113 { .name = "uname", .errmsg = true, .alias = "newuname", },
1114 { .name = "unlinkat", .errmsg = true,
1115 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1116 { .name = "utimensat", .errmsg = true,
1117 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
1118 { .name = "write", .errmsg = true,
1119 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1120 { .name = "writev", .errmsg = true,
1121 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1122 };
1123
1124 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1125 {
1126 const struct syscall_fmt *fmt = fmtp;
1127 return strcmp(name, fmt->name);
1128 }
1129
1130 static struct syscall_fmt *syscall_fmt__find(const char *name)
1131 {
1132 const int nmemb = ARRAY_SIZE(syscall_fmts);
1133 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1134 }
1135
/* Per-syscall-id description, filled in by trace__read_syscall_info(). */
struct syscall {
	/* "syscalls:sys_enter_<name>" tracepoint format (or its alias) */
	struct event_format *tp_format;
	/* number of entries in ->args (a leading "nr" field is dropped) */
	int		    nr_args;
	/* linked list of the tracepoint's argument fields */
	struct format_field *args;
	/* name as returned by audit_syscall_to_name() */
	const char	    *name;
	/* filtered: excluded by the ev_qualifier; is_exit: exit/exit_group */
	bool		    filtered, is_exit;
	/* matching syscall_fmts entry, NULL if there is none */
	struct syscall_fmt  *fmt;
	/* per-argument formatter, indexed by argument position */
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	/* per-argument formatter parameter, taken from ->fmt->arg_parm */
	void		    **arg_parm;
};
1147
1148 static size_t fprintf_duration(unsigned long t, FILE *fp)
1149 {
1150 double duration = (double)t / NSEC_PER_MSEC;
1151 size_t printed = fprintf(fp, "(");
1152
1153 if (duration >= 1.0)
1154 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1155 else if (duration >= 0.01)
1156 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1157 else
1158 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1159 return printed + fprintf(fp, "): ");
1160 }
1161
1162 struct thread_trace {
1163 u64 entry_time;
1164 u64 exit_time;
1165 bool entry_pending;
1166 unsigned long nr_events;
1167 unsigned long pfmaj, pfmin;
1168 char *entry_str;
1169 double runtime_ms;
1170 struct {
1171 int max;
1172 char **table;
1173 } paths;
1174
1175 struct intlist *syscall_stats;
1176 };
1177
1178 static struct thread_trace *thread_trace__new(void)
1179 {
1180 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1181
1182 if (ttrace)
1183 ttrace->paths.max = -1;
1184
1185 ttrace->syscall_stats = intlist__new(NULL);
1186
1187 return ttrace;
1188 }
1189
1190 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1191 {
1192 struct thread_trace *ttrace;
1193
1194 if (thread == NULL)
1195 goto fail;
1196
1197 if (thread__priv(thread) == NULL)
1198 thread__set_priv(thread, thread_trace__new());
1199
1200 if (thread__priv(thread) == NULL)
1201 goto fail;
1202
1203 ttrace = thread__priv(thread);
1204 ++ttrace->nr_events;
1205
1206 return ttrace;
1207 fail:
1208 color_fprintf(fp, PERF_COLOR_RED,
1209 "WARNING: not enough memory, dropping samples!\n");
1210 return NULL;
1211 }
1212
1213 #define TRACE_PFMAJ (1 << 0)
1214 #define TRACE_PFMIN (1 << 1)
1215
1216 struct trace {
1217 struct perf_tool tool;
1218 struct {
1219 int machine;
1220 int open_id;
1221 } audit;
1222 struct {
1223 int max;
1224 struct syscall *table;
1225 } syscalls;
1226 struct record_opts opts;
1227 struct perf_evlist *evlist;
1228 struct machine *host;
1229 struct thread *current;
1230 u64 base_time;
1231 FILE *output;
1232 unsigned long nr_events;
1233 struct strlist *ev_qualifier;
1234 const char *last_vfs_getname;
1235 struct intlist *tid_list;
1236 struct intlist *pid_list;
1237 struct {
1238 size_t nr;
1239 pid_t *entries;
1240 } filter_pids;
1241 double duration_filter;
1242 double runtime_ms;
1243 struct {
1244 u64 vfs_getname,
1245 proc_getname;
1246 } stats;
1247 bool not_ev_qualifier;
1248 bool live;
1249 bool full_time;
1250 bool sched;
1251 bool multiple_threads;
1252 bool summary;
1253 bool summary_only;
1254 bool show_comm;
1255 bool show_tool_stats;
1256 bool trace_syscalls;
1257 bool force;
1258 int trace_pgfaults;
1259 };
1260
1261 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1262 {
1263 struct thread_trace *ttrace = thread__priv(thread);
1264
1265 if (fd > ttrace->paths.max) {
1266 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1267
1268 if (npath == NULL)
1269 return -1;
1270
1271 if (ttrace->paths.max != -1) {
1272 memset(npath + ttrace->paths.max + 1, 0,
1273 (fd - ttrace->paths.max) * sizeof(char *));
1274 } else {
1275 memset(npath, 0, (fd + 1) * sizeof(char *));
1276 }
1277
1278 ttrace->paths.table = npath;
1279 ttrace->paths.max = fd;
1280 }
1281
1282 ttrace->paths.table[fd] = strdup(pathname);
1283
1284 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1285 }
1286
1287 static int thread__read_fd_path(struct thread *thread, int fd)
1288 {
1289 char linkname[PATH_MAX], pathname[PATH_MAX];
1290 struct stat st;
1291 int ret;
1292
1293 if (thread->pid_ == thread->tid) {
1294 scnprintf(linkname, sizeof(linkname),
1295 "/proc/%d/fd/%d", thread->pid_, fd);
1296 } else {
1297 scnprintf(linkname, sizeof(linkname),
1298 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1299 }
1300
1301 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1302 return -1;
1303
1304 ret = readlink(linkname, pathname, sizeof(pathname));
1305
1306 if (ret < 0 || ret > st.st_size)
1307 return -1;
1308
1309 pathname[ret] = '\0';
1310 return trace__set_fd_pathname(thread, fd, pathname);
1311 }
1312
1313 static const char *thread__fd_path(struct thread *thread, int fd,
1314 struct trace *trace)
1315 {
1316 struct thread_trace *ttrace = thread__priv(thread);
1317
1318 if (ttrace == NULL)
1319 return NULL;
1320
1321 if (fd < 0)
1322 return NULL;
1323
1324 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1325 if (!trace->live)
1326 return NULL;
1327 ++trace->stats.proc_getname;
1328 if (thread__read_fd_path(thread, fd))
1329 return NULL;
1330 }
1331
1332 return ttrace->paths.table[fd];
1333 }
1334
1335 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1336 struct syscall_arg *arg)
1337 {
1338 int fd = arg->val;
1339 size_t printed = scnprintf(bf, size, "%d", fd);
1340 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1341
1342 if (path)
1343 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1344
1345 return printed;
1346 }
1347
1348 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1349 struct syscall_arg *arg)
1350 {
1351 int fd = arg->val;
1352 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1353 struct thread_trace *ttrace = thread__priv(arg->thread);
1354
1355 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1356 zfree(&ttrace->paths.table[fd]);
1357
1358 return printed;
1359 }
1360
1361 static bool trace__filter_duration(struct trace *trace, double t)
1362 {
1363 return t < (trace->duration_filter * NSEC_PER_MSEC);
1364 }
1365
1366 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1367 {
1368 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1369
1370 return fprintf(fp, "%10.3f ", ts);
1371 }
1372
/*
 * Flags written by sig_handler().  An object that a signal handler stores to
 * and that other code reads must be of type volatile sig_atomic_t; with a
 * plain bool the compiler may cache the value in code that reads it.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: record that a signal arrived (done) and whether it was
 * SIGINT (interrupted).
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1381
1382 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1383 u64 duration, u64 tstamp, FILE *fp)
1384 {
1385 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1386 printed += fprintf_duration(duration, fp);
1387
1388 if (trace->multiple_threads) {
1389 if (trace->show_comm)
1390 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1391 printed += fprintf(fp, "%d ", thread->tid);
1392 }
1393
1394 return printed;
1395 }
1396
1397 static int trace__process_event(struct trace *trace, struct machine *machine,
1398 union perf_event *event, struct perf_sample *sample)
1399 {
1400 int ret = 0;
1401
1402 switch (event->header.type) {
1403 case PERF_RECORD_LOST:
1404 color_fprintf(trace->output, PERF_COLOR_RED,
1405 "LOST %" PRIu64 " events!\n", event->lost.lost);
1406 ret = machine__process_lost_event(machine, event, sample);
1407 default:
1408 ret = machine__process_event(machine, event, sample);
1409 break;
1410 }
1411
1412 return ret;
1413 }
1414
1415 static int trace__tool_process(struct perf_tool *tool,
1416 union perf_event *event,
1417 struct perf_sample *sample,
1418 struct machine *machine)
1419 {
1420 struct trace *trace = container_of(tool, struct trace, tool);
1421 return trace__process_event(trace, machine, event, sample);
1422 }
1423
1424 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1425 {
1426 int err = symbol__init(NULL);
1427
1428 if (err)
1429 return err;
1430
1431 trace->host = machine__new_host();
1432 if (trace->host == NULL)
1433 return -ENOMEM;
1434
1435 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1436 evlist->threads, trace__tool_process, false);
1437 if (err)
1438 symbol__exit();
1439
1440 return err;
1441 }
1442
1443 static int syscall__set_arg_fmts(struct syscall *sc)
1444 {
1445 struct format_field *field;
1446 int idx = 0;
1447
1448 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1449 if (sc->arg_scnprintf == NULL)
1450 return -1;
1451
1452 if (sc->fmt)
1453 sc->arg_parm = sc->fmt->arg_parm;
1454
1455 for (field = sc->args; field; field = field->next) {
1456 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1457 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1458 else if (field->flags & FIELD_IS_POINTER)
1459 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1460 ++idx;
1461 }
1462
1463 return 0;
1464 }
1465
1466 static int trace__read_syscall_info(struct trace *trace, int id)
1467 {
1468 char tp_name[128];
1469 struct syscall *sc;
1470 const char *name = audit_syscall_to_name(id, trace->audit.machine);
1471
1472 if (name == NULL)
1473 return -1;
1474
1475 if (id > trace->syscalls.max) {
1476 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1477
1478 if (nsyscalls == NULL)
1479 return -1;
1480
1481 if (trace->syscalls.max != -1) {
1482 memset(nsyscalls + trace->syscalls.max + 1, 0,
1483 (id - trace->syscalls.max) * sizeof(*sc));
1484 } else {
1485 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1486 }
1487
1488 trace->syscalls.table = nsyscalls;
1489 trace->syscalls.max = id;
1490 }
1491
1492 sc = trace->syscalls.table + id;
1493 sc->name = name;
1494
1495 if (trace->ev_qualifier) {
1496 bool in = strlist__find(trace->ev_qualifier, name) != NULL;
1497
1498 if (!(in ^ trace->not_ev_qualifier)) {
1499 sc->filtered = true;
1500 /*
1501 * No need to do read tracepoint information since this will be
1502 * filtered out.
1503 */
1504 return 0;
1505 }
1506 }
1507
1508 sc->fmt = syscall_fmt__find(sc->name);
1509
1510 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1511 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1512
1513 if (sc->tp_format == NULL && sc->fmt && sc->fmt->alias) {
1514 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1515 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1516 }
1517
1518 if (sc->tp_format == NULL)
1519 return -1;
1520
1521 sc->args = sc->tp_format->format.fields;
1522 sc->nr_args = sc->tp_format->format.nr_fields;
1523 /* drop nr field - not relevant here; does not exist on older kernels */
1524 if (sc->args && strcmp(sc->args->name, "nr") == 0) {
1525 sc->args = sc->args->next;
1526 --sc->nr_args;
1527 }
1528
1529 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1530
1531 return syscall__set_arg_fmts(sc);
1532 }
1533
1534 /*
1535 * args is to be interpreted as a series of longs but we need to handle
1536 * 8-byte unaligned accesses. args points to raw_data within the event
1537 * and raw_data is guaranteed to be 8-byte unaligned because it is
1538 * preceded by raw_size which is a u32. So we need to copy args to a temp
1539 * variable to read it. Most notably this avoids extended load instructions
1540 * on unaligned addresses
1541 */
1542
1543 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1544 unsigned char *args, struct trace *trace,
1545 struct thread *thread)
1546 {
1547 size_t printed = 0;
1548 unsigned char *p;
1549 unsigned long val;
1550
1551 if (sc->args != NULL) {
1552 struct format_field *field;
1553 u8 bit = 1;
1554 struct syscall_arg arg = {
1555 .idx = 0,
1556 .mask = 0,
1557 .trace = trace,
1558 .thread = thread,
1559 };
1560
1561 for (field = sc->args; field;
1562 field = field->next, ++arg.idx, bit <<= 1) {
1563 if (arg.mask & bit)
1564 continue;
1565
1566 /* special care for unaligned accesses */
1567 p = args + sizeof(unsigned long) * arg.idx;
1568 memcpy(&val, p, sizeof(val));
1569
1570 /*
1571 * Suppress this argument if its value is zero and
1572 * and we don't have a string associated in an
1573 * strarray for it.
1574 */
1575 if (val == 0 &&
1576 !(sc->arg_scnprintf &&
1577 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1578 sc->arg_parm[arg.idx]))
1579 continue;
1580
1581 printed += scnprintf(bf + printed, size - printed,
1582 "%s%s: ", printed ? ", " : "", field->name);
1583 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1584 arg.val = val;
1585 if (sc->arg_parm)
1586 arg.parm = sc->arg_parm[arg.idx];
1587 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1588 size - printed, &arg);
1589 } else {
1590 printed += scnprintf(bf + printed, size - printed,
1591 "%ld", val);
1592 }
1593 }
1594 } else {
1595 int i = 0;
1596
1597 while (i < 6) {
1598 /* special care for unaligned accesses */
1599 p = args + sizeof(unsigned long) * i;
1600 memcpy(&val, p, sizeof(val));
1601 printed += scnprintf(bf + printed, size - printed,
1602 "%sarg%d: %ld",
1603 printed ? ", " : "", i, val);
1604 ++i;
1605 }
1606 }
1607
1608 return printed;
1609 }
1610
/*
 * Signature of the per-evsel sample handlers stored in evsel->handler
 * (trace__sys_enter, trace__sys_exit, trace__pgfault, ...).
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1614
1615 static struct syscall *trace__syscall_info(struct trace *trace,
1616 struct perf_evsel *evsel, int id)
1617 {
1618
1619 if (id < 0) {
1620
1621 /*
1622 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1623 * before that, leaving at a higher verbosity level till that is
1624 * explained. Reproduced with plain ftrace with:
1625 *
1626 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1627 * grep "NR -1 " /t/trace_pipe
1628 *
1629 * After generating some load on the machine.
1630 */
1631 if (verbose > 1) {
1632 static u64 n;
1633 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1634 id, perf_evsel__name(evsel), ++n);
1635 }
1636 return NULL;
1637 }
1638
1639 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1640 trace__read_syscall_info(trace, id))
1641 goto out_cant_read;
1642
1643 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1644 goto out_cant_read;
1645
1646 return &trace->syscalls.table[id];
1647
1648 out_cant_read:
1649 if (verbose) {
1650 fprintf(trace->output, "Problems reading syscall %d", id);
1651 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1652 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1653 fputs(" information\n", trace->output);
1654 }
1655 return NULL;
1656 }
1657
1658 static void thread__update_stats(struct thread_trace *ttrace,
1659 int id, struct perf_sample *sample)
1660 {
1661 struct int_node *inode;
1662 struct stats *stats;
1663 u64 duration = 0;
1664
1665 inode = intlist__findnew(ttrace->syscall_stats, id);
1666 if (inode == NULL)
1667 return;
1668
1669 stats = inode->priv;
1670 if (stats == NULL) {
1671 stats = malloc(sizeof(struct stats));
1672 if (stats == NULL)
1673 return;
1674 init_stats(stats);
1675 inode->priv = stats;
1676 }
1677
1678 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1679 duration = sample->time - ttrace->entry_time;
1680
1681 update_stats(stats, duration);
1682 }
1683
1684 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1685 {
1686 struct thread_trace *ttrace;
1687 u64 duration;
1688 size_t printed;
1689
1690 if (trace->current == NULL)
1691 return 0;
1692
1693 ttrace = thread__priv(trace->current);
1694
1695 if (!ttrace->entry_pending)
1696 return 0;
1697
1698 duration = sample->time - ttrace->entry_time;
1699
1700 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1701 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1702 ttrace->entry_pending = false;
1703
1704 return printed;
1705 }
1706
1707 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1708 union perf_event *event __maybe_unused,
1709 struct perf_sample *sample)
1710 {
1711 char *msg;
1712 void *args;
1713 size_t printed = 0;
1714 struct thread *thread;
1715 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1716 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1717 struct thread_trace *ttrace;
1718
1719 if (sc == NULL)
1720 return -1;
1721
1722 if (sc->filtered)
1723 return 0;
1724
1725 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1726 ttrace = thread__trace(thread, trace->output);
1727 if (ttrace == NULL)
1728 return -1;
1729
1730 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1731
1732 if (ttrace->entry_str == NULL) {
1733 ttrace->entry_str = malloc(1024);
1734 if (!ttrace->entry_str)
1735 return -1;
1736 }
1737
1738 if (!trace->summary_only)
1739 trace__printf_interrupted_entry(trace, sample);
1740
1741 ttrace->entry_time = sample->time;
1742 msg = ttrace->entry_str;
1743 printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name);
1744
1745 printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed,
1746 args, trace, thread);
1747
1748 if (sc->is_exit) {
1749 if (!trace->duration_filter && !trace->summary_only) {
1750 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1751 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1752 }
1753 } else
1754 ttrace->entry_pending = true;
1755
1756 if (trace->current != thread) {
1757 thread__put(trace->current);
1758 trace->current = thread__get(thread);
1759 }
1760
1761 return 0;
1762 }
1763
1764 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1765 union perf_event *event __maybe_unused,
1766 struct perf_sample *sample)
1767 {
1768 long ret;
1769 u64 duration = 0;
1770 struct thread *thread;
1771 int id = perf_evsel__sc_tp_uint(evsel, id, sample);
1772 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1773 struct thread_trace *ttrace;
1774
1775 if (sc == NULL)
1776 return -1;
1777
1778 if (sc->filtered)
1779 return 0;
1780
1781 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1782 ttrace = thread__trace(thread, trace->output);
1783 if (ttrace == NULL)
1784 return -1;
1785
1786 if (trace->summary)
1787 thread__update_stats(ttrace, id, sample);
1788
1789 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1790
1791 if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) {
1792 trace__set_fd_pathname(thread, ret, trace->last_vfs_getname);
1793 trace->last_vfs_getname = NULL;
1794 ++trace->stats.vfs_getname;
1795 }
1796
1797 ttrace->exit_time = sample->time;
1798
1799 if (ttrace->entry_time) {
1800 duration = sample->time - ttrace->entry_time;
1801 if (trace__filter_duration(trace, duration))
1802 goto out;
1803 } else if (trace->duration_filter)
1804 goto out;
1805
1806 if (trace->summary_only)
1807 goto out;
1808
1809 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1810
1811 if (ttrace->entry_pending) {
1812 fprintf(trace->output, "%-70s", ttrace->entry_str);
1813 } else {
1814 fprintf(trace->output, " ... [");
1815 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1816 fprintf(trace->output, "]: %s()", sc->name);
1817 }
1818
1819 if (sc->fmt == NULL) {
1820 signed_print:
1821 fprintf(trace->output, ") = %ld", ret);
1822 } else if (ret < 0 && sc->fmt->errmsg) {
1823 char bf[STRERR_BUFSIZE];
1824 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1825 *e = audit_errno_to_name(-ret);
1826
1827 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1828 } else if (ret == 0 && sc->fmt->timeout)
1829 fprintf(trace->output, ") = 0 Timeout");
1830 else if (sc->fmt->hexret)
1831 fprintf(trace->output, ") = %#lx", ret);
1832 else
1833 goto signed_print;
1834
1835 fputc('\n', trace->output);
1836 out:
1837 ttrace->entry_pending = false;
1838
1839 return 0;
1840 }
1841
1842 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
1843 union perf_event *event __maybe_unused,
1844 struct perf_sample *sample)
1845 {
1846 trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname");
1847 return 0;
1848 }
1849
1850 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
1851 union perf_event *event __maybe_unused,
1852 struct perf_sample *sample)
1853 {
1854 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1855 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
1856 struct thread *thread = machine__findnew_thread(trace->host,
1857 sample->pid,
1858 sample->tid);
1859 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1860
1861 if (ttrace == NULL)
1862 goto out_dump;
1863
1864 ttrace->runtime_ms += runtime_ms;
1865 trace->runtime_ms += runtime_ms;
1866 return 0;
1867
1868 out_dump:
1869 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1870 evsel->name,
1871 perf_evsel__strval(evsel, sample, "comm"),
1872 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1873 runtime,
1874 perf_evsel__intval(evsel, sample, "vruntime"));
1875 return 0;
1876 }
1877
1878 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1879 union perf_event *event __maybe_unused,
1880 struct perf_sample *sample)
1881 {
1882 trace__printf_interrupted_entry(trace, sample);
1883 trace__fprintf_tstamp(trace, sample->time, trace->output);
1884
1885 if (trace->trace_syscalls)
1886 fprintf(trace->output, "( ): ");
1887
1888 fprintf(trace->output, "%s:", evsel->name);
1889
1890 if (evsel->tp_format) {
1891 event_format__fprintf(evsel->tp_format, sample->cpu,
1892 sample->raw_data, sample->raw_size,
1893 trace->output);
1894 }
1895
1896 fprintf(trace->output, ")\n");
1897 return 0;
1898 }
1899
1900 static void print_location(FILE *f, struct perf_sample *sample,
1901 struct addr_location *al,
1902 bool print_dso, bool print_sym)
1903 {
1904
1905 if ((verbose || print_dso) && al->map)
1906 fprintf(f, "%s@", al->map->dso->long_name);
1907
1908 if ((verbose || print_sym) && al->sym)
1909 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
1910 al->addr - al->sym->start);
1911 else if (al->map)
1912 fprintf(f, "0x%" PRIx64, al->addr);
1913 else
1914 fprintf(f, "0x%" PRIx64, sample->addr);
1915 }
1916
1917 static int trace__pgfault(struct trace *trace,
1918 struct perf_evsel *evsel,
1919 union perf_event *event,
1920 struct perf_sample *sample)
1921 {
1922 struct thread *thread;
1923 u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
1924 struct addr_location al;
1925 char map_type = 'd';
1926 struct thread_trace *ttrace;
1927
1928 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1929 ttrace = thread__trace(thread, trace->output);
1930 if (ttrace == NULL)
1931 return -1;
1932
1933 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1934 ttrace->pfmaj++;
1935 else
1936 ttrace->pfmin++;
1937
1938 if (trace->summary_only)
1939 return 0;
1940
1941 thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
1942 sample->ip, &al);
1943
1944 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1945
1946 fprintf(trace->output, "%sfault [",
1947 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1948 "maj" : "min");
1949
1950 print_location(trace->output, sample, &al, false, true);
1951
1952 fprintf(trace->output, "] => ");
1953
1954 thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
1955 sample->addr, &al);
1956
1957 if (!al.map) {
1958 thread__find_addr_location(thread, cpumode,
1959 MAP__FUNCTION, sample->addr, &al);
1960
1961 if (al.map)
1962 map_type = 'x';
1963 else
1964 map_type = '?';
1965 }
1966
1967 print_location(trace->output, sample, &al, true, false);
1968
1969 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
1970
1971 return 0;
1972 }
1973
1974 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
1975 {
1976 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
1977 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
1978 return false;
1979
1980 if (trace->pid_list || trace->tid_list)
1981 return true;
1982
1983 return false;
1984 }
1985
1986 static int trace__process_sample(struct perf_tool *tool,
1987 union perf_event *event,
1988 struct perf_sample *sample,
1989 struct perf_evsel *evsel,
1990 struct machine *machine __maybe_unused)
1991 {
1992 struct trace *trace = container_of(tool, struct trace, tool);
1993 int err = 0;
1994
1995 tracepoint_handler handler = evsel->handler;
1996
1997 if (skip_sample(trace, sample))
1998 return 0;
1999
2000 if (!trace->full_time && trace->base_time == 0)
2001 trace->base_time = sample->time;
2002
2003 if (handler) {
2004 ++trace->nr_events;
2005 handler(trace, evsel, event, sample);
2006 }
2007
2008 return err;
2009 }
2010
2011 static int parse_target_str(struct trace *trace)
2012 {
2013 if (trace->opts.target.pid) {
2014 trace->pid_list = intlist__new(trace->opts.target.pid);
2015 if (trace->pid_list == NULL) {
2016 pr_err("Error parsing process id string\n");
2017 return -EINVAL;
2018 }
2019 }
2020
2021 if (trace->opts.target.tid) {
2022 trace->tid_list = intlist__new(trace->opts.target.tid);
2023 if (trace->tid_list == NULL) {
2024 pr_err("Error parsing thread id string\n");
2025 return -EINVAL;
2026 }
2027 }
2028
2029 return 0;
2030 }
2031
2032 static int trace__record(struct trace *trace, int argc, const char **argv)
2033 {
2034 unsigned int rec_argc, i, j;
2035 const char **rec_argv;
2036 const char * const record_args[] = {
2037 "record",
2038 "-R",
2039 "-m", "1024",
2040 "-c", "1",
2041 };
2042
2043 const char * const sc_args[] = { "-e", };
2044 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2045 const char * const majpf_args[] = { "-e", "major-faults" };
2046 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2047 const char * const minpf_args[] = { "-e", "minor-faults" };
2048 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2049
2050 /* +1 is for the event string below */
2051 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2052 majpf_args_nr + minpf_args_nr + argc;
2053 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2054
2055 if (rec_argv == NULL)
2056 return -ENOMEM;
2057
2058 j = 0;
2059 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2060 rec_argv[j++] = record_args[i];
2061
2062 if (trace->trace_syscalls) {
2063 for (i = 0; i < sc_args_nr; i++)
2064 rec_argv[j++] = sc_args[i];
2065
2066 /* event string may be different for older kernels - e.g., RHEL6 */
2067 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2068 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2069 else if (is_valid_tracepoint("syscalls:sys_enter"))
2070 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2071 else {
2072 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2073 return -1;
2074 }
2075 }
2076
2077 if (trace->trace_pgfaults & TRACE_PFMAJ)
2078 for (i = 0; i < majpf_args_nr; i++)
2079 rec_argv[j++] = majpf_args[i];
2080
2081 if (trace->trace_pgfaults & TRACE_PFMIN)
2082 for (i = 0; i < minpf_args_nr; i++)
2083 rec_argv[j++] = minpf_args[i];
2084
2085 for (i = 0; i < (unsigned int)argc; i++)
2086 rec_argv[j++] = argv[i];
2087
2088 return cmd_record(j, rec_argv, NULL);
2089 }
2090
2091 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2092
2093 static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2094 {
2095 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2096 if (evsel == NULL)
2097 return;
2098
2099 if (perf_evsel__field(evsel, "pathname") == NULL) {
2100 perf_evsel__delete(evsel);
2101 return;
2102 }
2103
2104 evsel->handler = trace__vfs_getname;
2105 perf_evlist__add(evlist, evsel);
2106 }
2107
2108 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2109 u64 config)
2110 {
2111 struct perf_evsel *evsel;
2112 struct perf_event_attr attr = {
2113 .type = PERF_TYPE_SOFTWARE,
2114 .mmap_data = 1,
2115 };
2116
2117 attr.config = config;
2118 attr.sample_period = 1;
2119
2120 event_attr_init(&attr);
2121
2122 evsel = perf_evsel__new(&attr);
2123 if (!evsel)
2124 return -ENOMEM;
2125
2126 evsel->handler = trace__pgfault;
2127 perf_evlist__add(evlist, evsel);
2128
2129 return 0;
2130 }
2131
2132 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2133 {
2134 const u32 type = event->header.type;
2135 struct perf_evsel *evsel;
2136
2137 if (!trace->full_time && trace->base_time == 0)
2138 trace->base_time = sample->time;
2139
2140 if (type != PERF_RECORD_SAMPLE) {
2141 trace__process_event(trace, trace->host, event, sample);
2142 return;
2143 }
2144
2145 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2146 if (evsel == NULL) {
2147 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2148 return;
2149 }
2150
2151 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2152 sample->raw_data == NULL) {
2153 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2154 perf_evsel__name(evsel), sample->tid,
2155 sample->cpu, sample->raw_size);
2156 } else {
2157 tracepoint_handler handler = evsel->handler;
2158 handler(trace, evsel, event, sample);
2159 }
2160 }
2161
2162 static int trace__run(struct trace *trace, int argc, const char **argv)
2163 {
2164 struct perf_evlist *evlist = trace->evlist;
2165 int err = -1, i;
2166 unsigned long before;
2167 const bool forks = argc > 0;
2168 bool draining = false;
2169
2170 trace->live = true;
2171
2172 if (trace->trace_syscalls &&
2173 perf_evlist__add_syscall_newtp(evlist, trace__sys_enter,
2174 trace__sys_exit))
2175 goto out_error_raw_syscalls;
2176
2177 if (trace->trace_syscalls)
2178 perf_evlist__add_vfs_getname(evlist);
2179
2180 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2181 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2182 goto out_error_mem;
2183 }
2184
2185 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2186 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2187 goto out_error_mem;
2188
2189 if (trace->sched &&
2190 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2191 trace__sched_stat_runtime))
2192 goto out_error_sched_stat_runtime;
2193
2194 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2195 if (err < 0) {
2196 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2197 goto out_delete_evlist;
2198 }
2199
2200 err = trace__symbols_init(trace, evlist);
2201 if (err < 0) {
2202 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2203 goto out_delete_evlist;
2204 }
2205
2206 perf_evlist__config(evlist, &trace->opts);
2207
2208 signal(SIGCHLD, sig_handler);
2209 signal(SIGINT, sig_handler);
2210
2211 if (forks) {
2212 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2213 argv, false, NULL);
2214 if (err < 0) {
2215 fprintf(trace->output, "Couldn't run the workload!\n");
2216 goto out_delete_evlist;
2217 }
2218 }
2219
2220 err = perf_evlist__open(evlist);
2221 if (err < 0)
2222 goto out_error_open;
2223
2224 /*
2225 * Better not use !target__has_task() here because we need to cover the
2226 * case where no threads were specified in the command line, but a
2227 * workload was, and in that case we will fill in the thread_map when
2228 * we fork the workload in perf_evlist__prepare_workload.
2229 */
2230 if (trace->filter_pids.nr > 0)
2231 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2232 else if (evlist->threads->map[0] == -1)
2233 err = perf_evlist__set_filter_pid(evlist, getpid());
2234
2235 if (err < 0) {
2236 printf("err=%d,%s\n", -err, strerror(-err));
2237 exit(1);
2238 }
2239
2240 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2241 if (err < 0)
2242 goto out_error_mmap;
2243
2244 if (forks)
2245 perf_evlist__start_workload(evlist);
2246 else
2247 perf_evlist__enable(evlist);
2248
2249 trace->multiple_threads = evlist->threads->map[0] == -1 ||
2250 evlist->threads->nr > 1 ||
2251 perf_evlist__first(evlist)->attr.inherit;
2252 again:
2253 before = trace->nr_events;
2254
2255 for (i = 0; i < evlist->nr_mmaps; i++) {
2256 union perf_event *event;
2257
2258 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2259 struct perf_sample sample;
2260
2261 ++trace->nr_events;
2262
2263 err = perf_evlist__parse_sample(evlist, event, &sample);
2264 if (err) {
2265 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2266 goto next_event;
2267 }
2268
2269 trace__handle_event(trace, event, &sample);
2270 next_event:
2271 perf_evlist__mmap_consume(evlist, i);
2272
2273 if (interrupted)
2274 goto out_disable;
2275 }
2276 }
2277
2278 if (trace->nr_events == before) {
2279 int timeout = done ? 100 : -1;
2280
2281 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2282 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2283 draining = true;
2284
2285 goto again;
2286 }
2287 } else {
2288 goto again;
2289 }
2290
2291 out_disable:
2292 thread__zput(trace->current);
2293
2294 perf_evlist__disable(evlist);
2295
2296 if (!err) {
2297 if (trace->summary)
2298 trace__fprintf_thread_summary(trace, trace->output);
2299
2300 if (trace->show_tool_stats) {
2301 fprintf(trace->output, "Stats:\n "
2302 " vfs_getname : %" PRIu64 "\n"
2303 " proc_getname: %" PRIu64 "\n",
2304 trace->stats.vfs_getname,
2305 trace->stats.proc_getname);
2306 }
2307 }
2308
2309 out_delete_evlist:
2310 perf_evlist__delete(evlist);
2311 trace->evlist = NULL;
2312 trace->live = false;
2313 return err;
2314 {
2315 char errbuf[BUFSIZ];
2316
2317 out_error_sched_stat_runtime:
2318 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2319 goto out_error;
2320
2321 out_error_raw_syscalls:
2322 debugfs__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2323 goto out_error;
2324
2325 out_error_mmap:
2326 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2327 goto out_error;
2328
2329 out_error_open:
2330 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2331
2332 out_error:
2333 fprintf(trace->output, "%s\n", errbuf);
2334 goto out_delete_evlist;
2335 }
2336 out_error_mem:
2337 fprintf(trace->output, "Not enough memory to run!\n");
2338 goto out_delete_evlist;
2339 }
2340
2341 static int trace__replay(struct trace *trace)
2342 {
2343 const struct perf_evsel_str_handler handlers[] = {
2344 { "probe:vfs_getname", trace__vfs_getname, },
2345 };
2346 struct perf_data_file file = {
2347 .path = input_name,
2348 .mode = PERF_DATA_MODE_READ,
2349 .force = trace->force,
2350 };
2351 struct perf_session *session;
2352 struct perf_evsel *evsel;
2353 int err = -1;
2354
2355 trace->tool.sample = trace__process_sample;
2356 trace->tool.mmap = perf_event__process_mmap;
2357 trace->tool.mmap2 = perf_event__process_mmap2;
2358 trace->tool.comm = perf_event__process_comm;
2359 trace->tool.exit = perf_event__process_exit;
2360 trace->tool.fork = perf_event__process_fork;
2361 trace->tool.attr = perf_event__process_attr;
2362 trace->tool.tracing_data = perf_event__process_tracing_data;
2363 trace->tool.build_id = perf_event__process_build_id;
2364
2365 trace->tool.ordered_events = true;
2366 trace->tool.ordering_requires_timestamps = true;
2367
2368 /* add tid to output */
2369 trace->multiple_threads = true;
2370
2371 session = perf_session__new(&file, false, &trace->tool);
2372 if (session == NULL)
2373 return -1;
2374
2375 if (symbol__init(&session->header.env) < 0)
2376 goto out;
2377
2378 trace->host = &session->machines.host;
2379
2380 err = perf_session__set_tracepoints_handlers(session, handlers);
2381 if (err)
2382 goto out;
2383
2384 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2385 "raw_syscalls:sys_enter");
2386 /* older kernels have syscalls tp versus raw_syscalls */
2387 if (evsel == NULL)
2388 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2389 "syscalls:sys_enter");
2390
2391 if (evsel &&
2392 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2393 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2394 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2395 goto out;
2396 }
2397
2398 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2399 "raw_syscalls:sys_exit");
2400 if (evsel == NULL)
2401 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2402 "syscalls:sys_exit");
2403 if (evsel &&
2404 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2405 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2406 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2407 goto out;
2408 }
2409
2410 evlist__for_each(session->evlist, evsel) {
2411 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2412 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2413 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2414 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2415 evsel->handler = trace__pgfault;
2416 }
2417
2418 err = parse_target_str(trace);
2419 if (err != 0)
2420 goto out;
2421
2422 setup_pager();
2423
2424 err = perf_session__process_events(session);
2425 if (err)
2426 pr_err("Failed to process events, error %d", err);
2427
2428 else if (trace->summary)
2429 trace__fprintf_thread_summary(trace, trace->output);
2430
2431 out:
2432 perf_session__delete(session);
2433
2434 return err;
2435 }
2436
/*
 * Print the title of the per-thread summary to @fp.
 *
 * Returns the value reported by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2445
2446 static size_t thread__dump_stats(struct thread_trace *ttrace,
2447 struct trace *trace, FILE *fp)
2448 {
2449 struct stats *stats;
2450 size_t printed = 0;
2451 struct syscall *sc;
2452 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2453
2454 if (inode == NULL)
2455 return 0;
2456
2457 printed += fprintf(fp, "\n");
2458
2459 printed += fprintf(fp, " syscall calls min avg max stddev\n");
2460 printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n");
2461 printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n");
2462
2463 /* each int_node is a syscall */
2464 while (inode) {
2465 stats = inode->priv;
2466 if (stats) {
2467 double min = (double)(stats->min) / NSEC_PER_MSEC;
2468 double max = (double)(stats->max) / NSEC_PER_MSEC;
2469 double avg = avg_stats(stats);
2470 double pct;
2471 u64 n = (u64) stats->n;
2472
2473 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2474 avg /= NSEC_PER_MSEC;
2475
2476 sc = &trace->syscalls.table[inode->i];
2477 printed += fprintf(fp, " %-15s", sc->name);
2478 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f",
2479 n, min, avg);
2480 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2481 }
2482
2483 inode = intlist__next(inode);
2484 }
2485
2486 printed += fprintf(fp, "\n\n");
2487
2488 return printed;
2489 }
2490
2491 /* struct used to pass data to per-thread function */
2492 struct summary_data {
2493 FILE *fp;
2494 struct trace *trace;
2495 size_t printed;
2496 };
2497
2498 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2499 {
2500 struct summary_data *data = priv;
2501 FILE *fp = data->fp;
2502 size_t printed = data->printed;
2503 struct trace *trace = data->trace;
2504 struct thread_trace *ttrace = thread__priv(thread);
2505 double ratio;
2506
2507 if (ttrace == NULL)
2508 return 0;
2509
2510 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2511
2512 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2513 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2514 printed += fprintf(fp, "%.1f%%", ratio);
2515 if (ttrace->pfmaj)
2516 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2517 if (ttrace->pfmin)
2518 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2519 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2520 printed += thread__dump_stats(ttrace, trace, fp);
2521
2522 data->printed += printed;
2523
2524 return 0;
2525 }
2526
2527 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2528 {
2529 struct summary_data data = {
2530 .fp = fp,
2531 .trace = trace
2532 };
2533 data.printed = trace__fprintf_threads_header(fp);
2534
2535 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2536
2537 return data.printed;
2538 }
2539
2540 static int trace__set_duration(const struct option *opt, const char *str,
2541 int unset __maybe_unused)
2542 {
2543 struct trace *trace = opt->value;
2544
2545 trace->duration_filter = atof(str);
2546 return 0;
2547 }
2548
2549 static int trace__set_filter_pids(const struct option *opt, const char *str,
2550 int unset __maybe_unused)
2551 {
2552 int ret = -1;
2553 size_t i;
2554 struct trace *trace = opt->value;
2555 /*
2556 * FIXME: introduce a intarray class, plain parse csv and create a
2557 * { int nr, int entries[] } struct...
2558 */
2559 struct intlist *list = intlist__new(str);
2560
2561 if (list == NULL)
2562 return -1;
2563
2564 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2565 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2566
2567 if (trace->filter_pids.entries == NULL)
2568 goto out;
2569
2570 trace->filter_pids.entries[0] = getpid();
2571
2572 for (i = 1; i < trace->filter_pids.nr; ++i)
2573 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2574
2575 intlist__delete(list);
2576 ret = 0;
2577 out:
2578 return ret;
2579 }
2580
2581 static int trace__open_output(struct trace *trace, const char *filename)
2582 {
2583 struct stat st;
2584
2585 if (!stat(filename, &st) && st.st_size) {
2586 char oldname[PATH_MAX];
2587
2588 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2589 unlink(oldname);
2590 rename(filename, oldname);
2591 }
2592
2593 trace->output = fopen(filename, "w");
2594
2595 return trace->output == NULL ? -errno : 0;
2596 }
2597
2598 static int parse_pagefaults(const struct option *opt, const char *str,
2599 int unset __maybe_unused)
2600 {
2601 int *trace_pgfaults = opt->value;
2602
2603 if (strcmp(str, "all") == 0)
2604 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2605 else if (strcmp(str, "maj") == 0)
2606 *trace_pgfaults |= TRACE_PFMAJ;
2607 else if (strcmp(str, "min") == 0)
2608 *trace_pgfaults |= TRACE_PFMIN;
2609 else
2610 return -1;
2611
2612 return 0;
2613 }
2614
2615 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2616 {
2617 struct perf_evsel *evsel;
2618
2619 evlist__for_each(evlist, evsel)
2620 evsel->handler = handler;
2621 }
2622
2623 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2624 {
2625 const char *trace_usage[] = {
2626 "perf trace [<options>] [<command>]",
2627 "perf trace [<options>] -- <command> [<options>]",
2628 "perf trace record [<options>] [<command>]",
2629 "perf trace record [<options>] -- <command> [<options>]",
2630 NULL
2631 };
2632 struct trace trace = {
2633 .audit = {
2634 .machine = audit_detect_machine(),
2635 .open_id = audit_name_to_syscall("open", trace.audit.machine),
2636 },
2637 .syscalls = {
2638 . max = -1,
2639 },
2640 .opts = {
2641 .target = {
2642 .uid = UINT_MAX,
2643 .uses_mmap = true,
2644 },
2645 .user_freq = UINT_MAX,
2646 .user_interval = ULLONG_MAX,
2647 .no_buffering = true,
2648 .mmap_pages = UINT_MAX,
2649 },
2650 .output = stdout,
2651 .show_comm = true,
2652 .trace_syscalls = true,
2653 };
2654 const char *output_name = NULL;
2655 const char *ev_qualifier_str = NULL;
2656 const struct option trace_options[] = {
2657 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2658 "event selector. use 'perf list' to list available events",
2659 parse_events_option),
2660 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2661 "show the thread COMM next to its id"),
2662 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
2663 OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
2664 "list of events to trace"),
2665 OPT_STRING('o', "output", &output_name, "file", "output file name"),
2666 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
2667 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2668 "trace events on existing process id"),
2669 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
2670 "trace events on existing thread id"),
2671 OPT_CALLBACK(0, "filter-pids", &trace, "float",
2672 "show only events with duration > N.M ms", trace__set_filter_pids),
2673 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
2674 "system-wide collection from all CPUs"),
2675 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
2676 "list of cpus to monitor"),
2677 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
2678 "child tasks do not inherit counters"),
2679 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2680 "number of mmap data pages",
2681 perf_evlist__parse_mmap_pages),
2682 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
2683 "user to profile"),
2684 OPT_CALLBACK(0, "duration", &trace, "float",
2685 "show only events with duration > N.M ms",
2686 trace__set_duration),
2687 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
2688 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
2689 OPT_BOOLEAN('T', "time", &trace.full_time,
2690 "Show full timestamp, not time relative to first start"),
2691 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2692 "Show only syscall summary with statistics"),
2693 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2694 "Show all syscalls and summary with statistics"),
2695 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2696 "Trace pagefaults", parse_pagefaults, "maj"),
2697 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
2698 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
2699 OPT_END()
2700 };
2701 const char * const trace_subcommands[] = { "record", NULL };
2702 int err;
2703 char bf[BUFSIZ];
2704
2705 signal(SIGSEGV, sighandler_dump_stack);
2706 signal(SIGFPE, sighandler_dump_stack);
2707
2708 trace.evlist = perf_evlist__new();
2709 if (trace.evlist == NULL)
2710 return -ENOMEM;
2711
2712 if (trace.evlist == NULL) {
2713 pr_err("Not enough memory to run!\n");
2714 goto out;
2715 }
2716
2717 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2718 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2719
2720 if (trace.trace_pgfaults) {
2721 trace.opts.sample_address = true;
2722 trace.opts.sample_time = true;
2723 }
2724
2725 if (trace.evlist->nr_entries > 0)
2726 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2727
2728 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2729 return trace__record(&trace, argc-1, &argv[1]);
2730
2731 /* summary_only implies summary option, but don't overwrite summary if set */
2732 if (trace.summary_only)
2733 trace.summary = trace.summary_only;
2734
2735 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2736 trace.evlist->nr_entries == 0 /* Was --events used? */) {
2737 pr_err("Please specify something to trace.\n");
2738 return -1;
2739 }
2740
2741 if (output_name != NULL) {
2742 err = trace__open_output(&trace, output_name);
2743 if (err < 0) {
2744 perror("failed to create output file");
2745 goto out;
2746 }
2747 }
2748
2749 if (ev_qualifier_str != NULL) {
2750 const char *s = ev_qualifier_str;
2751
2752 trace.not_ev_qualifier = *s == '!';
2753 if (trace.not_ev_qualifier)
2754 ++s;
2755 trace.ev_qualifier = strlist__new(true, s);
2756 if (trace.ev_qualifier == NULL) {
2757 fputs("Not enough memory to parse event qualifier",
2758 trace.output);
2759 err = -ENOMEM;
2760 goto out_close;
2761 }
2762 }
2763
2764 err = target__validate(&trace.opts.target);
2765 if (err) {
2766 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2767 fprintf(trace.output, "%s", bf);
2768 goto out_close;
2769 }
2770
2771 err = target__parse_uid(&trace.opts.target);
2772 if (err) {
2773 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
2774 fprintf(trace.output, "%s", bf);
2775 goto out_close;
2776 }
2777
2778 if (!argc && target__none(&trace.opts.target))
2779 trace.opts.target.system_wide = true;
2780
2781 if (input_name)
2782 err = trace__replay(&trace);
2783 else
2784 err = trace__run(&trace, argc, argv);
2785
2786 out_close:
2787 if (output_name != NULL)
2788 fclose(trace.output);
2789 out:
2790 return err;
2791 }