1 /*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace-like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
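/*
 * Typical invocations (illustrative; see 'perf trace -h' for the full
 * option list):
 *
 *   perf trace ls                               # trace the syscalls of a workload
 *   perf trace -p 1234                          # trace an existing PID
 *   perf trace --event sched:sched_switch ls    # mix in other events
 */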
18
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
21 #include "builtin.h"
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
39
40 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
41 #include <stdlib.h>
42 #include <linux/futex.h>
43 #include <linux/err.h>
44 #include <linux/seccomp.h>
45 #include <linux/filter.h>
46 #include <linux/audit.h>
47 #include <sys/ptrace.h>
48 #include <linux/random.h>
49 #include <linux/stringify.h>
50
51 #ifndef O_CLOEXEC
52 # define O_CLOEXEC 02000000
53 #endif
54
55 #ifndef MSG_CMSG_CLOEXEC
56 # define MSG_CMSG_CLOEXEC 0x40000000
57 #endif
58
59 #ifndef PERF_FLAG_FD_NO_GROUP
60 # define PERF_FLAG_FD_NO_GROUP (1UL << 0)
61 #endif
62
63 #ifndef PERF_FLAG_FD_OUTPUT
64 # define PERF_FLAG_FD_OUTPUT (1UL << 1)
65 #endif
66
67 #ifndef PERF_FLAG_PID_CGROUP
68 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
69 #endif
70
71 #ifndef PERF_FLAG_FD_CLOEXEC
72 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
73 #endif
74
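/*
 * Global state for a 'perf trace' run: the tool callbacks, the syscall
 * table plus the sys_enter/sys_exit evsels, record options and evlist,
 * the host machine, event qualifiers and pid/tid filters, and the
 * assorted output/summary knobs.
 */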
75 struct trace {
76 struct perf_tool tool;
77 struct syscalltbl *sctbl;
78 struct {
79 int max;
80 struct syscall *table;
81 struct {
82 struct perf_evsel *sys_enter,
83 *sys_exit;
84 } events;
85 } syscalls;
86 struct record_opts opts;
87 struct perf_evlist *evlist;
88 struct machine *host;
89 struct thread *current;
90 u64 base_time;
91 FILE *output;
92 unsigned long nr_events;
93 struct strlist *ev_qualifier;
94 struct {
95 size_t nr;
96 int *entries;
97 } ev_qualifier_ids;
98 struct intlist *tid_list;
99 struct intlist *pid_list;
100 struct {
101 size_t nr;
102 pid_t *entries;
103 } filter_pids;
104 double duration_filter;
105 double runtime_ms;
106 struct {
107 u64 vfs_getname,
108 proc_getname;
109 } stats;
110 unsigned int max_stack;
111 unsigned int min_stack;
112 bool not_ev_qualifier;
113 bool live;
114 bool full_time;
115 bool sched;
116 bool multiple_threads;
117 bool summary;
118 bool summary_only;
119 bool show_comm;
120 bool show_tool_stats;
121 bool trace_syscalls;
122 bool kernel_syscallchains;
123 bool force;
124 bool vfs_getname;
125 int trace_pgfaults;
126 int open_id;
127 };
128
129 struct tp_field {
130 int offset;
131 union {
132 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
133 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
134 };
135 };
136
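/*
 * Generate tp_field__u8/u16/u32/u64() (and the byte-swapped variants
 * below) that read an unsigned integer of the given width from the raw
 * sample data at the field's offset; memcpy keeps unaligned payloads safe.
 */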
137 #define TP_UINT_FIELD(bits) \
138 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
139 { \
140 u##bits value; \
141 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
142 return value; \
143 }
144
145 TP_UINT_FIELD(8);
146 TP_UINT_FIELD(16);
147 TP_UINT_FIELD(32);
148 TP_UINT_FIELD(64);
149
150 #define TP_UINT_FIELD__SWAPPED(bits) \
151 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
152 { \
153 u##bits value; \
154 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
155 return bswap_##bits(value);\
156 }
157
158 TP_UINT_FIELD__SWAPPED(16);
159 TP_UINT_FIELD__SWAPPED(32);
160 TP_UINT_FIELD__SWAPPED(64);
161
162 static int tp_field__init_uint(struct tp_field *field,
163 struct format_field *format_field,
164 bool needs_swap)
165 {
166 field->offset = format_field->offset;
167
168 switch (format_field->size) {
169 case 1:
170 field->integer = tp_field__u8;
171 break;
172 case 2:
173 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
174 break;
175 case 4:
176 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
177 break;
178 case 8:
179 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
180 break;
181 default:
182 return -1;
183 }
184
185 return 0;
186 }
187
188 static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
189 {
190 return sample->raw_data + field->offset;
191 }
192
193 static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
194 {
195 field->offset = format_field->offset;
196 field->pointer = tp_field__ptr;
197 return 0;
198 }
199
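/*
 * Per-evsel private data for the syscall tracepoints: the common 'id'
 * field plus either the enter 'args' block or the exit 'ret' value,
 * hence the union.
 */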
200 struct syscall_tp {
201 struct tp_field id;
202 union {
203 struct tp_field args, ret;
204 };
205 };
206
207 static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
208 struct tp_field *field,
209 const char *name)
210 {
211 struct format_field *format_field = perf_evsel__field(evsel, name);
212
213 if (format_field == NULL)
214 return -1;
215
216 return tp_field__init_uint(field, format_field, evsel->needs_swap);
217 }
218
219 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
220 ({ struct syscall_tp *sc = evsel->priv;\
221 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
222
223 static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
224 struct tp_field *field,
225 const char *name)
226 {
227 struct format_field *format_field = perf_evsel__field(evsel, name);
228
229 if (format_field == NULL)
230 return -1;
231
232 return tp_field__init_ptr(field, format_field);
233 }
234
235 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
236 ({ struct syscall_tp *sc = evsel->priv;\
237 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
238
239 static void perf_evsel__delete_priv(struct perf_evsel *evsel)
240 {
241 zfree(&evsel->priv);
242 perf_evsel__delete(evsel);
243 }
244
245 static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
246 {
247 evsel->priv = malloc(sizeof(struct syscall_tp));
248 if (evsel->priv != NULL) {
249 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
250 goto out_delete;
251
252 evsel->handler = handler;
253 return 0;
254 }
255
256 return -ENOMEM;
257
258 out_delete:
259 zfree(&evsel->priv);
260 return -ENOENT;
261 }
262
263 static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
264 {
265 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
266
267 /* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
268 if (IS_ERR(evsel))
269 evsel = perf_evsel__newtp("syscalls", direction);
270
271 if (IS_ERR(evsel))
272 return NULL;
273
274 if (perf_evsel__init_syscall_tp(evsel, handler))
275 goto out_delete;
276
277 return evsel;
278
279 out_delete:
280 perf_evsel__delete_priv(evsel);
281 return NULL;
282 }
283
284 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
285 ({ struct syscall_tp *fields = evsel->priv; \
286 fields->name.integer(&fields->name, sample); })
287
288 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
289 ({ struct syscall_tp *fields = evsel->priv; \
290 fields->name.pointer(&fields->name, sample); })
291
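/*
 * State handed to every argument beautifier: the raw argument value,
 * the thread and trace context, an optional beautifier parameter
 * (e.g. a strarray), the argument index and a bitmask of arguments to
 * suppress from the output.
 */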
292 struct syscall_arg {
293 unsigned long val;
294 struct thread *thread;
295 struct trace *trace;
296 void *parm;
297 u8 idx;
298 u8 mask;
299 };
300
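/*
 * Maps small integer values (optionally biased by 'offset') to string
 * names; used by the SCA_STRARRAY beautifier for things like fcntl
 * commands, lseek whences and socket families.
 */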
301 struct strarray {
302 int offset;
303 int nr_entries;
304 const char **entries;
305 };
306
307 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
308 .nr_entries = ARRAY_SIZE(array), \
309 .entries = array, \
310 }
311
312 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
313 .offset = off, \
314 .nr_entries = ARRAY_SIZE(array), \
315 .entries = array, \
316 }
317
318 static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
319 const char *intfmt,
320 struct syscall_arg *arg)
321 {
322 struct strarray *sa = arg->parm;
323 int idx = arg->val - sa->offset;
324
325 if (idx < 0 || idx >= sa->nr_entries)
326 return scnprintf(bf, size, intfmt, arg->val);
327
328 return scnprintf(bf, size, "%s", sa->entries[idx]);
329 }
330
331 static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
332 struct syscall_arg *arg)
333 {
334 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
335 }
336
337 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
338
339 #if defined(__i386__) || defined(__x86_64__)
340 /*
341 * FIXME: Make this available to all arches as soon as the ioctl beautifier
342 * gets rewritten to support all arches.
343 */
344 static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
345 struct syscall_arg *arg)
346 {
347 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
348 }
349
350 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
351 #endif /* defined(__i386__) || defined(__x86_64__) */
352
353 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
354 struct syscall_arg *arg);
355
356 #define SCA_FD syscall_arg__scnprintf_fd
357
358 static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
359 struct syscall_arg *arg)
360 {
361 int fd = arg->val;
362
363 if (fd == AT_FDCWD)
364 return scnprintf(bf, size, "CWD");
365
366 return syscall_arg__scnprintf_fd(bf, size, arg);
367 }
368
369 #define SCA_FDAT syscall_arg__scnprintf_fd_at
370
371 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
372 struct syscall_arg *arg);
373
374 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
375
376 static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
377 struct syscall_arg *arg)
378 {
379 return scnprintf(bf, size, "%#lx", arg->val);
380 }
381
382 #define SCA_HEX syscall_arg__scnprintf_hex
383
384 static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
385 struct syscall_arg *arg)
386 {
387 return scnprintf(bf, size, "%d", arg->val);
388 }
389
390 #define SCA_INT syscall_arg__scnprintf_int
391
392 static size_t syscall_arg__scnprintf_flock(char *bf, size_t size,
393 struct syscall_arg *arg)
394 {
395 int printed = 0, op = arg->val;
396
397 if (op == 0)
398 return scnprintf(bf, size, "NONE");
399 #define P_CMD(cmd) \
400 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
401 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
402 op &= ~LOCK_##cmd; \
403 }
404
405 P_CMD(SH);
406 P_CMD(EX);
407 P_CMD(NB);
408 P_CMD(UN);
409 P_CMD(MAND);
410 P_CMD(RW);
411 P_CMD(READ);
412 P_CMD(WRITE);
413 #undef P_CMD
414
415 if (op)
416 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
417
418 return printed;
419 }
420
421 #define SCA_FLOCK syscall_arg__scnprintf_flock
422
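/*
 * Decode the futex 'op' argument: print the command name, mark the
 * arguments this command does not use so they are suppressed, and
 * append |PRIV / |CLKRT for the private and clock-realtime modifiers.
 */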
423 static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
424 {
425 enum syscall_futex_args {
426 SCF_UADDR = (1 << 0),
427 SCF_OP = (1 << 1),
428 SCF_VAL = (1 << 2),
429 SCF_TIMEOUT = (1 << 3),
430 SCF_UADDR2 = (1 << 4),
431 SCF_VAL3 = (1 << 5),
432 };
433 int op = arg->val;
434 int cmd = op & FUTEX_CMD_MASK;
435 size_t printed = 0;
436
437 switch (cmd) {
438 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
439 P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
440 P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
441 P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
442 P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
443 P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
444 P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
445 P_FUTEX_OP(WAKE_OP); break;
446 P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
447 P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
448 P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
449 P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
450 P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
451 P_FUTEX_OP(WAIT_REQUEUE_PI); break;
452 default: printed = scnprintf(bf, size, "%#x", cmd); break;
453 }
454
455 if (op & FUTEX_PRIVATE_FLAG)
456 printed += scnprintf(bf + printed, size - printed, "|PRIV");
457
458 if (op & FUTEX_CLOCK_REALTIME)
459 printed += scnprintf(bf + printed, size - printed, "|CLKRT");
460
461 return printed;
462 }
463
464 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
465
466 static const char *bpf_cmd[] = {
467 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
468 "MAP_GET_NEXT_KEY", "PROG_LOAD",
469 };
470 static DEFINE_STRARRAY(bpf_cmd);
471
472 static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
473 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
474
475 static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
476 static DEFINE_STRARRAY(itimers);
477
478 static const char *keyctl_options[] = {
479 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
480 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
481 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
482 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
483 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
484 };
485 static DEFINE_STRARRAY(keyctl_options);
486
487 static const char *whences[] = { "SET", "CUR", "END",
488 #ifdef SEEK_DATA
489 "DATA",
490 #endif
491 #ifdef SEEK_HOLE
492 "HOLE",
493 #endif
494 };
495 static DEFINE_STRARRAY(whences);
496
497 static const char *fcntl_cmds[] = {
498 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
499 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
500 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
501 "F_GETOWNER_UIDS",
502 };
503 static DEFINE_STRARRAY(fcntl_cmds);
504
505 static const char *rlimit_resources[] = {
506 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
507 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
508 "RTTIME",
509 };
510 static DEFINE_STRARRAY(rlimit_resources);
511
512 static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
513 static DEFINE_STRARRAY(sighow);
514
515 static const char *clockid[] = {
516 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
517 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
518 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
519 };
520 static DEFINE_STRARRAY(clockid);
521
522 static const char *socket_families[] = {
523 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
524 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
525 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
526 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
527 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
528 "ALG", "NFC", "VSOCK",
529 };
530 static DEFINE_STRARRAY(socket_families);
531
532 #ifndef MSG_PROBE
533 #define MSG_PROBE 0x10
534 #endif
535 #ifndef MSG_WAITFORONE
536 #define MSG_WAITFORONE 0x10000
537 #endif
538 #ifndef MSG_SENDPAGE_NOTLAST
539 #define MSG_SENDPAGE_NOTLAST 0x20000
540 #endif
541 #ifndef MSG_FASTOPEN
542 #define MSG_FASTOPEN 0x20000000
543 #endif
544
545 static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
546 struct syscall_arg *arg)
547 {
548 int printed = 0, flags = arg->val;
549
550 if (flags == 0)
551 return scnprintf(bf, size, "NONE");
552 #define P_MSG_FLAG(n) \
553 if (flags & MSG_##n) { \
554 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
555 flags &= ~MSG_##n; \
556 }
557
558 P_MSG_FLAG(OOB);
559 P_MSG_FLAG(PEEK);
560 P_MSG_FLAG(DONTROUTE);
561 P_MSG_FLAG(TRYHARD);
562 P_MSG_FLAG(CTRUNC);
563 P_MSG_FLAG(PROBE);
564 P_MSG_FLAG(TRUNC);
565 P_MSG_FLAG(DONTWAIT);
566 P_MSG_FLAG(EOR);
567 P_MSG_FLAG(WAITALL);
568 P_MSG_FLAG(FIN);
569 P_MSG_FLAG(SYN);
570 P_MSG_FLAG(CONFIRM);
571 P_MSG_FLAG(RST);
572 P_MSG_FLAG(ERRQUEUE);
573 P_MSG_FLAG(NOSIGNAL);
574 P_MSG_FLAG(MORE);
575 P_MSG_FLAG(WAITFORONE);
576 P_MSG_FLAG(SENDPAGE_NOTLAST);
577 P_MSG_FLAG(FASTOPEN);
578 P_MSG_FLAG(CMSG_CLOEXEC);
579 #undef P_MSG_FLAG
580
581 if (flags)
582 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
583
584 return printed;
585 }
586
587 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
588
589 static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
590 struct syscall_arg *arg)
591 {
592 size_t printed = 0;
593 int mode = arg->val;
594
595 if (mode == F_OK) /* 0 */
596 return scnprintf(bf, size, "F");
597 #define P_MODE(n) \
598 if (mode & n##_OK) { \
599 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
600 mode &= ~n##_OK; \
601 }
602
603 P_MODE(R);
604 P_MODE(W);
605 P_MODE(X);
606 #undef P_MODE
607
608 if (mode)
609 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
610
611 return printed;
612 }
613
614 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
615
616 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
617 struct syscall_arg *arg);
618
619 #define SCA_FILENAME syscall_arg__scnprintf_filename
620
621 static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
622 struct syscall_arg *arg)
623 {
624 int printed = 0, flags = arg->val;
625
626 if (!(flags & O_CREAT))
627 arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
628
629 if (flags == 0)
630 return scnprintf(bf, size, "RDONLY");
631 #define P_FLAG(n) \
632 if (flags & O_##n) { \
633 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
634 flags &= ~O_##n; \
635 }
636
637 P_FLAG(APPEND);
638 P_FLAG(ASYNC);
639 P_FLAG(CLOEXEC);
640 P_FLAG(CREAT);
641 P_FLAG(DIRECT);
642 P_FLAG(DIRECTORY);
643 P_FLAG(EXCL);
644 P_FLAG(LARGEFILE);
645 P_FLAG(NOATIME);
646 P_FLAG(NOCTTY);
647 #ifdef O_NONBLOCK
648 P_FLAG(NONBLOCK);
649 #elif O_NDELAY
650 P_FLAG(NDELAY);
651 #endif
652 #ifdef O_PATH
653 P_FLAG(PATH);
654 #endif
655 P_FLAG(RDWR);
656 #ifdef O_DSYNC
657 if ((flags & O_SYNC) == O_SYNC)
658 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
659 else {
660 P_FLAG(DSYNC);
661 }
662 #else
663 P_FLAG(SYNC);
664 #endif
665 P_FLAG(TRUNC);
666 P_FLAG(WRONLY);
667 #undef P_FLAG
668
669 if (flags)
670 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
671
672 return printed;
673 }
674
675 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
676
677 static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
678 struct syscall_arg *arg)
679 {
680 int printed = 0, flags = arg->val;
681
682 if (flags == 0)
683 return 0;
684
685 #define P_FLAG(n) \
686 if (flags & PERF_FLAG_##n) { \
687 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
688 flags &= ~PERF_FLAG_##n; \
689 }
690
691 P_FLAG(FD_NO_GROUP);
692 P_FLAG(FD_OUTPUT);
693 P_FLAG(PID_CGROUP);
694 P_FLAG(FD_CLOEXEC);
695 #undef P_FLAG
696
697 if (flags)
698 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
699
700 return printed;
701 }
702
703 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
704
705 static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
706 struct syscall_arg *arg)
707 {
708 int printed = 0, flags = arg->val;
709
710 #define P_FLAG(n) \
711 if (flags & O_##n) { \
712 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
713 flags &= ~O_##n; \
714 }
715
716 P_FLAG(CLOEXEC);
717 P_FLAG(NONBLOCK);
718 #undef P_FLAG
719
720 if (flags)
721 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
722
723 return printed;
724 }
725
726 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
727
728 static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
729 {
730 int sig = arg->val;
731
732 switch (sig) {
733 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
734 P_SIGNUM(HUP);
735 P_SIGNUM(INT);
736 P_SIGNUM(QUIT);
737 P_SIGNUM(ILL);
738 P_SIGNUM(TRAP);
739 P_SIGNUM(ABRT);
740 P_SIGNUM(BUS);
741 P_SIGNUM(FPE);
742 P_SIGNUM(KILL);
743 P_SIGNUM(USR1);
744 P_SIGNUM(SEGV);
745 P_SIGNUM(USR2);
746 P_SIGNUM(PIPE);
747 P_SIGNUM(ALRM);
748 P_SIGNUM(TERM);
749 P_SIGNUM(CHLD);
750 P_SIGNUM(CONT);
751 P_SIGNUM(STOP);
752 P_SIGNUM(TSTP);
753 P_SIGNUM(TTIN);
754 P_SIGNUM(TTOU);
755 P_SIGNUM(URG);
756 P_SIGNUM(XCPU);
757 P_SIGNUM(XFSZ);
758 P_SIGNUM(VTALRM);
759 P_SIGNUM(PROF);
760 P_SIGNUM(WINCH);
761 P_SIGNUM(IO);
762 P_SIGNUM(PWR);
763 P_SIGNUM(SYS);
764 #ifdef SIGEMT
765 P_SIGNUM(EMT);
766 #endif
767 #ifdef SIGSTKFLT
768 P_SIGNUM(STKFLT);
769 #endif
770 #ifdef SIGSWI
771 P_SIGNUM(SWI);
772 #endif
773 default: break;
774 }
775
776 return scnprintf(bf, size, "%#x", sig);
777 }
778
779 #define SCA_SIGNUM syscall_arg__scnprintf_signum
780
781 #if defined(__i386__) || defined(__x86_64__)
782 /*
783 * FIXME: Make this available to all arches.
784 */
785 #define TCGETS 0x5401
786
787 static const char *tioctls[] = {
788 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
789 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
790 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
791 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
792 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
793 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
794 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
795 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
796 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
797 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
798 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
799 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
800 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
801 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
802 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
803 };
804
805 static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
806 #endif /* defined(__i386__) || defined(__x86_64__) */
807
808 #ifndef SECCOMP_SET_MODE_STRICT
809 #define SECCOMP_SET_MODE_STRICT 0
810 #endif
811 #ifndef SECCOMP_SET_MODE_FILTER
812 #define SECCOMP_SET_MODE_FILTER 1
813 #endif
814
815 static size_t syscall_arg__scnprintf_seccomp_op(char *bf, size_t size, struct syscall_arg *arg)
816 {
817 int op = arg->val;
818 size_t printed = 0;
819
820 switch (op) {
821 #define P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break
822 P_SECCOMP_SET_MODE_OP(STRICT);
823 P_SECCOMP_SET_MODE_OP(FILTER);
824 #undef P_SECCOMP_SET_MODE_OP
825 default: printed = scnprintf(bf, size, "%#x", op); break;
826 }
827
828 return printed;
829 }
830
831 #define SCA_SECCOMP_OP syscall_arg__scnprintf_seccomp_op
832
833 #ifndef SECCOMP_FILTER_FLAG_TSYNC
834 #define SECCOMP_FILTER_FLAG_TSYNC 1
835 #endif
836
837 static size_t syscall_arg__scnprintf_seccomp_flags(char *bf, size_t size,
838 struct syscall_arg *arg)
839 {
840 int printed = 0, flags = arg->val;
841
842 #define P_FLAG(n) \
843 if (flags & SECCOMP_FILTER_FLAG_##n) { \
844 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
845 flags &= ~SECCOMP_FILTER_FLAG_##n; \
846 }
847
848 P_FLAG(TSYNC);
849 #undef P_FLAG
850
851 if (flags)
852 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
853
854 return printed;
855 }
856
857 #define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags
858
859 #ifndef GRND_NONBLOCK
860 #define GRND_NONBLOCK 0x0001
861 #endif
862 #ifndef GRND_RANDOM
863 #define GRND_RANDOM 0x0002
864 #endif
865
866 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
867 struct syscall_arg *arg)
868 {
869 int printed = 0, flags = arg->val;
870
871 #define P_FLAG(n) \
872 if (flags & GRND_##n) { \
873 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
874 flags &= ~GRND_##n; \
875 }
876
877 P_FLAG(RANDOM);
878 P_FLAG(NONBLOCK);
879 #undef P_FLAG
880
881 if (flags)
882 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
883
884 return printed;
885 }
886
887 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
888
889 #define STRARRAY(arg, name, array) \
890 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
891 .arg_parm = { [arg] = &strarray__##array, }
892
893 #include "trace/beauty/eventfd.c"
894 #include "trace/beauty/pid.c"
895 #include "trace/beauty/mmap.c"
896 #include "trace/beauty/mode_t.c"
897 #include "trace/beauty/sched_policy.c"
898 #include "trace/beauty/socket_type.c"
899 #include "trace/beauty/waitid_options.c"
900
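/*
 * Formatting hints for individual syscalls: an optional alias for the
 * tracepoint name, per-argument beautifiers and their parameters, and
 * how to interpret the return value (errno message, pid, timeout or
 * hex). syscall_fmt__find() does a bsearch(), so entries must stay
 * sorted by name.
 */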
901 static struct syscall_fmt {
902 const char *name;
903 const char *alias;
904 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
905 void *arg_parm[6];
906 bool errmsg;
907 bool errpid;
908 bool timeout;
909 bool hexret;
910 } syscall_fmts[] = {
911 { .name = "access", .errmsg = true,
912 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
913 [1] = SCA_ACCMODE, /* mode */ }, },
914 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
915 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
916 { .name = "brk", .hexret = true,
917 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
918 { .name = "chdir", .errmsg = true,
919 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
920 { .name = "chmod", .errmsg = true,
921 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
922 { .name = "chroot", .errmsg = true,
923 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
924 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
925 { .name = "clone", .errpid = true, },
926 { .name = "close", .errmsg = true,
927 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
928 { .name = "connect", .errmsg = true, },
929 { .name = "creat", .errmsg = true,
930 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
931 { .name = "dup", .errmsg = true,
932 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
933 { .name = "dup2", .errmsg = true,
934 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
935 { .name = "dup3", .errmsg = true,
936 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
937 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
938 { .name = "eventfd2", .errmsg = true,
939 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
940 { .name = "faccessat", .errmsg = true,
941 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
942 [1] = SCA_FILENAME, /* filename */ }, },
943 { .name = "fadvise64", .errmsg = true,
944 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
945 { .name = "fallocate", .errmsg = true,
946 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
947 { .name = "fchdir", .errmsg = true,
948 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
949 { .name = "fchmod", .errmsg = true,
950 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
951 { .name = "fchmodat", .errmsg = true,
952 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
953 [1] = SCA_FILENAME, /* filename */ }, },
954 { .name = "fchown", .errmsg = true,
955 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
956 { .name = "fchownat", .errmsg = true,
957 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
958 [1] = SCA_FILENAME, /* filename */ }, },
959 { .name = "fcntl", .errmsg = true,
960 .arg_scnprintf = { [0] = SCA_FD, /* fd */
961 [1] = SCA_STRARRAY, /* cmd */ },
962 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
963 { .name = "fdatasync", .errmsg = true,
964 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
965 { .name = "flock", .errmsg = true,
966 .arg_scnprintf = { [0] = SCA_FD, /* fd */
967 [1] = SCA_FLOCK, /* cmd */ }, },
968 { .name = "fsetxattr", .errmsg = true,
969 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
970 { .name = "fstat", .errmsg = true, .alias = "newfstat",
971 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
972 { .name = "fstatat", .errmsg = true, .alias = "newfstatat",
973 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
974 [1] = SCA_FILENAME, /* filename */ }, },
975 { .name = "fstatfs", .errmsg = true,
976 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
977 { .name = "fsync", .errmsg = true,
978 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
979 { .name = "ftruncate", .errmsg = true,
980 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
981 { .name = "futex", .errmsg = true,
982 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
983 { .name = "futimesat", .errmsg = true,
984 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
985 [1] = SCA_FILENAME, /* filename */ }, },
986 { .name = "getdents", .errmsg = true,
987 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
988 { .name = "getdents64", .errmsg = true,
989 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
990 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
991 { .name = "getpid", .errpid = true, },
992 { .name = "getpgid", .errpid = true, },
993 { .name = "getppid", .errpid = true, },
994 { .name = "getrandom", .errmsg = true,
995 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
996 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
997 { .name = "getxattr", .errmsg = true,
998 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
999 { .name = "inotify_add_watch", .errmsg = true,
1000 .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, },
1001 { .name = "ioctl", .errmsg = true,
1002 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1003 #if defined(__i386__) || defined(__x86_64__)
1004 /*
1005 * FIXME: Make this available to all arches.
1006 */
1007 [1] = SCA_STRHEXARRAY, /* cmd */
1008 [2] = SCA_HEX, /* arg */ },
1009 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
1010 #else
1011 [2] = SCA_HEX, /* arg */ }, },
1012 #endif
1013 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
1014 { .name = "kill", .errmsg = true,
1015 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1016 { .name = "lchown", .errmsg = true,
1017 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1018 { .name = "lgetxattr", .errmsg = true,
1019 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1020 { .name = "linkat", .errmsg = true,
1021 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
1022 { .name = "listxattr", .errmsg = true,
1023 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1024 { .name = "llistxattr", .errmsg = true,
1025 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1026 { .name = "lremovexattr", .errmsg = true,
1027 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1028 { .name = "lseek", .errmsg = true,
1029 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1030 [2] = SCA_STRARRAY, /* whence */ },
1031 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
1032 { .name = "lsetxattr", .errmsg = true,
1033 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1034 { .name = "lstat", .errmsg = true, .alias = "newlstat",
1035 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1036 { .name = "lsxattr", .errmsg = true,
1037 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1038 { .name = "madvise", .errmsg = true,
1039 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1040 [2] = SCA_MADV_BHV, /* behavior */ }, },
1041 { .name = "mkdir", .errmsg = true,
1042 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1043 { .name = "mkdirat", .errmsg = true,
1044 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1045 [1] = SCA_FILENAME, /* pathname */ }, },
1046 { .name = "mknod", .errmsg = true,
1047 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1048 { .name = "mknodat", .errmsg = true,
1049 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */
1050 [1] = SCA_FILENAME, /* filename */ }, },
1051 { .name = "mlock", .errmsg = true,
1052 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1053 { .name = "mlockall", .errmsg = true,
1054 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1055 { .name = "mmap", .hexret = true,
1056 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1057 [2] = SCA_MMAP_PROT, /* prot */
1058 [3] = SCA_MMAP_FLAGS, /* flags */
1059 [4] = SCA_FD, /* fd */ }, },
1060 { .name = "mprotect", .errmsg = true,
1061 .arg_scnprintf = { [0] = SCA_HEX, /* start */
1062 [2] = SCA_MMAP_PROT, /* prot */ }, },
1063 { .name = "mq_unlink", .errmsg = true,
1064 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
1065 { .name = "mremap", .hexret = true,
1066 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
1067 [3] = SCA_MREMAP_FLAGS, /* flags */
1068 [4] = SCA_HEX, /* new_addr */ }, },
1069 { .name = "munlock", .errmsg = true,
1070 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1071 { .name = "munmap", .errmsg = true,
1072 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
1073 { .name = "name_to_handle_at", .errmsg = true,
1074 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1075 { .name = "newfstatat", .errmsg = true,
1076 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1077 [1] = SCA_FILENAME, /* filename */ }, },
1078 { .name = "open", .errmsg = true,
1079 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */
1080 [1] = SCA_OPEN_FLAGS, /* flags */ }, },
1081 { .name = "open_by_handle_at", .errmsg = true,
1082 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1083 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1084 { .name = "openat", .errmsg = true,
1085 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1086 [1] = SCA_FILENAME, /* filename */
1087 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
1088 { .name = "perf_event_open", .errmsg = true,
1089 .arg_scnprintf = { [1] = SCA_INT, /* pid */
1090 [2] = SCA_INT, /* cpu */
1091 [3] = SCA_FD, /* group_fd */
1092 [4] = SCA_PERF_FLAGS, /* flags */ }, },
1093 { .name = "pipe2", .errmsg = true,
1094 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
1095 { .name = "poll", .errmsg = true, .timeout = true, },
1096 { .name = "ppoll", .errmsg = true, .timeout = true, },
1097 { .name = "pread", .errmsg = true, .alias = "pread64",
1098 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1099 { .name = "preadv", .errmsg = true, .alias = "pread",
1100 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1101 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
1102 { .name = "pwrite", .errmsg = true, .alias = "pwrite64",
1103 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1104 { .name = "pwritev", .errmsg = true,
1105 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1106 { .name = "read", .errmsg = true,
1107 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1108 { .name = "readlink", .errmsg = true,
1109 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1110 { .name = "readlinkat", .errmsg = true,
1111 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1112 [1] = SCA_FILENAME, /* pathname */ }, },
1113 { .name = "readv", .errmsg = true,
1114 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1115 { .name = "recvfrom", .errmsg = true,
1116 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1117 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1118 { .name = "recvmmsg", .errmsg = true,
1119 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1120 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1121 { .name = "recvmsg", .errmsg = true,
1122 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1123 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1124 { .name = "removexattr", .errmsg = true,
1125 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1126 { .name = "renameat", .errmsg = true,
1127 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1128 { .name = "rmdir", .errmsg = true,
1129 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1130 { .name = "rt_sigaction", .errmsg = true,
1131 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
1132 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
1133 { .name = "rt_sigqueueinfo", .errmsg = true,
1134 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1135 { .name = "rt_tgsigqueueinfo", .errmsg = true,
1136 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1137 { .name = "sched_setscheduler", .errmsg = true,
1138 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
1139 { .name = "seccomp", .errmsg = true,
1140 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
1141 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
1142 { .name = "select", .errmsg = true, .timeout = true, },
1143 { .name = "sendmmsg", .errmsg = true,
1144 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1145 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1146 { .name = "sendmsg", .errmsg = true,
1147 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1148 [2] = SCA_MSG_FLAGS, /* flags */ }, },
1149 { .name = "sendto", .errmsg = true,
1150 .arg_scnprintf = { [0] = SCA_FD, /* fd */
1151 [3] = SCA_MSG_FLAGS, /* flags */ }, },
1152 { .name = "set_tid_address", .errpid = true, },
1153 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
1154 { .name = "setpgid", .errmsg = true, },
1155 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
1156 { .name = "setxattr", .errmsg = true,
1157 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1158 { .name = "shutdown", .errmsg = true,
1159 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1160 { .name = "socket", .errmsg = true,
1161 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1162 [1] = SCA_SK_TYPE, /* type */ },
1163 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1164 { .name = "socketpair", .errmsg = true,
1165 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
1166 [1] = SCA_SK_TYPE, /* type */ },
1167 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
1168 { .name = "stat", .errmsg = true, .alias = "newstat",
1169 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1170 { .name = "statfs", .errmsg = true,
1171 .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, },
1172 { .name = "swapoff", .errmsg = true,
1173 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1174 { .name = "swapon", .errmsg = true,
1175 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
1176 { .name = "symlinkat", .errmsg = true,
1177 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
1178 { .name = "tgkill", .errmsg = true,
1179 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
1180 { .name = "tkill", .errmsg = true,
1181 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
1182 { .name = "truncate", .errmsg = true,
1183 .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, },
1184 { .name = "uname", .errmsg = true, .alias = "newuname", },
1185 { .name = "unlinkat", .errmsg = true,
1186 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
1187 [1] = SCA_FILENAME, /* pathname */ }, },
1188 { .name = "utime", .errmsg = true,
1189 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1190 { .name = "utimensat", .errmsg = true,
1191 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */
1192 [1] = SCA_FILENAME, /* filename */ }, },
1193 { .name = "utimes", .errmsg = true,
1194 .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, },
1195 { .name = "vmsplice", .errmsg = true,
1196 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1197 { .name = "wait4", .errpid = true,
1198 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
1199 { .name = "waitid", .errpid = true,
1200 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
1201 { .name = "write", .errmsg = true,
1202 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1203 { .name = "writev", .errmsg = true,
1204 .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, },
1205 };
1206
1207 static int syscall_fmt__cmp(const void *name, const void *fmtp)
1208 {
1209 const struct syscall_fmt *fmt = fmtp;
1210 return strcmp(name, fmt->name);
1211 }
1212
1213 static struct syscall_fmt *syscall_fmt__find(const char *name)
1214 {
1215 const int nmemb = ARRAY_SIZE(syscall_fmts);
1216 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
1217 }
1218
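/*
 * Runtime descriptor for one syscall: its tracepoint event format and
 * argument fields, its name, whether it never returns (exit/exit_group),
 * the optional hints from syscall_fmts[] and the printers picked by
 * syscall__set_arg_fmts().
 */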
1219 struct syscall {
1220 struct event_format *tp_format;
1221 int nr_args;
1222 struct format_field *args;
1223 const char *name;
1224 bool is_exit;
1225 struct syscall_fmt *fmt;
1226 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1227 void **arg_parm;
1228 };
1229
1230 static size_t fprintf_duration(unsigned long t, FILE *fp)
1231 {
1232 double duration = (double)t / NSEC_PER_MSEC;
1233 size_t printed = fprintf(fp, "(");
1234
1235 if (duration >= 1.0)
1236 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
1237 else if (duration >= 0.01)
1238 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
1239 else
1240 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
1241 return printed + fprintf(fp, "): ");
1242 }
1243
1244 /**
1245 * filename.ptr: The filename char pointer that will be vfs_getname'd
1246 * filename.entry_str_pos: Where to insert the string translated from
1247 * filename.ptr by the vfs_getname tracepoint/kprobe.
1248 */
1249 struct thread_trace {
1250 u64 entry_time;
1251 u64 exit_time;
1252 bool entry_pending;
1253 unsigned long nr_events;
1254 unsigned long pfmaj, pfmin;
1255 char *entry_str;
1256 double runtime_ms;
1257 struct {
1258 unsigned long ptr;
1259 short int entry_str_pos;
1260 bool pending_open;
1261 unsigned int namelen;
1262 char *name;
1263 } filename;
1264 struct {
1265 int max;
1266 char **table;
1267 } paths;
1268
1269 struct intlist *syscall_stats;
1270 };
1271
1272 static struct thread_trace *thread_trace__new(void)
1273 {
1274 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
1275
1276 if (ttrace) {
1277 ttrace->paths.max = -1;
1278 ttrace->syscall_stats = intlist__new(NULL);
1279 }
1280
1281 return ttrace;
1282 }
1283
1284 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
1285 {
1286 struct thread_trace *ttrace;
1287
1288 if (thread == NULL)
1289 goto fail;
1290
1291 if (thread__priv(thread) == NULL)
1292 thread__set_priv(thread, thread_trace__new());
1293
1294 if (thread__priv(thread) == NULL)
1295 goto fail;
1296
1297 ttrace = thread__priv(thread);
1298 ++ttrace->nr_events;
1299
1300 return ttrace;
1301 fail:
1302 color_fprintf(fp, PERF_COLOR_RED,
1303 "WARNING: not enough memory, dropping samples!\n");
1304 return NULL;
1305 }
1306
1307 #define TRACE_PFMAJ (1 << 0)
1308 #define TRACE_PFMIN (1 << 1)
1309
1310 static const size_t trace__entry_str_size = 2048;
1311
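/*
 * Remember the pathname behind a file descriptor in the per-thread
 * paths table (grown on demand), so that fd arguments can later be
 * printed as fd<path>.
 */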
1312 static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
1313 {
1314 struct thread_trace *ttrace = thread__priv(thread);
1315
1316 if (fd > ttrace->paths.max) {
1317 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
1318
1319 if (npath == NULL)
1320 return -1;
1321
1322 if (ttrace->paths.max != -1) {
1323 memset(npath + ttrace->paths.max + 1, 0,
1324 (fd - ttrace->paths.max) * sizeof(char *));
1325 } else {
1326 memset(npath, 0, (fd + 1) * sizeof(char *));
1327 }
1328
1329 ttrace->paths.table = npath;
1330 ttrace->paths.max = fd;
1331 }
1332
1333 ttrace->paths.table[fd] = strdup(pathname);
1334
1335 return ttrace->paths.table[fd] != NULL ? 0 : -1;
1336 }
1337
1338 static int thread__read_fd_path(struct thread *thread, int fd)
1339 {
1340 char linkname[PATH_MAX], pathname[PATH_MAX];
1341 struct stat st;
1342 int ret;
1343
1344 if (thread->pid_ == thread->tid) {
1345 scnprintf(linkname, sizeof(linkname),
1346 "/proc/%d/fd/%d", thread->pid_, fd);
1347 } else {
1348 scnprintf(linkname, sizeof(linkname),
1349 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
1350 }
1351
1352 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
1353 return -1;
1354
1355 ret = readlink(linkname, pathname, sizeof(pathname));
1356
1357 if (ret < 0 || ret > st.st_size)
1358 return -1;
1359
1360 pathname[ret] = '\0';
1361 return trace__set_fd_pathname(thread, fd, pathname);
1362 }
1363
1364 static const char *thread__fd_path(struct thread *thread, int fd,
1365 struct trace *trace)
1366 {
1367 struct thread_trace *ttrace = thread__priv(thread);
1368
1369 if (ttrace == NULL)
1370 return NULL;
1371
1372 if (fd < 0)
1373 return NULL;
1374
1375 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
1376 if (!trace->live)
1377 return NULL;
1378 ++trace->stats.proc_getname;
1379 if (thread__read_fd_path(thread, fd))
1380 return NULL;
1381 }
1382
1383 return ttrace->paths.table[fd];
1384 }
1385
1386 static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
1387 struct syscall_arg *arg)
1388 {
1389 int fd = arg->val;
1390 size_t printed = scnprintf(bf, size, "%d", fd);
1391 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
1392
1393 if (path)
1394 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1395
1396 return printed;
1397 }
1398
1399 static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1400 struct syscall_arg *arg)
1401 {
1402 int fd = arg->val;
1403 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
1404 struct thread_trace *ttrace = thread__priv(arg->thread);
1405
1406 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1407 zfree(&ttrace->paths.table[fd]);
1408
1409 return printed;
1410 }
1411
1412 static void thread__set_filename_pos(struct thread *thread, const char *bf,
1413 unsigned long ptr)
1414 {
1415 struct thread_trace *ttrace = thread__priv(thread);
1416
1417 ttrace->filename.ptr = ptr;
1418 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1419 }
1420
1421 static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1422 struct syscall_arg *arg)
1423 {
1424 unsigned long ptr = arg->val;
1425
1426 if (!arg->trace->vfs_getname)
1427 return scnprintf(bf, size, "%#lx", ptr);
1428
1429 thread__set_filename_pos(arg->thread, bf, ptr);
1430 return 0;
1431 }
1432
1433 static bool trace__filter_duration(struct trace *trace, double t)
1434 {
1435 return t < (trace->duration_filter * NSEC_PER_MSEC);
1436 }
1437
1438 static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1439 {
1440 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1441
1442 return fprintf(fp, "%10.3f ", ts);
1443 }
1444
1445 static bool done = false;
1446 static bool interrupted = false;
1447
1448 static void sig_handler(int sig)
1449 {
1450 done = true;
1451 interrupted = sig == SIGINT;
1452 }
1453
1454 static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
1455 u64 duration, u64 tstamp, FILE *fp)
1456 {
1457 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
1458 printed += fprintf_duration(duration, fp);
1459
1460 if (trace->multiple_threads) {
1461 if (trace->show_comm)
1462 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
1463 printed += fprintf(fp, "%d ", thread->tid);
1464 }
1465
1466 return printed;
1467 }
1468
1469 static int trace__process_event(struct trace *trace, struct machine *machine,
1470 union perf_event *event, struct perf_sample *sample)
1471 {
1472 int ret = 0;
1473
1474 switch (event->header.type) {
1475 case PERF_RECORD_LOST:
1476 color_fprintf(trace->output, PERF_COLOR_RED,
1477 "LOST %" PRIu64 " events!\n", event->lost.lost);
1478 ret = machine__process_lost_event(machine, event, sample);
1479 break;
1480 default:
1481 ret = machine__process_event(machine, event, sample);
1482 break;
1483 }
1484
1485 return ret;
1486 }
1487
1488 static int trace__tool_process(struct perf_tool *tool,
1489 union perf_event *event,
1490 struct perf_sample *sample,
1491 struct machine *machine)
1492 {
1493 struct trace *trace = container_of(tool, struct trace, tool);
1494 return trace__process_event(trace, machine, event, sample);
1495 }
1496
1497 static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1498 {
1499 int err = symbol__init(NULL);
1500
1501 if (err)
1502 return err;
1503
1504 trace->host = machine__new_host();
1505 if (trace->host == NULL)
1506 return -ENOMEM;
1507
1508 if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0)
1509 return -errno;
1510
1511 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
1512 evlist->threads, trace__tool_process, false,
1513 trace->opts.proc_map_timeout);
1514 if (err)
1515 symbol__exit();
1516
1517 return err;
1518 }
1519
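/*
 * Pick a printer for each argument: the hand-written beautifier from
 * syscall_fmts[] when there is one, otherwise hex for pointer fields
 * and the generic pid_t/umode_t helpers keyed off the field type.
 */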
1520 static int syscall__set_arg_fmts(struct syscall *sc)
1521 {
1522 struct format_field *field;
1523 int idx = 0;
1524
1525 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
1526 if (sc->arg_scnprintf == NULL)
1527 return -1;
1528
1529 if (sc->fmt)
1530 sc->arg_parm = sc->fmt->arg_parm;
1531
1532 for (field = sc->args; field; field = field->next) {
1533 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1534 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
1535 else if (field->flags & FIELD_IS_POINTER)
1536 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
1537 else if (strcmp(field->type, "pid_t") == 0)
1538 sc->arg_scnprintf[idx] = SCA_PID;
1539 else if (strcmp(field->type, "umode_t") == 0)
1540 sc->arg_scnprintf[idx] = SCA_MODE_T;
1541 ++idx;
1542 }
1543
1544 return 0;
1545 }
1546
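/*
 * Lazily populate the syscalls.table entry for 'id': grow the table if
 * needed, resolve the syscall name, look up its formatting hints and
 * read the syscalls:sys_enter_* tracepoint format to learn the
 * argument fields.
 */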
1547 static int trace__read_syscall_info(struct trace *trace, int id)
1548 {
1549 char tp_name[128];
1550 struct syscall *sc;
1551 const char *name = syscalltbl__name(trace->sctbl, id);
1552
1553 if (name == NULL)
1554 return -1;
1555
1556 if (id > trace->syscalls.max) {
1557 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1558
1559 if (nsyscalls == NULL)
1560 return -1;
1561
1562 if (trace->syscalls.max != -1) {
1563 memset(nsyscalls + trace->syscalls.max + 1, 0,
1564 (id - trace->syscalls.max) * sizeof(*sc));
1565 } else {
1566 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1567 }
1568
1569 trace->syscalls.table = nsyscalls;
1570 trace->syscalls.max = id;
1571 }
1572
1573 sc = trace->syscalls.table + id;
1574 sc->name = name;
1575
1576 sc->fmt = syscall_fmt__find(sc->name);
1577
1578 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
1579 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1580
1581 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
1582 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
1583 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
1584 }
1585
1586 if (IS_ERR(sc->tp_format))
1587 return -1;
1588
1589 sc->args = sc->tp_format->format.fields;
1590 sc->nr_args = sc->tp_format->format.nr_fields;
1591 /*
1592  * The first field, '__syscall_nr' (or 'nr' on older kernels), carries
1593  * the syscall number, which is redundant here since we already have the
1594  * id. Skip it when present; older kernels may not emit it at all.
1595 */
1596 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
1597 sc->args = sc->args->next;
1598 --sc->nr_args;
1599 }
1600
1601 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1602
1603 return syscall__set_arg_fmts(sc);
1604 }
1605
1606 static int trace__validate_ev_qualifier(struct trace *trace)
1607 {
1608 int err = 0, i;
1609 struct str_node *pos;
1610
1611 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1612 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1613 sizeof(trace->ev_qualifier_ids.entries[0]));
1614
1615 if (trace->ev_qualifier_ids.entries == NULL) {
1616 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1617 trace->output);
1618 err = -EINVAL;
1619 goto out;
1620 }
1621
1622 i = 0;
1623
1624 strlist__for_each(pos, trace->ev_qualifier) {
1625 const char *sc = pos->s;
1626 int id = syscalltbl__id(trace->sctbl, sc);
1627
1628 if (id < 0) {
1629 if (err == 0) {
1630 fputs("Error:\tInvalid syscall ", trace->output);
1631 err = -EINVAL;
1632 } else {
1633 fputs(", ", trace->output);
1634 }
1635
1636 fputs(sc, trace->output);
1637 }
1638
1639 trace->ev_qualifier_ids.entries[i++] = id;
1640 }
1641
1642 if (err < 0) {
1643 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1644 "\nHint:\tand: 'man syscalls'\n", trace->output);
1645 zfree(&trace->ev_qualifier_ids.entries);
1646 trace->ev_qualifier_ids.nr = 0;
1647 }
1648 out:
1649 return err;
1650 }
1651
1652 /*
1653 * args is to be interpreted as a series of longs but we need to handle
1654 * 8-byte unaligned accesses. args points to raw_data within the event
1655 * and raw_data is guaranteed to be 8-byte unaligned because it is
1656 * preceded by raw_size which is a u32. So we need to copy args to a temp
1657 * variable to read it. Most notably this avoids extended load instructions
1658 * on unaligned addresses
1659 */
1660
1661 static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
1662 unsigned char *args, struct trace *trace,
1663 struct thread *thread)
1664 {
1665 size_t printed = 0;
1666 unsigned char *p;
1667 unsigned long val;
1668
1669 if (sc->args != NULL) {
1670 struct format_field *field;
1671 u8 bit = 1;
1672 struct syscall_arg arg = {
1673 .idx = 0,
1674 .mask = 0,
1675 .trace = trace,
1676 .thread = thread,
1677 };
1678
1679 for (field = sc->args; field;
1680 field = field->next, ++arg.idx, bit <<= 1) {
1681 if (arg.mask & bit)
1682 continue;
1683
1684 /* special care for unaligned accesses */
1685 p = args + sizeof(unsigned long) * arg.idx;
1686 memcpy(&val, p, sizeof(val));
1687
1688 /*
1689  * Suppress this argument if its value is zero and
1690  * we don't have a string associated with it in a
1691  * strarray.
1692 */
1693 if (val == 0 &&
1694 !(sc->arg_scnprintf &&
1695 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1696 sc->arg_parm[arg.idx]))
1697 continue;
1698
1699 printed += scnprintf(bf + printed, size - printed,
1700 "%s%s: ", printed ? ", " : "", field->name);
1701 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
1702 arg.val = val;
1703 if (sc->arg_parm)
1704 arg.parm = sc->arg_parm[arg.idx];
1705 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1706 size - printed, &arg);
1707 } else {
1708 printed += scnprintf(bf + printed, size - printed,
1709 "%ld", val);
1710 }
1711 }
1712 } else {
1713 int i = 0;
1714
1715 while (i < 6) {
1716 /* special care for unaligned accesses */
1717 p = args + sizeof(unsigned long) * i;
1718 memcpy(&val, p, sizeof(val));
1719 printed += scnprintf(bf + printed, size - printed,
1720 "%sarg%d: %ld",
1721 printed ? ", " : "", i, val);
1722 ++i;
1723 }
1724 }
1725
1726 return printed;
1727 }
1728
1729 typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
1730 union perf_event *event,
1731 struct perf_sample *sample);
1732
1733 static struct syscall *trace__syscall_info(struct trace *trace,
1734 struct perf_evsel *evsel, int id)
1735 {
1736
1737 if (id < 0) {
1738
1739 /*
1740 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1741 * before that, leaving at a higher verbosity level till that is
1742 * explained. Reproduced with plain ftrace with:
1743 *
1744 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1745 * grep "NR -1 " /t/trace_pipe
1746 *
1747 * After generating some load on the machine.
1748 */
1749 if (verbose > 1) {
1750 static u64 n;
1751 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1752 id, perf_evsel__name(evsel), ++n);
1753 }
1754 return NULL;
1755 }
1756
1757 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1758 trace__read_syscall_info(trace, id))
1759 goto out_cant_read;
1760
1761 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1762 goto out_cant_read;
1763
1764 return &trace->syscalls.table[id];
1765
1766 out_cant_read:
1767 if (verbose) {
1768 fprintf(trace->output, "Problems reading syscall %d", id);
1769 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1770 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1771 fputs(" information\n", trace->output);
1772 }
1773 return NULL;
1774 }
1775
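/*
 * Fold one completed syscall into the per-thread, per-syscall duration
 * statistics that feed the summary output.
 */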
1776 static void thread__update_stats(struct thread_trace *ttrace,
1777 int id, struct perf_sample *sample)
1778 {
1779 struct int_node *inode;
1780 struct stats *stats;
1781 u64 duration = 0;
1782
1783 inode = intlist__findnew(ttrace->syscall_stats, id);
1784 if (inode == NULL)
1785 return;
1786
1787 stats = inode->priv;
1788 if (stats == NULL) {
1789 stats = malloc(sizeof(struct stats));
1790 if (stats == NULL)
1791 return;
1792 init_stats(stats);
1793 inode->priv = stats;
1794 }
1795
1796 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1797 duration = sample->time - ttrace->entry_time;
1798
1799 update_stats(stats, duration);
1800 }
1801
1802 static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1803 {
1804 struct thread_trace *ttrace;
1805 u64 duration;
1806 size_t printed;
1807
1808 if (trace->current == NULL)
1809 return 0;
1810
1811 ttrace = thread__priv(trace->current);
1812
1813 if (!ttrace->entry_pending)
1814 return 0;
1815
1816 duration = sample->time - ttrace->entry_time;
1817
1818 printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output);
1819 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1820 ttrace->entry_pending = false;
1821
1822 return printed;
1823 }
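
/*
 * Illustrative example (hypothetical output): when another thread's
 * sys_enter arrives while an entry is still pending, the pending line is
 * flushed roughly as
 *
 *   123.456 ( 0.003 ms): cat/1234 read(fd: 3, buf: 0x7f..., count: 4096) ...
 *
 * and the eventual sys_exit for that thread is later printed by
 * trace__sys_exit() as " ... [continued]: read()) = 4096".
 */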
1824
1825 static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
1826 union perf_event *event __maybe_unused,
1827 struct perf_sample *sample)
1828 {
1829 char *msg;
1830 void *args;
1831 size_t printed = 0;
1832 struct thread *thread;
1833 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
1834 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1835 struct thread_trace *ttrace;
1836
1837 if (sc == NULL)
1838 return -1;
1839
1840 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1841 ttrace = thread__trace(thread, trace->output);
1842 if (ttrace == NULL)
1843 goto out_put;
1844
1845 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
1846
1847 if (ttrace->entry_str == NULL) {
1848 ttrace->entry_str = malloc(trace__entry_str_size);
1849 if (!ttrace->entry_str)
1850 goto out_put;
1851 }
1852
1853 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
1854 trace__printf_interrupted_entry(trace, sample);
1855
1856 ttrace->entry_time = sample->time;
1857 msg = ttrace->entry_str;
1858 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
1859
1860 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
1861 args, trace, thread);
1862
1863 if (sc->is_exit) {
1864 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
1865 trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output);
1866 fprintf(trace->output, "%-70s\n", ttrace->entry_str);
1867 }
1868 } else {
1869 ttrace->entry_pending = true;
1870 /* See trace__vfs_getname & trace__sys_exit */
1871 ttrace->filename.pending_open = false;
1872 }
1873
1874 if (trace->current != thread) {
1875 thread__put(trace->current);
1876 trace->current = thread__get(thread);
1877 }
1878 err = 0;
1879 out_put:
1880 thread__put(thread);
1881 return err;
1882 }
1883
1884 static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1885 struct perf_sample *sample,
1886 struct callchain_cursor *cursor)
1887 {
1888 struct addr_location al;
1889
1890 if (machine__resolve(trace->host, &al, sample) < 0 ||
1891 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1892 return -1;
1893
1894 return 0;
1895 }
1896
1897 static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1898 {
1899 /* TODO: user-configurable print_opts */
1900 const unsigned int print_opts = EVSEL__PRINT_SYM |
1901 EVSEL__PRINT_DSO |
1902 EVSEL__PRINT_UNKNOWN_AS_ADDR;
1903
1904 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
1905 }
1906
1907 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
1908 union perf_event *event __maybe_unused,
1909 struct perf_sample *sample)
1910 {
1911 long ret;
1912 u64 duration = 0;
1913 struct thread *thread;
1914 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
1915 struct syscall *sc = trace__syscall_info(trace, evsel, id);
1916 struct thread_trace *ttrace;
1917
1918 if (sc == NULL)
1919 return -1;
1920
1921 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1922 ttrace = thread__trace(thread, trace->output);
1923 if (ttrace == NULL)
1924 goto out_put;
1925
1926 if (trace->summary)
1927 thread__update_stats(ttrace, id, sample);
1928
1929 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
1930
1931 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
1932 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1933 ttrace->filename.pending_open = false;
1934 ++trace->stats.vfs_getname;
1935 }
1936
1937 ttrace->exit_time = sample->time;
1938
1939 if (ttrace->entry_time) {
1940 duration = sample->time - ttrace->entry_time;
1941 if (trace__filter_duration(trace, duration))
1942 goto out;
1943 } else if (trace->duration_filter)
1944 goto out;
1945
1946 if (sample->callchain) {
1947 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1948 if (callchain_ret == 0) {
1949 if (callchain_cursor.nr < trace->min_stack)
1950 goto out;
1951 callchain_ret = 1;
1952 }
1953 }
1954
1955 if (trace->summary_only)
1956 goto out;
1957
1958 trace__fprintf_entry_head(trace, thread, duration, sample->time, trace->output);
1959
1960 if (ttrace->entry_pending) {
1961 fprintf(trace->output, "%-70s", ttrace->entry_str);
1962 } else {
1963 fprintf(trace->output, " ... [");
1964 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1965 fprintf(trace->output, "]: %s()", sc->name);
1966 }
1967
1968 if (sc->fmt == NULL) {
1969 signed_print:
1970 fprintf(trace->output, ") = %ld", ret);
1971 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
1972 char bf[STRERR_BUFSIZE];
1973 const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
1974 *e = audit_errno_to_name(-ret);
1975
1976 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1977 } else if (ret == 0 && sc->fmt->timeout)
1978 fprintf(trace->output, ") = 0 Timeout");
1979 else if (sc->fmt->hexret)
1980 fprintf(trace->output, ") = %#lx", ret);
1981 else if (sc->fmt->errpid) {
1982 struct thread *child = machine__find_thread(trace->host, ret, ret);
1983
1984 if (child != NULL) {
1985 fprintf(trace->output, ") = %ld", ret);
1986 if (child->comm_set)
1987 fprintf(trace->output, " (%s)", thread__comm_str(child));
1988 thread__put(child);
1989 }
1990 } else
1991 goto signed_print;
1992
1993 fputc('\n', trace->output);
1994
1995 if (callchain_ret > 0)
1996 trace__fprintf_callchain(trace, sample);
1997 else if (callchain_ret < 0)
1998 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1999 out:
2000 ttrace->entry_pending = false;
2001 err = 0;
2002 out_put:
2003 thread__put(thread);
2004 return err;
2005 }
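
/*
 * Illustrative examples of the return value formatting above (hypothetical
 * values):
 *
 *   default/signed print:     ") = 3"
 *   errmsg on failure:        ") = -1 ENOENT No such file or directory"
 *   timeout (e.g. poll):      ") = 0 Timeout"
 *   hexret (e.g. mmap):       ") = 0x7f1234567000"
 *   errpid (e.g. wait4):      ") = 1234 (cat)"
 */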
2006
2007 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
2008 union perf_event *event __maybe_unused,
2009 struct perf_sample *sample)
2010 {
2011 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2012 struct thread_trace *ttrace;
2013 size_t filename_len, entry_str_len, to_move;
2014 ssize_t remaining_space;
2015 char *pos;
2016 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
2017
2018 if (!thread)
2019 goto out;
2020
2021 ttrace = thread__priv(thread);
2022 if (!ttrace)
2023 goto out;
2024
2025 filename_len = strlen(filename);
2026
2027 if (ttrace->filename.namelen < filename_len) {
2028 char *f = realloc(ttrace->filename.name, filename_len + 1);
2029
2030 if (f == NULL)
2031 goto out;
2032
2033 ttrace->filename.namelen = filename_len;
2034 ttrace->filename.name = f;
2035 }
2036
2037 strcpy(ttrace->filename.name, filename);
2038 ttrace->filename.pending_open = true;
2039
2040 if (!ttrace->filename.ptr)
2041 goto out;
2042
2043 entry_str_len = strlen(ttrace->entry_str);
2044 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
2045 if (remaining_space <= 0)
2046 goto out;
2047
2048 if (filename_len > (size_t)remaining_space) {
2049 filename += filename_len - remaining_space;
2050 filename_len = remaining_space;
2051 }
2052
2053 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
2054 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
2055 memmove(pos + filename_len, pos, to_move);
2056 memcpy(pos, filename, filename_len);
2057
2058 ttrace->filename.ptr = 0;
2059 ttrace->filename.entry_str_pos = 0;
2060 out:
2061 return 0;
2062 }
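
/*
 * Illustrative example (assumed flow): the filename argument formatter
 * earlier in this file records ttrace->filename.ptr and entry_str_pos, so
 * an entry string such as
 *
 *   "openat(dfd: CWD, filename: "
 *
 * gets the pathname delivered by probe:vfs_getname spliced in at
 * entry_str_pos, yielding "openat(dfd: CWD, filename: /etc/ld.so.cache"
 * by the time the matching sys_exit is printed.
 */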
2063
2064 static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
2065 union perf_event *event __maybe_unused,
2066 struct perf_sample *sample)
2067 {
2068 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
2069 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
2070 struct thread *thread = machine__findnew_thread(trace->host,
2071 sample->pid,
2072 sample->tid);
2073 struct thread_trace *ttrace = thread__trace(thread, trace->output);
2074
2075 if (ttrace == NULL)
2076 goto out_dump;
2077
2078 ttrace->runtime_ms += runtime_ms;
2079 trace->runtime_ms += runtime_ms;
2080 thread__put(thread);
2081 return 0;
2082
2083 out_dump:
2084 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
2085 evsel->name,
2086 perf_evsel__strval(evsel, sample, "comm"),
2087 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
2088 runtime,
2089 perf_evsel__intval(evsel, sample, "vruntime"));
2090 thread__put(thread);
2091 return 0;
2092 }
2093
2094 static void bpf_output__printer(enum binary_printer_ops op,
2095 unsigned int val, void *extra)
2096 {
2097 FILE *output = extra;
2098 unsigned char ch = (unsigned char)val;
2099
2100 switch (op) {
2101 case BINARY_PRINT_CHAR_DATA:
2102 fprintf(output, "%c", isprint(ch) ? ch : '.');
2103 break;
2104 case BINARY_PRINT_DATA_BEGIN:
2105 case BINARY_PRINT_LINE_BEGIN:
2106 case BINARY_PRINT_ADDR:
2107 case BINARY_PRINT_NUM_DATA:
2108 case BINARY_PRINT_NUM_PAD:
2109 case BINARY_PRINT_SEP:
2110 case BINARY_PRINT_CHAR_PAD:
2111 case BINARY_PRINT_LINE_END:
2112 case BINARY_PRINT_DATA_END:
2113 default:
2114 break;
2115 }
2116 }
2117
2118 static void bpf_output__fprintf(struct trace *trace,
2119 struct perf_sample *sample)
2120 {
2121 print_binary(sample->raw_data, sample->raw_size, 8,
2122 bpf_output__printer, trace->output);
2123 }
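
/*
 * Illustrative example (hypothetical payload): since only
 * BINARY_PRINT_CHAR_DATA is handled above, a 16-byte raw_data buffer
 * containing "Hello, BPF!" followed by zero padding is rendered simply as
 *
 *   Hello, BPF!.....
 *
 * with non-printable bytes shown as '.'.
 */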
2124
2125 static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
2126 union perf_event *event __maybe_unused,
2127 struct perf_sample *sample)
2128 {
2129 trace__printf_interrupted_entry(trace, sample);
2130 trace__fprintf_tstamp(trace, sample->time, trace->output);
2131
2132 if (trace->trace_syscalls)
2133 fprintf(trace->output, "( ): ");
2134
2135 fprintf(trace->output, "%s:", evsel->name);
2136
2137 if (perf_evsel__is_bpf_output(evsel)) {
2138 bpf_output__fprintf(trace, sample);
2139 } else if (evsel->tp_format) {
2140 event_format__fprintf(evsel->tp_format, sample->cpu,
2141 sample->raw_data, sample->raw_size,
2142 trace->output);
2143 }
2144
2145 fprintf(trace->output, ")\n");
2146
2147 if (sample->callchain) {
2148 if (trace__resolve_callchain(trace, evsel, sample, &callchain_cursor) == 0)
2149 trace__fprintf_callchain(trace, sample);
2150 }
2151
2152 return 0;
2153 }
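
/*
 * Illustrative example (hypothetical output): a non-syscall tracepoint or
 * --event sample ends up as a line roughly like
 *
 *   123.456 ( ): sched:sched_wakeup:comm=cat pid=1234 prio=120 target_cpu=002)
 *
 * i.e. timestamp, the "( ): " filler when syscalls are also being traced,
 * "event_name:", the libtraceevent-formatted payload and a closing ')'.
 */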
2154
2155 static void print_location(FILE *f, struct perf_sample *sample,
2156 struct addr_location *al,
2157 bool print_dso, bool print_sym)
2158 {
2159
2160 if ((verbose || print_dso) && al->map)
2161 fprintf(f, "%s@", al->map->dso->long_name);
2162
2163 if ((verbose || print_sym) && al->sym)
2164 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
2165 al->addr - al->sym->start);
2166 else if (al->map)
2167 fprintf(f, "0x%" PRIx64, al->addr);
2168 else
2169 fprintf(f, "0x%" PRIx64, sample->addr);
2170 }
2171
2172 static int trace__pgfault(struct trace *trace,
2173 struct perf_evsel *evsel,
2174 union perf_event *event __maybe_unused,
2175 struct perf_sample *sample)
2176 {
2177 struct thread *thread;
2178 struct addr_location al;
2179 char map_type = 'd';
2180 struct thread_trace *ttrace;
2181 int err = -1;
2182
2183 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2184 ttrace = thread__trace(thread, trace->output);
2185 if (ttrace == NULL)
2186 goto out_put;
2187
2188 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2189 ttrace->pfmaj++;
2190 else
2191 ttrace->pfmin++;
2192
2193 if (trace->summary_only)
2194 goto out;
2195
2196 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
2197 sample->ip, &al);
2198
2199 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
2200
2201 fprintf(trace->output, "%sfault [",
2202 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2203 "maj" : "min");
2204
2205 print_location(trace->output, sample, &al, false, true);
2206
2207 fprintf(trace->output, "] => ");
2208
2209 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
2210 sample->addr, &al);
2211
2212 if (!al.map) {
2213 thread__find_addr_location(thread, sample->cpumode,
2214 MAP__FUNCTION, sample->addr, &al);
2215
2216 if (al.map)
2217 map_type = 'x';
2218 else
2219 map_type = '?';
2220 }
2221
2222 print_location(trace->output, sample, &al, true, false);
2223
2224 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
2225 out:
2226 err = 0;
2227 out_put:
2228 thread__put(thread);
2229 return err;
2230 }
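
/*
 * Illustrative example (hypothetical output): a page fault line produced
 * above looks roughly like
 *
 *   123.456 ( 0.000 ms): cat/1234 minfault [__memcpy_avx+0x1e] =>
 *       /usr/lib/libc-2.23.so@0x7f2e12345000 (d.)
 *
 * where the trailing "(d.)" encodes the map type ('d'ata, e'x'ec or '?')
 * and the addr_location level.
 */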
2231
2232 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
2233 {
2234 if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
2235 (trace->tid_list && intlist__find(trace->tid_list, sample->tid)))
2236 return false;
2237
2238 if (trace->pid_list || trace->tid_list)
2239 return true;
2240
2241 return false;
2242 }
2243
2244 static void trace__set_base_time(struct trace *trace,
2245 struct perf_evsel *evsel,
2246 struct perf_sample *sample)
2247 {
2248 /*
2249 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2250 * and don't use sample->time unconditionally, we may end up having
2251 * some other event in the future without PERF_SAMPLE_TIME for good
2252 * reason, i.e. we may not be interested in its timestamps, just in
2253 * it taking place, picking some piece of information when it
2254 * appears in our event stream (vfs_getname comes to mind).
2255 */
2256 if (trace->base_time == 0 && !trace->full_time &&
2257 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
2258 trace->base_time = sample->time;
2259 }
2260
2261 static int trace__process_sample(struct perf_tool *tool,
2262 union perf_event *event,
2263 struct perf_sample *sample,
2264 struct perf_evsel *evsel,
2265 struct machine *machine __maybe_unused)
2266 {
2267 struct trace *trace = container_of(tool, struct trace, tool);
2268 int err = 0;
2269
2270 tracepoint_handler handler = evsel->handler;
2271
2272 if (skip_sample(trace, sample))
2273 return 0;
2274
2275 trace__set_base_time(trace, evsel, sample);
2276
2277 if (handler) {
2278 ++trace->nr_events;
2279 handler(trace, evsel, event, sample);
2280 }
2281
2282 return err;
2283 }
2284
2285 static int parse_target_str(struct trace *trace)
2286 {
2287 if (trace->opts.target.pid) {
2288 trace->pid_list = intlist__new(trace->opts.target.pid);
2289 if (trace->pid_list == NULL) {
2290 pr_err("Error parsing process id string\n");
2291 return -EINVAL;
2292 }
2293 }
2294
2295 if (trace->opts.target.tid) {
2296 trace->tid_list = intlist__new(trace->opts.target.tid);
2297 if (trace->tid_list == NULL) {
2298 pr_err("Error parsing thread id string\n");
2299 return -EINVAL;
2300 }
2301 }
2302
2303 return 0;
2304 }
2305
2306 static int trace__record(struct trace *trace, int argc, const char **argv)
2307 {
2308 unsigned int rec_argc, i, j;
2309 const char **rec_argv;
2310 const char * const record_args[] = {
2311 "record",
2312 "-R",
2313 "-m", "1024",
2314 "-c", "1",
2315 };
2316
2317 const char * const sc_args[] = { "-e", };
2318 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2319 const char * const majpf_args[] = { "-e", "major-faults" };
2320 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2321 const char * const minpf_args[] = { "-e", "minor-faults" };
2322 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2323
2324 /* +1 is for the event string below */
2325 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2326 majpf_args_nr + minpf_args_nr + argc;
2327 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2328
2329 if (rec_argv == NULL)
2330 return -ENOMEM;
2331
2332 j = 0;
2333 for (i = 0; i < ARRAY_SIZE(record_args); i++)
2334 rec_argv[j++] = record_args[i];
2335
2336 if (trace->trace_syscalls) {
2337 for (i = 0; i < sc_args_nr; i++)
2338 rec_argv[j++] = sc_args[i];
2339
2340 /* event string may be different for older kernels - e.g., RHEL6 */
2341 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2342 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2343 else if (is_valid_tracepoint("syscalls:sys_enter"))
2344 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2345 else {
2346 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2347 return -1;
2348 }
2349 }
2350
2351 if (trace->trace_pgfaults & TRACE_PFMAJ)
2352 for (i = 0; i < majpf_args_nr; i++)
2353 rec_argv[j++] = majpf_args[i];
2354
2355 if (trace->trace_pgfaults & TRACE_PFMIN)
2356 for (i = 0; i < minpf_args_nr; i++)
2357 rec_argv[j++] = minpf_args[i];
2358
2359 for (i = 0; i < (unsigned int)argc; i++)
2360 rec_argv[j++] = argv[i];
2361
2362 return cmd_record(j, rec_argv, NULL);
2363 }
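
/*
 * Illustrative example (assumed defaults): "perf trace record -- sleep 1"
 * forwards to cmd_record() roughly the equivalent of
 *
 *   perf record -R -m 1024 -c 1 \
 *        -e raw_syscalls:sys_enter,raw_syscalls:sys_exit sleep 1
 *
 * with "-e major-faults" / "-e minor-faults" appended when -F/--pf was
 * also given.
 */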
2364
2365 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2366
2367 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
2368 {
2369 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
2370
2371 if (IS_ERR(evsel))
2372 return false;
2373
2374 if (perf_evsel__field(evsel, "pathname") == NULL) {
2375 perf_evsel__delete(evsel);
2376 return false;
2377 }
2378
2379 evsel->handler = trace__vfs_getname;
2380 perf_evlist__add(evlist, evsel);
2381 return true;
2382 }
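
/*
 * Note: this only succeeds when a "probe:vfs_getname" dynamic probe with a
 * "pathname" string field has been set up beforehand, e.g. (the exact probe
 * point varies by kernel version) something like:
 *
 *   perf probe 'vfs_getname=getname_flags:72 pathname=filename:string'
 */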
2383
2384 static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
2385 u64 config)
2386 {
2387 struct perf_evsel *evsel;
2388 struct perf_event_attr attr = {
2389 .type = PERF_TYPE_SOFTWARE,
2390 .mmap_data = 1,
2391 };
2392
2393 attr.config = config;
2394 attr.sample_period = 1;
2395
2396 event_attr_init(&attr);
2397
2398 evsel = perf_evsel__new(&attr);
2399 if (!evsel)
2400 return -ENOMEM;
2401
2402 evsel->handler = trace__pgfault;
2403 perf_evlist__add(evlist, evsel);
2404
2405 return 0;
2406 }
2407
2408 static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2409 {
2410 const u32 type = event->header.type;
2411 struct perf_evsel *evsel;
2412
2413 if (type != PERF_RECORD_SAMPLE) {
2414 trace__process_event(trace, trace->host, event, sample);
2415 return;
2416 }
2417
2418 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2419 if (evsel == NULL) {
2420 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2421 return;
2422 }
2423
2424 trace__set_base_time(trace, evsel, sample);
2425
2426 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2427 sample->raw_data == NULL) {
2428 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2429 perf_evsel__name(evsel), sample->tid,
2430 sample->cpu, sample->raw_size);
2431 } else {
2432 tracepoint_handler handler = evsel->handler;
2433 handler(trace, evsel, event, sample);
2434 }
2435 }
2436
2437 static int trace__add_syscall_newtp(struct trace *trace)
2438 {
2439 int ret = -1;
2440 struct perf_evlist *evlist = trace->evlist;
2441 struct perf_evsel *sys_enter, *sys_exit;
2442
2443 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2444 if (sys_enter == NULL)
2445 goto out;
2446
2447 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2448 goto out_delete_sys_enter;
2449
2450 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2451 if (sys_exit == NULL)
2452 goto out_delete_sys_enter;
2453
2454 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2455 goto out_delete_sys_exit;
2456
2457 perf_evlist__add(evlist, sys_enter);
2458 perf_evlist__add(evlist, sys_exit);
2459
2460 if (trace->opts.callgraph_set && !trace->kernel_syscallchains) {
2461 /*
2462 * We're interested only in the user space callchain
2463 * leading to the syscall, allow overriding that for
2464 * debugging reasons using --kernel-syscall-graph
2465 */
2466 sys_exit->attr.exclude_callchain_kernel = 1;
2467 }
2468
2469 trace->syscalls.events.sys_enter = sys_enter;
2470 trace->syscalls.events.sys_exit = sys_exit;
2471
2472 ret = 0;
2473 out:
2474 return ret;
2475
2476 out_delete_sys_exit:
2477 perf_evsel__delete_priv(sys_exit);
2478 out_delete_sys_enter:
2479 perf_evsel__delete_priv(sys_enter);
2480 goto out;
2481 }
2482
2483 static int trace__set_ev_qualifier_filter(struct trace *trace)
2484 {
2485 int err = -1;
2486 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2487 trace->ev_qualifier_ids.nr,
2488 trace->ev_qualifier_ids.entries);
2489
2490 if (filter == NULL)
2491 goto out_enomem;
2492
2493 if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter))
2494 err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter);
2495
2496 free(filter);
2497 out:
2498 return err;
2499 out_enomem:
2500 errno = ENOMEM;
2501 goto out;
2502 }
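
/*
 * Illustrative example (syscall ids are arch dependent): "-e open,close"
 * resolves to the corresponding ids and the generated tracepoint filter is
 * of the form "id == 2 || id == 3", while "-e '!open,close'" yields
 * "id != 2 && id != 3"; the result is appended to any existing filter on
 * both the sys_enter and sys_exit events.
 */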
2503
2504 static int trace__run(struct trace *trace, int argc, const char **argv)
2505 {
2506 struct perf_evlist *evlist = trace->evlist;
2507 struct perf_evsel *evsel;
2508 int err = -1, i;
2509 unsigned long before;
2510 const bool forks = argc > 0;
2511 bool draining = false;
2512
2513 trace->live = true;
2514
2515 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
2516 goto out_error_raw_syscalls;
2517
2518 if (trace->trace_syscalls)
2519 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
2520
2521 if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
2522 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) {
2523 goto out_error_mem;
2524 }
2525
2526 if ((trace->trace_pgfaults & TRACE_PFMIN) &&
2527 perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
2528 goto out_error_mem;
2529
2530 if (trace->sched &&
2531 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2532 trace__sched_stat_runtime))
2533 goto out_error_sched_stat_runtime;
2534
2535 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2536 if (err < 0) {
2537 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
2538 goto out_delete_evlist;
2539 }
2540
2541 err = trace__symbols_init(trace, evlist);
2542 if (err < 0) {
2543 fprintf(trace->output, "Problems initializing symbol libraries!\n");
2544 goto out_delete_evlist;
2545 }
2546
2547 perf_evlist__config(evlist, &trace->opts, NULL);
2548
2549 if (trace->opts.callgraph_set && trace->syscalls.events.sys_exit) {
2550 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2551 &trace->opts, &callchain_param);
2552 /*
2553 * Now we have evsels with different sample_ids, use
2554 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2555 * from a fixed position in each ring buffer record.
2556 *
2557 * As of the changeset introducing this comment, this
2558 * isn't strictly needed, as the fields that can come before
2559 * PERF_SAMPLE_ID are all used, but we'll probably disable
2560 * some of those for things like copying the payload of
2561 * pointer syscall arguments, and for vfs_getname we don't
2562 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2563 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2564 */
2565 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2566 perf_evlist__reset_sample_bit(evlist, ID);
2567 }
2568
2569 signal(SIGCHLD, sig_handler);
2570 signal(SIGINT, sig_handler);
2571
2572 if (forks) {
2573 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
2574 argv, false, NULL);
2575 if (err < 0) {
2576 fprintf(trace->output, "Couldn't run the workload!\n");
2577 goto out_delete_evlist;
2578 }
2579 }
2580
2581 err = perf_evlist__open(evlist);
2582 if (err < 0)
2583 goto out_error_open;
2584
2585 err = bpf__apply_obj_config();
2586 if (err) {
2587 char errbuf[BUFSIZ];
2588
2589 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2590 pr_err("ERROR: Apply config to BPF failed: %s\n",
2591 errbuf);
2592 goto out_error_open;
2593 }
2594
2595 /*
2596 * Better not use !target__has_task() here because we need to cover the
2597 * case where no threads were specified in the command line, but a
2598 * workload was, and in that case we will fill in the thread_map when
2599 * we fork the workload in perf_evlist__prepare_workload.
2600 */
2601 if (trace->filter_pids.nr > 0)
2602 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
2603 else if (thread_map__pid(evlist->threads, 0) == -1)
2604 err = perf_evlist__set_filter_pid(evlist, getpid());
2605
2606 if (err < 0)
2607 goto out_error_mem;
2608
2609 if (trace->ev_qualifier_ids.nr > 0) {
2610 err = trace__set_ev_qualifier_filter(trace);
2611 if (err < 0)
2612 goto out_errno;
2613
2614 pr_debug("event qualifier tracepoint filter: %s\n",
2615 trace->syscalls.events.sys_exit->filter);
2616 }
2617
2618 err = perf_evlist__apply_filters(evlist, &evsel);
2619 if (err < 0)
2620 goto out_error_apply_filters;
2621
2622 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
2623 if (err < 0)
2624 goto out_error_mmap;
2625
2626 if (!target__none(&trace->opts.target))
2627 perf_evlist__enable(evlist);
2628
2629 if (forks)
2630 perf_evlist__start_workload(evlist);
2631
2632 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
2633 evlist->threads->nr > 1 ||
2634 perf_evlist__first(evlist)->attr.inherit;
2635 again:
2636 before = trace->nr_events;
2637
2638 for (i = 0; i < evlist->nr_mmaps; i++) {
2639 union perf_event *event;
2640
2641 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
2642 struct perf_sample sample;
2643
2644 ++trace->nr_events;
2645
2646 err = perf_evlist__parse_sample(evlist, event, &sample);
2647 if (err) {
2648 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
2649 goto next_event;
2650 }
2651
2652 trace__handle_event(trace, event, &sample);
2653 next_event:
2654 perf_evlist__mmap_consume(evlist, i);
2655
2656 if (interrupted)
2657 goto out_disable;
2658
2659 if (done && !draining) {
2660 perf_evlist__disable(evlist);
2661 draining = true;
2662 }
2663 }
2664 }
2665
2666 if (trace->nr_events == before) {
2667 int timeout = done ? 100 : -1;
2668
2669 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2670 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2671 draining = true;
2672
2673 goto again;
2674 }
2675 } else {
2676 goto again;
2677 }
2678
2679 out_disable:
2680 thread__zput(trace->current);
2681
2682 perf_evlist__disable(evlist);
2683
2684 if (!err) {
2685 if (trace->summary)
2686 trace__fprintf_thread_summary(trace, trace->output);
2687
2688 if (trace->show_tool_stats) {
2689 fprintf(trace->output, "Stats:\n "
2690 " vfs_getname : %" PRIu64 "\n"
2691 " proc_getname: %" PRIu64 "\n",
2692 trace->stats.vfs_getname,
2693 trace->stats.proc_getname);
2694 }
2695 }
2696
2697 out_delete_evlist:
2698 perf_evlist__delete(evlist);
2699 trace->evlist = NULL;
2700 trace->live = false;
2701 return err;
2702 {
2703 char errbuf[BUFSIZ];
2704
2705 out_error_sched_stat_runtime:
2706 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2707 goto out_error;
2708
2709 out_error_raw_syscalls:
2710 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
2711 goto out_error;
2712
2713 out_error_mmap:
2714 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2715 goto out_error;
2716
2717 out_error_open:
2718 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2719
2720 out_error:
2721 fprintf(trace->output, "%s\n", errbuf);
2722 goto out_delete_evlist;
2723
2724 out_error_apply_filters:
2725 fprintf(trace->output,
2726 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2727 evsel->filter, perf_evsel__name(evsel), errno,
2728 strerror_r(errno, errbuf, sizeof(errbuf)));
2729 goto out_delete_evlist;
2730 }
2731 out_error_mem:
2732 fprintf(trace->output, "Not enough memory to run!\n");
2733 goto out_delete_evlist;
2734
2735 out_errno:
2736 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2737 goto out_delete_evlist;
2738 }
2739
2740 static int trace__replay(struct trace *trace)
2741 {
2742 const struct perf_evsel_str_handler handlers[] = {
2743 { "probe:vfs_getname", trace__vfs_getname, },
2744 };
2745 struct perf_data_file file = {
2746 .path = input_name,
2747 .mode = PERF_DATA_MODE_READ,
2748 .force = trace->force,
2749 };
2750 struct perf_session *session;
2751 struct perf_evsel *evsel;
2752 int err = -1;
2753
2754 trace->tool.sample = trace__process_sample;
2755 trace->tool.mmap = perf_event__process_mmap;
2756 trace->tool.mmap2 = perf_event__process_mmap2;
2757 trace->tool.comm = perf_event__process_comm;
2758 trace->tool.exit = perf_event__process_exit;
2759 trace->tool.fork = perf_event__process_fork;
2760 trace->tool.attr = perf_event__process_attr;
2761 trace->tool.tracing_data = perf_event__process_tracing_data;
2762 trace->tool.build_id = perf_event__process_build_id;
2763
2764 trace->tool.ordered_events = true;
2765 trace->tool.ordering_requires_timestamps = true;
2766
2767 /* add tid to output */
2768 trace->multiple_threads = true;
2769
2770 session = perf_session__new(&file, false, &trace->tool);
2771 if (session == NULL)
2772 return -1;
2773
2774 if (symbol__init(&session->header.env) < 0)
2775 goto out;
2776
2777 trace->host = &session->machines.host;
2778
2779 err = perf_session__set_tracepoints_handlers(session, handlers);
2780 if (err)
2781 goto out;
2782
2783 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2784 "raw_syscalls:sys_enter");
2785 /* older kernels have syscalls tp versus raw_syscalls */
2786 if (evsel == NULL)
2787 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2788 "syscalls:sys_enter");
2789
2790 if (evsel &&
2791 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2792 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
2793 pr_err("Error during initialization of the raw_syscalls:sys_enter event\n");
2794 goto out;
2795 }
2796
2797 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2798 "raw_syscalls:sys_exit");
2799 if (evsel == NULL)
2800 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2801 "syscalls:sys_exit");
2802 if (evsel &&
2803 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2804 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
2805 pr_err("Error during initialization of the raw_syscalls:sys_exit event\n");
2806 goto out;
2807 }
2808
2809 evlist__for_each(session->evlist, evsel) {
2810 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2811 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2812 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2813 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2814 evsel->handler = trace__pgfault;
2815 }
2816
2817 err = parse_target_str(trace);
2818 if (err != 0)
2819 goto out;
2820
2821 setup_pager();
2822
2823 err = perf_session__process_events(session);
2824 if (err)
2825 pr_err("Failed to process events, error %d", err);
2826
2827 else if (trace->summary)
2828 trace__fprintf_thread_summary(trace, trace->output);
2829
2830 out:
2831 perf_session__delete(session);
2832
2833 return err;
2834 }
2835
2836 static size_t trace__fprintf_threads_header(FILE *fp)
2837 {
2838 size_t printed;
2839
2840 printed = fprintf(fp, "\n Summary of events:\n\n");
2841
2842 return printed;
2843 }
2844
2845 static size_t thread__dump_stats(struct thread_trace *ttrace,
2846 struct trace *trace, FILE *fp)
2847 {
2848 struct stats *stats;
2849 size_t printed = 0;
2850 struct syscall *sc;
2851 struct int_node *inode = intlist__first(ttrace->syscall_stats);
2852
2853 if (inode == NULL)
2854 return 0;
2855
2856 printed += fprintf(fp, "\n");
2857
2858 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2859 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2860 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
2861
2862 /* each int_node is a syscall */
2863 while (inode) {
2864 stats = inode->priv;
2865 if (stats) {
2866 double min = (double)(stats->min) / NSEC_PER_MSEC;
2867 double max = (double)(stats->max) / NSEC_PER_MSEC;
2868 double avg = avg_stats(stats);
2869 double pct;
2870 u64 n = (u64) stats->n;
2871
2872 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2873 avg /= NSEC_PER_MSEC;
2874
2875 sc = &trace->syscalls.table[inode->i];
2876 printed += fprintf(fp, " %-15s", sc->name);
2877 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
2878 n, avg * n, min, avg);
2879 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
2880 }
2881
2882 inode = intlist__next(inode);
2883 }
2884
2885 printed += fprintf(fp, "\n\n");
2886
2887 return printed;
2888 }
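
/*
 * Illustrative example row (hypothetical numbers):
 *
 *   read                 42    12.345     0.001     0.294     1.871     8.12%
 *
 * i.e. per-syscall call count, total, min, avg and max time in msec plus
 * the relative stddev.
 */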
2889
2890 /* struct used to pass data to per-thread function */
2891 struct summary_data {
2892 FILE *fp;
2893 struct trace *trace;
2894 size_t printed;
2895 };
2896
2897 static int trace__fprintf_one_thread(struct thread *thread, void *priv)
2898 {
2899 struct summary_data *data = priv;
2900 FILE *fp = data->fp;
2901 size_t printed = data->printed;
2902 struct trace *trace = data->trace;
2903 struct thread_trace *ttrace = thread__priv(thread);
2904 double ratio;
2905
2906 if (ttrace == NULL)
2907 return 0;
2908
2909 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2910
2911 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
2912 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
2913 printed += fprintf(fp, "%.1f%%", ratio);
2914 if (ttrace->pfmaj)
2915 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2916 if (ttrace->pfmin)
2917 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
2918 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2919 printed += thread__dump_stats(ttrace, trace, fp);
2920
2921 data->printed += printed;
2922
2923 return 0;
2924 }
2925
2926 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2927 {
2928 struct summary_data data = {
2929 .fp = fp,
2930 .trace = trace
2931 };
2932 data.printed = trace__fprintf_threads_header(fp);
2933
2934 machine__for_each_thread(trace->host, trace__fprintf_one_thread, &data);
2935
2936 return data.printed;
2937 }
2938
2939 static int trace__set_duration(const struct option *opt, const char *str,
2940 int unset __maybe_unused)
2941 {
2942 struct trace *trace = opt->value;
2943
2944 trace->duration_filter = atof(str);
2945 return 0;
2946 }
2947
2948 static int trace__set_filter_pids(const struct option *opt, const char *str,
2949 int unset __maybe_unused)
2950 {
2951 int ret = -1;
2952 size_t i;
2953 struct trace *trace = opt->value;
2954 /*
2955 * FIXME: introduce an intarray class, just parse the csv and create a
2956 * { int nr, int entries[] } struct...
2957 */
2958 struct intlist *list = intlist__new(str);
2959
2960 if (list == NULL)
2961 return -1;
2962
2963 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2964 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2965
2966 if (trace->filter_pids.entries == NULL)
2967 goto out;
2968
2969 trace->filter_pids.entries[0] = getpid();
2970
2971 for (i = 1; i < trace->filter_pids.nr; ++i)
2972 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2973
2974 intlist__delete(list);
2975 ret = 0;
2976 out:
2977 return ret;
2978 }
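
/*
 * Illustrative example: "--filter-pids 1,234" results in
 * filter_pids.entries[] = { getpid(), 1, 234 } and filter_pids.nr = 3,
 * i.e. the tracer's own pid is always filtered out along with the ones
 * given on the command line.
 */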
2979
2980 static int trace__open_output(struct trace *trace, const char *filename)
2981 {
2982 struct stat st;
2983
2984 if (!stat(filename, &st) && st.st_size) {
2985 char oldname[PATH_MAX];
2986
2987 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2988 unlink(oldname);
2989 rename(filename, oldname);
2990 }
2991
2992 trace->output = fopen(filename, "w");
2993
2994 return trace->output == NULL ? -errno : 0;
2995 }
2996
2997 static int parse_pagefaults(const struct option *opt, const char *str,
2998 int unset __maybe_unused)
2999 {
3000 int *trace_pgfaults = opt->value;
3001
3002 if (strcmp(str, "all") == 0)
3003 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
3004 else if (strcmp(str, "maj") == 0)
3005 *trace_pgfaults |= TRACE_PFMAJ;
3006 else if (strcmp(str, "min") == 0)
3007 *trace_pgfaults |= TRACE_PFMIN;
3008 else
3009 return -1;
3010
3011 return 0;
3012 }
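
/*
 * Illustrative usage: "-F" alone defaults to "maj" (see the
 * OPT_CALLBACK_DEFAULT entry below), "-F min" adds TRACE_PFMIN and
 * "-F all" traces both major and minor faults.
 */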
3013
3014 static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
3015 {
3016 struct perf_evsel *evsel;
3017
3018 evlist__for_each(evlist, evsel)
3019 evsel->handler = handler;
3020 }
3021
3022 int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
3023 {
3024 const char *trace_usage[] = {
3025 "perf trace [<options>] [<command>]",
3026 "perf trace [<options>] -- <command> [<options>]",
3027 "perf trace record [<options>] [<command>]",
3028 "perf trace record [<options>] -- <command> [<options>]",
3029 NULL
3030 };
3031 struct trace trace = {
3032 .syscalls = {
3033 .max = -1,
3034 },
3035 .opts = {
3036 .target = {
3037 .uid = UINT_MAX,
3038 .uses_mmap = true,
3039 },
3040 .user_freq = UINT_MAX,
3041 .user_interval = ULLONG_MAX,
3042 .no_buffering = true,
3043 .mmap_pages = UINT_MAX,
3044 .proc_map_timeout = 500,
3045 },
3046 .output = stderr,
3047 .show_comm = true,
3048 .trace_syscalls = true,
3049 .kernel_syscallchains = false,
3050 .max_stack = UINT_MAX,
3051 };
3052 const char *output_name = NULL;
3053 const char *ev_qualifier_str = NULL;
3054 const struct option trace_options[] = {
3055 OPT_CALLBACK(0, "event", &trace.evlist, "event",
3056 "event selector. use 'perf list' to list available events",
3057 parse_events_option),
3058 OPT_BOOLEAN(0, "comm", &trace.show_comm,
3059 "show the thread COMM next to its id"),
3060 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
3061 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
3062 OPT_STRING('o', "output", &output_name, "file", "output file name"),
3063 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
3064 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3065 "trace events on existing process id"),
3066 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
3067 "trace events on existing thread id"),
3068 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3069 "pids to filter (by the kernel)", trace__set_filter_pids),
3070 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
3071 "system-wide collection from all CPUs"),
3072 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
3073 "list of cpus to monitor"),
3074 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
3075 "child tasks do not inherit counters"),
3076 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3077 "number of mmap data pages",
3078 perf_evlist__parse_mmap_pages),
3079 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
3080 "user to profile"),
3081 OPT_CALLBACK(0, "duration", &trace, "float",
3082 "show only events with duration > N.M ms",
3083 trace__set_duration),
3084 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3085 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3086 OPT_BOOLEAN('T', "time", &trace.full_time,
3087 "Show full timestamp, not time relative to first start"),
3088 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3089 "Show only syscall summary with statistics"),
3090 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3091 "Show all syscalls and summary with statistics"),
3092 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3093 "Trace pagefaults", parse_pagefaults, "maj"),
3094 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
3095 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
3096 OPT_CALLBACK(0, "call-graph", &trace.opts,
3097 "record_mode[,record_size]", record_callchain_help,
3098 &record_parse_callchain_opt),
3099 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
3100 "Show the kernel callchains on the syscall exit path"),
3101 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
3102 "Set the minimum stack depth when parsing the callchain, "
3103 "anything below the specified depth will be ignored."),
3104 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
3105 "Set the maximum stack depth when parsing the callchain, "
3106 "anything beyond the specified depth will be ignored. "
3107 "Default: " __stringify(PERF_MAX_STACK_DEPTH)),
3108 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3109 "per thread proc mmap processing timeout in ms"),
3110 OPT_END()
3111 };
3112 bool __maybe_unused max_stack_user_set = true;
3113 bool mmap_pages_user_set = true;
3114 const char * const trace_subcommands[] = { "record", NULL };
3115 int err;
3116 char bf[BUFSIZ];
3117
3118 signal(SIGSEGV, sighandler_dump_stack);
3119 signal(SIGFPE, sighandler_dump_stack);
3120
3121 trace.evlist = perf_evlist__new();
3122 trace.sctbl = syscalltbl__new();
3123
3124 if (trace.evlist == NULL || trace.sctbl == NULL) {
3125 pr_err("Not enough memory to run!\n");
3126 err = -ENOMEM;
3127 goto out;
3128 }
3129
3130 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3131 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
3132
3133 err = bpf__setup_stdout(trace.evlist);
3134 if (err) {
3135 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
3136 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
3137 goto out;
3138 }
3139
3140 err = -1;
3141
3142 if (trace.trace_pgfaults) {
3143 trace.opts.sample_address = true;
3144 trace.opts.sample_time = true;
3145 }
3146
3147 if (trace.opts.mmap_pages == UINT_MAX)
3148 mmap_pages_user_set = false;
3149
3150 if (trace.max_stack == UINT_MAX) {
3151 trace.max_stack = PERF_MAX_STACK_DEPTH;
3152 max_stack_user_set = false;
3153 }
3154
3155 #ifdef HAVE_DWARF_UNWIND_SUPPORT
3156 if ((trace.min_stack || max_stack_user_set) && !trace.opts.callgraph_set)
3157 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
3158 #endif
3159
3160 if (trace.opts.callgraph_set) {
3161 if (!mmap_pages_user_set && geteuid() == 0)
3162 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
3163
3164 symbol_conf.use_callchain = true;
3165 }
3166
3167 if (trace.evlist->nr_entries > 0)
3168 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3169
3170 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3171 return trace__record(&trace, argc-1, &argv[1]);
3172
3173 /* summary_only implies summary option, but don't overwrite summary if set */
3174 if (trace.summary_only)
3175 trace.summary = trace.summary_only;
3176
3177 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3178 trace.evlist->nr_entries == 0 /* Was --events used? */) {
3179 pr_err("Please specify something to trace.\n");
3180 return -1;
3181 }
3182
3183 if (!trace.trace_syscalls && ev_qualifier_str) {
3184 pr_err("The -e option can't be used with --no-syscalls.\n");
3185 goto out;
3186 }
3187
3188 if (output_name != NULL) {
3189 err = trace__open_output(&trace, output_name);
3190 if (err < 0) {
3191 perror("failed to create output file");
3192 goto out;
3193 }
3194 }
3195
3196 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3197
3198 if (ev_qualifier_str != NULL) {
3199 const char *s = ev_qualifier_str;
3200 struct strlist_config slist_config = {
3201 .dirname = system_path(STRACE_GROUPS_DIR),
3202 };
3203
3204 trace.not_ev_qualifier = *s == '!';
3205 if (trace.not_ev_qualifier)
3206 ++s;
3207 trace.ev_qualifier = strlist__new(s, &slist_config);
3208 if (trace.ev_qualifier == NULL) {
3209 fputs("Not enough memory to parse event qualifier\n",
3210 trace.output);
3211 err = -ENOMEM;
3212 goto out_close;
3213 }
3214
3215 err = trace__validate_ev_qualifier(&trace);
3216 if (err)
3217 goto out_close;
3218 }
3219
3220 err = target__validate(&trace.opts.target);
3221 if (err) {
3222 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3223 fprintf(trace.output, "%s", bf);
3224 goto out_close;
3225 }
3226
3227 err = target__parse_uid(&trace.opts.target);
3228 if (err) {
3229 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
3230 fprintf(trace.output, "%s", bf);
3231 goto out_close;
3232 }
3233
3234 if (!argc && target__none(&trace.opts.target))
3235 trace.opts.target.system_wide = true;
3236
3237 if (input_name)
3238 err = trace__replay(&trace);
3239 else
3240 err = trace__run(&trace, argc, argv);
3241
3242 out_close:
3243 if (output_name != NULL)
3244 fclose(trace.output);
3245 out:
3246 return err;
3247 }