/*
 * Builtin 'trace' command:
 *
 * Display a continuously updated trace of any workload, CPU, specific PID,
 * system-wide, etc. The default format is loosely strace-like, but any other
 * event may be specified using --event.
 *
 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Initially based on the 'trace' prototype by Thomas Gleixner:
 *
 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
 *
 * Released under the GPL v2. (and only v2, not any later version)
 */
19 #include <traceevent/event-parse.h>
20 #include <api/fs/tracing_path.h>
22 #include "util/color.h"
23 #include "util/debug.h"
24 #include "util/evlist.h"
25 #include <subcmd/exec-cmd.h>
26 #include "util/machine.h"
27 #include "util/session.h"
28 #include "util/thread.h"
29 #include <subcmd/parse-options.h>
30 #include "util/strlist.h"
31 #include "util/intlist.h"
32 #include "util/thread_map.h"
33 #include "util/stat.h"
34 #include "trace-event.h"
35 #include "util/parse-events.h"
36 #include "util/bpf-loader.h"
37 #include "callchain.h"
38 #include "syscalltbl.h"
40 #include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
42 #include <linux/futex.h>
43 #include <linux/err.h>
44 #include <linux/seccomp.h>
45 #include <linux/filter.h>
46 #include <linux/audit.h>
47 #include <sys/ptrace.h>
48 #include <linux/random.h>
49 #include <linux/stringify.h>
52 # define O_CLOEXEC 02000000
55 #ifndef MSG_CMSG_CLOEXEC
56 # define MSG_CMSG_CLOEXEC 0x40000000
59 #ifndef PERF_FLAG_FD_NO_GROUP
60 # define PERF_FLAG_FD_NO_GROUP (1UL << 0)
63 #ifndef PERF_FLAG_FD_OUTPUT
64 # define PERF_FLAG_FD_OUTPUT (1UL << 1)
67 #ifndef PERF_FLAG_PID_CGROUP
68 # define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
71 #ifndef PERF_FLAG_FD_CLOEXEC
72 # define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
76 struct perf_tool tool
;
77 struct syscalltbl
*sctbl
;
80 struct syscall
*table
;
82 struct perf_evsel
*sys_enter
,
86 struct record_opts opts
;
87 struct perf_evlist
*evlist
;
89 struct thread
*current
;
92 unsigned long nr_events
;
93 struct strlist
*ev_qualifier
;
98 struct intlist
*tid_list
;
99 struct intlist
*pid_list
;
104 double duration_filter
;
110 unsigned int max_stack
;
111 unsigned int min_stack
;
112 bool not_ev_qualifier
;
116 bool multiple_threads
;
120 bool show_tool_stats
;
122 bool kernel_syscallchains
;
132 u64 (*integer
)(struct tp_field
*field
, struct perf_sample
*sample
);
133 void *(*pointer
)(struct tp_field
*field
, struct perf_sample
*sample
);
137 #define TP_UINT_FIELD(bits) \
138 static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
141 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
150 #define TP_UINT_FIELD__SWAPPED(bits) \
151 static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
154 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
155 return bswap_##bits(value);\
158 TP_UINT_FIELD__SWAPPED(16);
159 TP_UINT_FIELD__SWAPPED(32);
160 TP_UINT_FIELD__SWAPPED(64);
162 static int tp_field__init_uint(struct tp_field
*field
,
163 struct format_field
*format_field
,
166 field
->offset
= format_field
->offset
;
168 switch (format_field
->size
) {
170 field
->integer
= tp_field__u8
;
173 field
->integer
= needs_swap
? tp_field__swapped_u16
: tp_field__u16
;
176 field
->integer
= needs_swap
? tp_field__swapped_u32
: tp_field__u32
;
179 field
->integer
= needs_swap
? tp_field__swapped_u64
: tp_field__u64
;
188 static void *tp_field__ptr(struct tp_field
*field
, struct perf_sample
*sample
)
190 return sample
->raw_data
+ field
->offset
;
193 static int tp_field__init_ptr(struct tp_field
*field
, struct format_field
*format_field
)
195 field
->offset
= format_field
->offset
;
196 field
->pointer
= tp_field__ptr
;
203 struct tp_field args
, ret
;
207 static int perf_evsel__init_tp_uint_field(struct perf_evsel
*evsel
,
208 struct tp_field
*field
,
211 struct format_field
*format_field
= perf_evsel__field(evsel
, name
);
213 if (format_field
== NULL
)
216 return tp_field__init_uint(field
, format_field
, evsel
->needs_swap
);
219 #define perf_evsel__init_sc_tp_uint_field(evsel, name) \
220 ({ struct syscall_tp *sc = evsel->priv;\
221 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
223 static int perf_evsel__init_tp_ptr_field(struct perf_evsel
*evsel
,
224 struct tp_field
*field
,
227 struct format_field
*format_field
= perf_evsel__field(evsel
, name
);
229 if (format_field
== NULL
)
232 return tp_field__init_ptr(field
, format_field
);
235 #define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
236 ({ struct syscall_tp *sc = evsel->priv;\
237 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
239 static void perf_evsel__delete_priv(struct perf_evsel
*evsel
)
242 perf_evsel__delete(evsel
);
245 static int perf_evsel__init_syscall_tp(struct perf_evsel
*evsel
, void *handler
)
247 evsel
->priv
= malloc(sizeof(struct syscall_tp
));
248 if (evsel
->priv
!= NULL
) {
249 if (perf_evsel__init_sc_tp_uint_field(evsel
, id
))
252 evsel
->handler
= handler
;
263 static struct perf_evsel
*perf_evsel__syscall_newtp(const char *direction
, void *handler
)
265 struct perf_evsel
*evsel
= perf_evsel__newtp("raw_syscalls", direction
);
267 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
269 evsel
= perf_evsel__newtp("syscalls", direction
);
274 if (perf_evsel__init_syscall_tp(evsel
, handler
))
280 perf_evsel__delete_priv(evsel
);
284 #define perf_evsel__sc_tp_uint(evsel, name, sample) \
285 ({ struct syscall_tp *fields = evsel->priv; \
286 fields->name.integer(&fields->name, sample); })
288 #define perf_evsel__sc_tp_ptr(evsel, name, sample) \
289 ({ struct syscall_tp *fields = evsel->priv; \
290 fields->name.pointer(&fields->name, sample); })
294 struct thread
*thread
;
304 const char **entries
;
307 #define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
308 .nr_entries = ARRAY_SIZE(array), \
312 #define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
314 .nr_entries = ARRAY_SIZE(array), \
318 static size_t __syscall_arg__scnprintf_strarray(char *bf
, size_t size
,
320 struct syscall_arg
*arg
)
322 struct strarray
*sa
= arg
->parm
;
323 int idx
= arg
->val
- sa
->offset
;
325 if (idx
< 0 || idx
>= sa
->nr_entries
)
326 return scnprintf(bf
, size
, intfmt
, arg
->val
);
328 return scnprintf(bf
, size
, "%s", sa
->entries
[idx
]);
/*
 * Strarray beautifier whose numeric fallback is signed decimal: values with
 * no name in the table are printed with "%d".
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
}
337 #define SCA_STRARRAY syscall_arg__scnprintf_strarray
339 #if defined(__i386__) || defined(__x86_64__)
341 * FIXME: Make this available to all arches as soon as the ioctl beautifier
342 * gets rewritten to support all arches.
/*
 * Strarray beautifier whose numeric fallback is hexadecimal: values with no
 * name in the table are printed with "%#x".
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
}
350 #define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
351 #endif /* defined(__i386__) || defined(__x86_64__) */
353 static size_t syscall_arg__scnprintf_fd(char *bf
, size_t size
,
354 struct syscall_arg
*arg
);
356 #define SCA_FD syscall_arg__scnprintf_fd
358 static size_t syscall_arg__scnprintf_fd_at(char *bf
, size_t size
,
359 struct syscall_arg
*arg
)
364 return scnprintf(bf
, size
, "CWD");
366 return syscall_arg__scnprintf_fd(bf
, size
, arg
);
369 #define SCA_FDAT syscall_arg__scnprintf_fd_at
371 static size_t syscall_arg__scnprintf_close_fd(char *bf
, size_t size
,
372 struct syscall_arg
*arg
);
374 #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
376 static size_t syscall_arg__scnprintf_hex(char *bf
, size_t size
,
377 struct syscall_arg
*arg
)
379 return scnprintf(bf
, size
, "%#lx", arg
->val
);
382 #define SCA_HEX syscall_arg__scnprintf_hex
384 static size_t syscall_arg__scnprintf_int(char *bf
, size_t size
,
385 struct syscall_arg
*arg
)
387 return scnprintf(bf
, size
, "%d", arg
->val
);
390 #define SCA_INT syscall_arg__scnprintf_int
392 static size_t syscall_arg__scnprintf_flock(char *bf
, size_t size
,
393 struct syscall_arg
*arg
)
395 int printed
= 0, op
= arg
->val
;
398 return scnprintf(bf
, size
, "NONE");
400 if ((op & LOCK_##cmd) == LOCK_##cmd) { \
401 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
416 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", op
);
421 #define SCA_FLOCK syscall_arg__scnprintf_flock
423 static size_t syscall_arg__scnprintf_futex_op(char *bf
, size_t size
, struct syscall_arg
*arg
)
425 enum syscall_futex_args
{
426 SCF_UADDR
= (1 << 0),
429 SCF_TIMEOUT
= (1 << 3),
430 SCF_UADDR2
= (1 << 4),
434 int cmd
= op
& FUTEX_CMD_MASK
;
438 #define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
439 P_FUTEX_OP(WAIT
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
; break;
440 P_FUTEX_OP(WAKE
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
441 P_FUTEX_OP(FD
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
442 P_FUTEX_OP(REQUEUE
); arg
->mask
|= SCF_VAL3
|SCF_TIMEOUT
; break;
443 P_FUTEX_OP(CMP_REQUEUE
); arg
->mask
|= SCF_TIMEOUT
; break;
444 P_FUTEX_OP(CMP_REQUEUE_PI
); arg
->mask
|= SCF_TIMEOUT
; break;
445 P_FUTEX_OP(WAKE_OP
); break;
446 P_FUTEX_OP(LOCK_PI
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
447 P_FUTEX_OP(UNLOCK_PI
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
|SCF_TIMEOUT
; break;
448 P_FUTEX_OP(TRYLOCK_PI
); arg
->mask
|= SCF_VAL3
|SCF_UADDR2
; break;
449 P_FUTEX_OP(WAIT_BITSET
); arg
->mask
|= SCF_UADDR2
; break;
450 P_FUTEX_OP(WAKE_BITSET
); arg
->mask
|= SCF_UADDR2
; break;
451 P_FUTEX_OP(WAIT_REQUEUE_PI
); break;
452 default: printed
= scnprintf(bf
, size
, "%#x", cmd
); break;
455 if (op
& FUTEX_PRIVATE_FLAG
)
456 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|PRIV");
458 if (op
& FUTEX_CLOCK_REALTIME
)
459 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|CLKRT");
464 #define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
466 static const char *bpf_cmd
[] = {
467 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
468 "MAP_GET_NEXT_KEY", "PROG_LOAD",
470 static DEFINE_STRARRAY(bpf_cmd
);
472 static const char *epoll_ctl_ops
[] = { "ADD", "DEL", "MOD", };
473 static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops
, 1);
475 static const char *itimers
[] = { "REAL", "VIRTUAL", "PROF", };
476 static DEFINE_STRARRAY(itimers
);
478 static const char *keyctl_options
[] = {
479 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
480 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
481 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
482 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
483 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
485 static DEFINE_STRARRAY(keyctl_options
);
487 static const char *whences
[] = { "SET", "CUR", "END",
495 static DEFINE_STRARRAY(whences
);
497 static const char *fcntl_cmds
[] = {
498 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
499 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
500 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
503 static DEFINE_STRARRAY(fcntl_cmds
);
505 static const char *rlimit_resources
[] = {
506 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
507 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
510 static DEFINE_STRARRAY(rlimit_resources
);
512 static const char *sighow
[] = { "BLOCK", "UNBLOCK", "SETMASK", };
513 static DEFINE_STRARRAY(sighow
);
515 static const char *clockid
[] = {
516 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
517 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
518 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
520 static DEFINE_STRARRAY(clockid
);
522 static const char *socket_families
[] = {
523 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
524 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
525 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
526 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
527 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
528 "ALG", "NFC", "VSOCK",
530 static DEFINE_STRARRAY(socket_families
);
533 #define MSG_PROBE 0x10
535 #ifndef MSG_WAITFORONE
536 #define MSG_WAITFORONE 0x10000
538 #ifndef MSG_SENDPAGE_NOTLAST
539 #define MSG_SENDPAGE_NOTLAST 0x20000
542 #define MSG_FASTOPEN 0x20000000
545 static size_t syscall_arg__scnprintf_msg_flags(char *bf
, size_t size
,
546 struct syscall_arg
*arg
)
548 int printed
= 0, flags
= arg
->val
;
551 return scnprintf(bf
, size
, "NONE");
552 #define P_MSG_FLAG(n) \
553 if (flags & MSG_##n) { \
554 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
560 P_MSG_FLAG(DONTROUTE
);
565 P_MSG_FLAG(DONTWAIT
);
572 P_MSG_FLAG(ERRQUEUE
);
573 P_MSG_FLAG(NOSIGNAL
);
575 P_MSG_FLAG(WAITFORONE
);
576 P_MSG_FLAG(SENDPAGE_NOTLAST
);
577 P_MSG_FLAG(FASTOPEN
);
578 P_MSG_FLAG(CMSG_CLOEXEC
);
582 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
587 #define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
589 static size_t syscall_arg__scnprintf_access_mode(char *bf
, size_t size
,
590 struct syscall_arg
*arg
)
595 if (mode
== F_OK
) /* 0 */
596 return scnprintf(bf
, size
, "F");
598 if (mode & n##_OK) { \
599 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
609 printed
+= scnprintf(bf
+ printed
, size
- printed
, "|%#x", mode
);
614 #define SCA_ACCMODE syscall_arg__scnprintf_access_mode
616 static size_t syscall_arg__scnprintf_filename(char *bf
, size_t size
,
617 struct syscall_arg
*arg
);
619 #define SCA_FILENAME syscall_arg__scnprintf_filename
621 static size_t syscall_arg__scnprintf_open_flags(char *bf
, size_t size
,
622 struct syscall_arg
*arg
)
624 int printed
= 0, flags
= arg
->val
;
626 if (!(flags
& O_CREAT
))
627 arg
->mask
|= 1 << (arg
->idx
+ 1); /* Mask the mode parm */
630 return scnprintf(bf
, size
, "RDONLY");
632 if (flags & O_##n) { \
633 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
657 if ((flags
& O_SYNC
) == O_SYNC
)
658 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%s", printed
? "|" : "", "SYNC");
670 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
675 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
677 static size_t syscall_arg__scnprintf_perf_flags(char *bf
, size_t size
,
678 struct syscall_arg
*arg
)
680 int printed
= 0, flags
= arg
->val
;
686 if (flags & PERF_FLAG_##n) { \
687 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
688 flags &= ~PERF_FLAG_##n; \
698 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
703 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
705 static size_t syscall_arg__scnprintf_pipe_flags(char *bf
, size_t size
,
706 struct syscall_arg
*arg
)
708 int printed
= 0, flags
= arg
->val
;
711 if (flags & O_##n) { \
712 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
721 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
726 #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
728 static size_t syscall_arg__scnprintf_signum(char *bf
, size_t size
, struct syscall_arg
*arg
)
733 #define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
776 return scnprintf(bf
, size
, "%#x", sig
);
779 #define SCA_SIGNUM syscall_arg__scnprintf_signum
781 #if defined(__i386__) || defined(__x86_64__)
783 * FIXME: Make this available to all arches.
785 #define TCGETS 0x5401
787 static const char *tioctls
[] = {
788 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
789 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
790 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
791 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
792 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
793 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
794 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
795 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
796 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
797 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
798 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
799 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
800 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
801 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
802 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
805 static DEFINE_STRARRAY_OFFSET(tioctls
, 0x5401);
806 #endif /* defined(__i386__) || defined(__x86_64__) */
808 #ifndef SECCOMP_SET_MODE_STRICT
809 #define SECCOMP_SET_MODE_STRICT 0
811 #ifndef SECCOMP_SET_MODE_FILTER
812 #define SECCOMP_SET_MODE_FILTER 1
815 static size_t syscall_arg__scnprintf_seccomp_op(char *bf
, size_t size
, struct syscall_arg
*arg
)
821 #define P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break
822 P_SECCOMP_SET_MODE_OP(STRICT
);
823 P_SECCOMP_SET_MODE_OP(FILTER
);
824 #undef P_SECCOMP_SET_MODE_OP
825 default: printed
= scnprintf(bf
, size
, "%#x", op
); break;
831 #define SCA_SECCOMP_OP syscall_arg__scnprintf_seccomp_op
833 #ifndef SECCOMP_FILTER_FLAG_TSYNC
834 #define SECCOMP_FILTER_FLAG_TSYNC 1
837 static size_t syscall_arg__scnprintf_seccomp_flags(char *bf
, size_t size
,
838 struct syscall_arg
*arg
)
840 int printed
= 0, flags
= arg
->val
;
843 if (flags & SECCOMP_FILTER_FLAG_##n) { \
844 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
845 flags &= ~SECCOMP_FILTER_FLAG_##n; \
852 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
857 #define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags
859 #ifndef GRND_NONBLOCK
860 #define GRND_NONBLOCK 0x0001
863 #define GRND_RANDOM 0x0002
866 static size_t syscall_arg__scnprintf_getrandom_flags(char *bf
, size_t size
,
867 struct syscall_arg
*arg
)
869 int printed
= 0, flags
= arg
->val
;
872 if (flags & GRND_##n) { \
873 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
874 flags &= ~GRND_##n; \
882 printed
+= scnprintf(bf
+ printed
, size
- printed
, "%s%#x", printed
? "|" : "", flags
);
887 #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
889 #define STRARRAY(arg, name, array) \
890 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
891 .arg_parm = { [arg] = &strarray__##array, }
893 #include "trace/beauty/eventfd.c"
894 #include "trace/beauty/pid.c"
895 #include "trace/beauty/mmap.c"
896 #include "trace/beauty/mode_t.c"
897 #include "trace/beauty/sched_policy.c"
898 #include "trace/beauty/socket_type.c"
899 #include "trace/beauty/waitid_options.c"
901 static struct syscall_fmt
{
904 size_t (*arg_scnprintf
[6])(char *bf
, size_t size
, struct syscall_arg
*arg
);
911 { .name
= "access", .errmsg
= true,
912 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */
913 [1] = SCA_ACCMODE
, /* mode */ }, },
914 { .name
= "arch_prctl", .errmsg
= true, .alias
= "prctl", },
915 { .name
= "bpf", .errmsg
= true, STRARRAY(0, cmd
, bpf_cmd
), },
916 { .name
= "brk", .hexret
= true,
917 .arg_scnprintf
= { [0] = SCA_HEX
, /* brk */ }, },
918 { .name
= "chdir", .errmsg
= true,
919 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
920 { .name
= "chmod", .errmsg
= true,
921 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
922 { .name
= "chroot", .errmsg
= true,
923 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
924 { .name
= "clock_gettime", .errmsg
= true, STRARRAY(0, clk_id
, clockid
), },
925 { .name
= "clone", .errpid
= true, },
926 { .name
= "close", .errmsg
= true,
927 .arg_scnprintf
= { [0] = SCA_CLOSE_FD
, /* fd */ }, },
928 { .name
= "connect", .errmsg
= true, },
929 { .name
= "creat", .errmsg
= true,
930 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
931 { .name
= "dup", .errmsg
= true,
932 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
933 { .name
= "dup2", .errmsg
= true,
934 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
935 { .name
= "dup3", .errmsg
= true,
936 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
937 { .name
= "epoll_ctl", .errmsg
= true, STRARRAY(1, op
, epoll_ctl_ops
), },
938 { .name
= "eventfd2", .errmsg
= true,
939 .arg_scnprintf
= { [1] = SCA_EFD_FLAGS
, /* flags */ }, },
940 { .name
= "faccessat", .errmsg
= true,
941 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
942 [1] = SCA_FILENAME
, /* filename */ }, },
943 { .name
= "fadvise64", .errmsg
= true,
944 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
945 { .name
= "fallocate", .errmsg
= true,
946 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
947 { .name
= "fchdir", .errmsg
= true,
948 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
949 { .name
= "fchmod", .errmsg
= true,
950 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
951 { .name
= "fchmodat", .errmsg
= true,
952 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */
953 [1] = SCA_FILENAME
, /* filename */ }, },
954 { .name
= "fchown", .errmsg
= true,
955 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
956 { .name
= "fchownat", .errmsg
= true,
957 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */
958 [1] = SCA_FILENAME
, /* filename */ }, },
959 { .name
= "fcntl", .errmsg
= true,
960 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
961 [1] = SCA_STRARRAY
, /* cmd */ },
962 .arg_parm
= { [1] = &strarray__fcntl_cmds
, /* cmd */ }, },
963 { .name
= "fdatasync", .errmsg
= true,
964 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
965 { .name
= "flock", .errmsg
= true,
966 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
967 [1] = SCA_FLOCK
, /* cmd */ }, },
968 { .name
= "fsetxattr", .errmsg
= true,
969 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
970 { .name
= "fstat", .errmsg
= true, .alias
= "newfstat",
971 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
972 { .name
= "fstatat", .errmsg
= true, .alias
= "newfstatat",
973 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
974 [1] = SCA_FILENAME
, /* filename */ }, },
975 { .name
= "fstatfs", .errmsg
= true,
976 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
977 { .name
= "fsync", .errmsg
= true,
978 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
979 { .name
= "ftruncate", .errmsg
= true,
980 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
981 { .name
= "futex", .errmsg
= true,
982 .arg_scnprintf
= { [1] = SCA_FUTEX_OP
, /* op */ }, },
983 { .name
= "futimesat", .errmsg
= true,
984 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */
985 [1] = SCA_FILENAME
, /* filename */ }, },
986 { .name
= "getdents", .errmsg
= true,
987 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
988 { .name
= "getdents64", .errmsg
= true,
989 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
990 { .name
= "getitimer", .errmsg
= true, STRARRAY(0, which
, itimers
), },
991 { .name
= "getpid", .errpid
= true, },
992 { .name
= "getpgid", .errpid
= true, },
993 { .name
= "getppid", .errpid
= true, },
994 { .name
= "getrandom", .errmsg
= true,
995 .arg_scnprintf
= { [2] = SCA_GETRANDOM_FLAGS
, /* flags */ }, },
996 { .name
= "getrlimit", .errmsg
= true, STRARRAY(0, resource
, rlimit_resources
), },
997 { .name
= "getxattr", .errmsg
= true,
998 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
999 { .name
= "inotify_add_watch", .errmsg
= true,
1000 .arg_scnprintf
= { [1] = SCA_FILENAME
, /* pathname */ }, },
1001 { .name
= "ioctl", .errmsg
= true,
1002 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1003 #if defined(__i386__) || defined(__x86_64__)
1005 * FIXME: Make this available to all arches.
1007 [1] = SCA_STRHEXARRAY
, /* cmd */
1008 [2] = SCA_HEX
, /* arg */ },
1009 .arg_parm
= { [1] = &strarray__tioctls
, /* cmd */ }, },
1011 [2] = SCA_HEX
, /* arg */ }, },
1013 { .name
= "keyctl", .errmsg
= true, STRARRAY(0, option
, keyctl_options
), },
1014 { .name
= "kill", .errmsg
= true,
1015 .arg_scnprintf
= { [1] = SCA_SIGNUM
, /* sig */ }, },
1016 { .name
= "lchown", .errmsg
= true,
1017 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1018 { .name
= "lgetxattr", .errmsg
= true,
1019 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1020 { .name
= "linkat", .errmsg
= true,
1021 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */ }, },
1022 { .name
= "listxattr", .errmsg
= true,
1023 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1024 { .name
= "llistxattr", .errmsg
= true,
1025 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1026 { .name
= "lremovexattr", .errmsg
= true,
1027 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1028 { .name
= "lseek", .errmsg
= true,
1029 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1030 [2] = SCA_STRARRAY
, /* whence */ },
1031 .arg_parm
= { [2] = &strarray__whences
, /* whence */ }, },
1032 { .name
= "lsetxattr", .errmsg
= true,
1033 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1034 { .name
= "lstat", .errmsg
= true, .alias
= "newlstat",
1035 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1036 { .name
= "lsxattr", .errmsg
= true,
1037 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1038 { .name
= "madvise", .errmsg
= true,
1039 .arg_scnprintf
= { [0] = SCA_HEX
, /* start */
1040 [2] = SCA_MADV_BHV
, /* behavior */ }, },
1041 { .name
= "mkdir", .errmsg
= true,
1042 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1043 { .name
= "mkdirat", .errmsg
= true,
1044 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */
1045 [1] = SCA_FILENAME
, /* pathname */ }, },
1046 { .name
= "mknod", .errmsg
= true,
1047 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1048 { .name
= "mknodat", .errmsg
= true,
1049 .arg_scnprintf
= { [0] = SCA_FDAT
, /* fd */
1050 [1] = SCA_FILENAME
, /* filename */ }, },
1051 { .name
= "mlock", .errmsg
= true,
1052 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1053 { .name
= "mlockall", .errmsg
= true,
1054 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1055 { .name
= "mmap", .hexret
= true,
1056 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */
1057 [2] = SCA_MMAP_PROT
, /* prot */
1058 [3] = SCA_MMAP_FLAGS
, /* flags */
1059 [4] = SCA_FD
, /* fd */ }, },
1060 { .name
= "mprotect", .errmsg
= true,
1061 .arg_scnprintf
= { [0] = SCA_HEX
, /* start */
1062 [2] = SCA_MMAP_PROT
, /* prot */ }, },
1063 { .name
= "mq_unlink", .errmsg
= true,
1064 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* u_name */ }, },
1065 { .name
= "mremap", .hexret
= true,
1066 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */
1067 [3] = SCA_MREMAP_FLAGS
, /* flags */
1068 [4] = SCA_HEX
, /* new_addr */ }, },
1069 { .name
= "munlock", .errmsg
= true,
1070 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1071 { .name
= "munmap", .errmsg
= true,
1072 .arg_scnprintf
= { [0] = SCA_HEX
, /* addr */ }, },
1073 { .name
= "name_to_handle_at", .errmsg
= true,
1074 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
1075 { .name
= "newfstatat", .errmsg
= true,
1076 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1077 [1] = SCA_FILENAME
, /* filename */ }, },
1078 { .name
= "open", .errmsg
= true,
1079 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */
1080 [1] = SCA_OPEN_FLAGS
, /* flags */ }, },
1081 { .name
= "open_by_handle_at", .errmsg
= true,
1082 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1083 [2] = SCA_OPEN_FLAGS
, /* flags */ }, },
1084 { .name
= "openat", .errmsg
= true,
1085 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1086 [1] = SCA_FILENAME
, /* filename */
1087 [2] = SCA_OPEN_FLAGS
, /* flags */ }, },
1088 { .name
= "perf_event_open", .errmsg
= true,
1089 .arg_scnprintf
= { [1] = SCA_INT
, /* pid */
1090 [2] = SCA_INT
, /* cpu */
1091 [3] = SCA_FD
, /* group_fd */
1092 [4] = SCA_PERF_FLAGS
, /* flags */ }, },
1093 { .name
= "pipe2", .errmsg
= true,
1094 .arg_scnprintf
= { [1] = SCA_PIPE_FLAGS
, /* flags */ }, },
1095 { .name
= "poll", .errmsg
= true, .timeout
= true, },
1096 { .name
= "ppoll", .errmsg
= true, .timeout
= true, },
1097 { .name
= "pread", .errmsg
= true, .alias
= "pread64",
1098 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1099 { .name
= "preadv", .errmsg
= true, .alias
= "pread",
1100 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1101 { .name
= "prlimit64", .errmsg
= true, STRARRAY(1, resource
, rlimit_resources
), },
1102 { .name
= "pwrite", .errmsg
= true, .alias
= "pwrite64",
1103 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1104 { .name
= "pwritev", .errmsg
= true,
1105 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1106 { .name
= "read", .errmsg
= true,
1107 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1108 { .name
= "readlink", .errmsg
= true,
1109 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* path */ }, },
1110 { .name
= "readlinkat", .errmsg
= true,
1111 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1112 [1] = SCA_FILENAME
, /* pathname */ }, },
1113 { .name
= "readv", .errmsg
= true,
1114 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1115 { .name
= "recvfrom", .errmsg
= true,
1116 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1117 [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1118 { .name
= "recvmmsg", .errmsg
= true,
1119 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1120 [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1121 { .name
= "recvmsg", .errmsg
= true,
1122 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1123 [2] = SCA_MSG_FLAGS
, /* flags */ }, },
1124 { .name
= "removexattr", .errmsg
= true,
1125 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1126 { .name
= "renameat", .errmsg
= true,
1127 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
1128 { .name
= "rmdir", .errmsg
= true,
1129 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1130 { .name
= "rt_sigaction", .errmsg
= true,
1131 .arg_scnprintf
= { [0] = SCA_SIGNUM
, /* sig */ }, },
1132 { .name
= "rt_sigprocmask", .errmsg
= true, STRARRAY(0, how
, sighow
), },
1133 { .name
= "rt_sigqueueinfo", .errmsg
= true,
1134 .arg_scnprintf
= { [1] = SCA_SIGNUM
, /* sig */ }, },
1135 { .name
= "rt_tgsigqueueinfo", .errmsg
= true,
1136 .arg_scnprintf
= { [2] = SCA_SIGNUM
, /* sig */ }, },
1137 { .name
= "sched_setscheduler", .errmsg
= true,
1138 .arg_scnprintf
= { [1] = SCA_SCHED_POLICY
, /* policy */ }, },
1139 { .name
= "seccomp", .errmsg
= true,
1140 .arg_scnprintf
= { [0] = SCA_SECCOMP_OP
, /* op */
1141 [1] = SCA_SECCOMP_FLAGS
, /* flags */ }, },
1142 { .name
= "select", .errmsg
= true, .timeout
= true, },
1143 { .name
= "sendmmsg", .errmsg
= true,
1144 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1145 [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1146 { .name
= "sendmsg", .errmsg
= true,
1147 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1148 [2] = SCA_MSG_FLAGS
, /* flags */ }, },
1149 { .name
= "sendto", .errmsg
= true,
1150 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */
1151 [3] = SCA_MSG_FLAGS
, /* flags */ }, },
1152 { .name
= "set_tid_address", .errpid
= true, },
1153 { .name
= "setitimer", .errmsg
= true, STRARRAY(0, which
, itimers
), },
1154 { .name
= "setpgid", .errmsg
= true, },
1155 { .name
= "setrlimit", .errmsg
= true, STRARRAY(0, resource
, rlimit_resources
), },
1156 { .name
= "setxattr", .errmsg
= true,
1157 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1158 { .name
= "shutdown", .errmsg
= true,
1159 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1160 { .name
= "socket", .errmsg
= true,
1161 .arg_scnprintf
= { [0] = SCA_STRARRAY
, /* family */
1162 [1] = SCA_SK_TYPE
, /* type */ },
1163 .arg_parm
= { [0] = &strarray__socket_families
, /* family */ }, },
1164 { .name
= "socketpair", .errmsg
= true,
1165 .arg_scnprintf
= { [0] = SCA_STRARRAY
, /* family */
1166 [1] = SCA_SK_TYPE
, /* type */ },
1167 .arg_parm
= { [0] = &strarray__socket_families
, /* family */ }, },
1168 { .name
= "stat", .errmsg
= true, .alias
= "newstat",
1169 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1170 { .name
= "statfs", .errmsg
= true,
1171 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* pathname */ }, },
1172 { .name
= "swapoff", .errmsg
= true,
1173 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* specialfile */ }, },
1174 { .name
= "swapon", .errmsg
= true,
1175 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* specialfile */ }, },
1176 { .name
= "symlinkat", .errmsg
= true,
1177 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */ }, },
1178 { .name
= "tgkill", .errmsg
= true,
1179 .arg_scnprintf
= { [2] = SCA_SIGNUM
, /* sig */ }, },
1180 { .name
= "tkill", .errmsg
= true,
1181 .arg_scnprintf
= { [1] = SCA_SIGNUM
, /* sig */ }, },
1182 { .name
= "truncate", .errmsg
= true,
1183 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* path */ }, },
1184 { .name
= "uname", .errmsg
= true, .alias
= "newuname", },
1185 { .name
= "unlinkat", .errmsg
= true,
1186 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dfd */
1187 [1] = SCA_FILENAME
, /* pathname */ }, },
1188 { .name
= "utime", .errmsg
= true,
1189 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1190 { .name
= "utimensat", .errmsg
= true,
1191 .arg_scnprintf
= { [0] = SCA_FDAT
, /* dirfd */
1192 [1] = SCA_FILENAME
, /* filename */ }, },
1193 { .name
= "utimes", .errmsg
= true,
1194 .arg_scnprintf
= { [0] = SCA_FILENAME
, /* filename */ }, },
1195 { .name
= "vmsplice", .errmsg
= true,
1196 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1197 { .name
= "wait4", .errpid
= true,
1198 .arg_scnprintf
= { [2] = SCA_WAITID_OPTIONS
, /* options */ }, },
1199 { .name
= "waitid", .errpid
= true,
1200 .arg_scnprintf
= { [3] = SCA_WAITID_OPTIONS
, /* options */ }, },
1201 { .name
= "write", .errmsg
= true,
1202 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1203 { .name
= "writev", .errmsg
= true,
1204 .arg_scnprintf
= { [0] = SCA_FD
, /* fd */ }, },
1207 static int syscall_fmt__cmp(const void *name
, const void *fmtp
)
1209 const struct syscall_fmt
*fmt
= fmtp
;
1210 return strcmp(name
, fmt
->name
);
1213 static struct syscall_fmt
*syscall_fmt__find(const char *name
)
1215 const int nmemb
= ARRAY_SIZE(syscall_fmts
);
1216 return bsearch(name
, syscall_fmts
, nmemb
, sizeof(struct syscall_fmt
), syscall_fmt__cmp
);
1220 struct event_format
*tp_format
;
1222 struct format_field
*args
;
1225 struct syscall_fmt
*fmt
;
1226 size_t (**arg_scnprintf
)(char *bf
, size_t size
, struct syscall_arg
*arg
);
1230 static size_t fprintf_duration(unsigned long t
, FILE *fp
)
1232 double duration
= (double)t
/ NSEC_PER_MSEC
;
1233 size_t printed
= fprintf(fp
, "(");
1235 if (duration
>= 1.0)
1236 printed
+= color_fprintf(fp
, PERF_COLOR_RED
, "%6.3f ms", duration
);
1237 else if (duration
>= 0.01)
1238 printed
+= color_fprintf(fp
, PERF_COLOR_YELLOW
, "%6.3f ms", duration
);
1240 printed
+= color_fprintf(fp
, PERF_COLOR_NORMAL
, "%6.3f ms", duration
);
1241 return printed
+ fprintf(fp
, "): ");
1245 * filename.ptr: The filename char pointer that will be vfs_getname'd
1246 * filename.entry_str_pos: Where to insert the string translated from
1247 * filename.ptr by the vfs_getname tracepoint/kprobe.
1249 struct thread_trace
{
1253 unsigned long nr_events
;
1254 unsigned long pfmaj
, pfmin
;
1259 short int entry_str_pos
;
1261 unsigned int namelen
;
1269 struct intlist
*syscall_stats
;
1272 static struct thread_trace
*thread_trace__new(void)
1274 struct thread_trace
*ttrace
= zalloc(sizeof(struct thread_trace
));
1277 ttrace
->paths
.max
= -1;
1279 ttrace
->syscall_stats
= intlist__new(NULL
);
1284 static struct thread_trace
*thread__trace(struct thread
*thread
, FILE *fp
)
1286 struct thread_trace
*ttrace
;
1291 if (thread__priv(thread
) == NULL
)
1292 thread__set_priv(thread
, thread_trace__new());
1294 if (thread__priv(thread
) == NULL
)
1297 ttrace
= thread__priv(thread
);
1298 ++ttrace
->nr_events
;
1302 color_fprintf(fp
, PERF_COLOR_RED
,
1303 "WARNING: not enough memory, dropping samples!\n");
1307 #define TRACE_PFMAJ (1 << 0)
1308 #define TRACE_PFMIN (1 << 1)
1310 static const size_t trace__entry_str_size
= 2048;
1312 static int trace__set_fd_pathname(struct thread
*thread
, int fd
, const char *pathname
)
1314 struct thread_trace
*ttrace
= thread__priv(thread
);
1316 if (fd
> ttrace
->paths
.max
) {
1317 char **npath
= realloc(ttrace
->paths
.table
, (fd
+ 1) * sizeof(char *));
1322 if (ttrace
->paths
.max
!= -1) {
1323 memset(npath
+ ttrace
->paths
.max
+ 1, 0,
1324 (fd
- ttrace
->paths
.max
) * sizeof(char *));
1326 memset(npath
, 0, (fd
+ 1) * sizeof(char *));
1329 ttrace
->paths
.table
= npath
;
1330 ttrace
->paths
.max
= fd
;
1333 ttrace
->paths
.table
[fd
] = strdup(pathname
);
1335 return ttrace
->paths
.table
[fd
] != NULL
? 0 : -1;
1338 static int thread__read_fd_path(struct thread
*thread
, int fd
)
1340 char linkname
[PATH_MAX
], pathname
[PATH_MAX
];
1344 if (thread
->pid_
== thread
->tid
) {
1345 scnprintf(linkname
, sizeof(linkname
),
1346 "/proc/%d/fd/%d", thread
->pid_
, fd
);
1348 scnprintf(linkname
, sizeof(linkname
),
1349 "/proc/%d/task/%d/fd/%d", thread
->pid_
, thread
->tid
, fd
);
1352 if (lstat(linkname
, &st
) < 0 || st
.st_size
+ 1 > (off_t
)sizeof(pathname
))
1355 ret
= readlink(linkname
, pathname
, sizeof(pathname
));
1357 if (ret
< 0 || ret
> st
.st_size
)
1360 pathname
[ret
] = '\0';
1361 return trace__set_fd_pathname(thread
, fd
, pathname
);
1364 static const char *thread__fd_path(struct thread
*thread
, int fd
,
1365 struct trace
*trace
)
1367 struct thread_trace
*ttrace
= thread__priv(thread
);
1375 if ((fd
> ttrace
->paths
.max
|| ttrace
->paths
.table
[fd
] == NULL
)) {
1378 ++trace
->stats
.proc_getname
;
1379 if (thread__read_fd_path(thread
, fd
))
1383 return ttrace
->paths
.table
[fd
];
1386 static size_t syscall_arg__scnprintf_fd(char *bf
, size_t size
,
1387 struct syscall_arg
*arg
)
1390 size_t printed
= scnprintf(bf
, size
, "%d", fd
);
1391 const char *path
= thread__fd_path(arg
->thread
, fd
, arg
->trace
);
1394 printed
+= scnprintf(bf
+ printed
, size
- printed
, "<%s>", path
);
1399 static size_t syscall_arg__scnprintf_close_fd(char *bf
, size_t size
,
1400 struct syscall_arg
*arg
)
1403 size_t printed
= syscall_arg__scnprintf_fd(bf
, size
, arg
);
1404 struct thread_trace
*ttrace
= thread__priv(arg
->thread
);
1406 if (ttrace
&& fd
>= 0 && fd
<= ttrace
->paths
.max
)
1407 zfree(&ttrace
->paths
.table
[fd
]);
1412 static void thread__set_filename_pos(struct thread
*thread
, const char *bf
,
1415 struct thread_trace
*ttrace
= thread__priv(thread
);
1417 ttrace
->filename
.ptr
= ptr
;
1418 ttrace
->filename
.entry_str_pos
= bf
- ttrace
->entry_str
;
1421 static size_t syscall_arg__scnprintf_filename(char *bf
, size_t size
,
1422 struct syscall_arg
*arg
)
1424 unsigned long ptr
= arg
->val
;
1426 if (!arg
->trace
->vfs_getname
)
1427 return scnprintf(bf
, size
, "%#x", ptr
);
1429 thread__set_filename_pos(arg
->thread
, bf
, ptr
);
1433 static bool trace__filter_duration(struct trace
*trace
, double t
)
1435 return t
< (trace
->duration_filter
* NSEC_PER_MSEC
);
1438 static size_t trace__fprintf_tstamp(struct trace
*trace
, u64 tstamp
, FILE *fp
)
1440 double ts
= (double)(tstamp
- trace
->base_time
) / NSEC_PER_MSEC
;
1442 return fprintf(fp
, "%10.3f ", ts
);
1445 static bool done
= false;
1446 static bool interrupted
= false;
1448 static void sig_handler(int sig
)
1451 interrupted
= sig
== SIGINT
;
1454 static size_t trace__fprintf_entry_head(struct trace
*trace
, struct thread
*thread
,
1455 u64 duration
, u64 tstamp
, FILE *fp
)
1457 size_t printed
= trace__fprintf_tstamp(trace
, tstamp
, fp
);
1458 printed
+= fprintf_duration(duration
, fp
);
1460 if (trace
->multiple_threads
) {
1461 if (trace
->show_comm
)
1462 printed
+= fprintf(fp
, "%.14s/", thread__comm_str(thread
));
1463 printed
+= fprintf(fp
, "%d ", thread
->tid
);
1469 static int trace__process_event(struct trace
*trace
, struct machine
*machine
,
1470 union perf_event
*event
, struct perf_sample
*sample
)
1474 switch (event
->header
.type
) {
1475 case PERF_RECORD_LOST
:
1476 color_fprintf(trace
->output
, PERF_COLOR_RED
,
1477 "LOST %" PRIu64
" events!\n", event
->lost
.lost
);
1478 ret
= machine__process_lost_event(machine
, event
, sample
);
1481 ret
= machine__process_event(machine
, event
, sample
);
1488 static int trace__tool_process(struct perf_tool
*tool
,
1489 union perf_event
*event
,
1490 struct perf_sample
*sample
,
1491 struct machine
*machine
)
1493 struct trace
*trace
= container_of(tool
, struct trace
, tool
);
1494 return trace__process_event(trace
, machine
, event
, sample
);
1497 static int trace__symbols_init(struct trace
*trace
, struct perf_evlist
*evlist
)
1499 int err
= symbol__init(NULL
);
1504 trace
->host
= machine__new_host();
1505 if (trace
->host
== NULL
)
1508 if (trace_event__register_resolver(trace
->host
, machine__resolve_kernel_addr
) < 0)
1511 err
= __machine__synthesize_threads(trace
->host
, &trace
->tool
, &trace
->opts
.target
,
1512 evlist
->threads
, trace__tool_process
, false,
1513 trace
->opts
.proc_map_timeout
);
1520 static int syscall__set_arg_fmts(struct syscall
*sc
)
1522 struct format_field
*field
;
1525 sc
->arg_scnprintf
= calloc(sc
->nr_args
, sizeof(void *));
1526 if (sc
->arg_scnprintf
== NULL
)
1530 sc
->arg_parm
= sc
->fmt
->arg_parm
;
1532 for (field
= sc
->args
; field
; field
= field
->next
) {
1533 if (sc
->fmt
&& sc
->fmt
->arg_scnprintf
[idx
])
1534 sc
->arg_scnprintf
[idx
] = sc
->fmt
->arg_scnprintf
[idx
];
1535 else if (field
->flags
& FIELD_IS_POINTER
)
1536 sc
->arg_scnprintf
[idx
] = syscall_arg__scnprintf_hex
;
1537 else if (strcmp(field
->type
, "pid_t") == 0)
1538 sc
->arg_scnprintf
[idx
] = SCA_PID
;
1539 else if (strcmp(field
->type
, "umode_t") == 0)
1540 sc
->arg_scnprintf
[idx
] = SCA_MODE_T
;
1547 static int trace__read_syscall_info(struct trace
*trace
, int id
)
1551 const char *name
= syscalltbl__name(trace
->sctbl
, id
);
1556 if (id
> trace
->syscalls
.max
) {
1557 struct syscall
*nsyscalls
= realloc(trace
->syscalls
.table
, (id
+ 1) * sizeof(*sc
));
1559 if (nsyscalls
== NULL
)
1562 if (trace
->syscalls
.max
!= -1) {
1563 memset(nsyscalls
+ trace
->syscalls
.max
+ 1, 0,
1564 (id
- trace
->syscalls
.max
) * sizeof(*sc
));
1566 memset(nsyscalls
, 0, (id
+ 1) * sizeof(*sc
));
1569 trace
->syscalls
.table
= nsyscalls
;
1570 trace
->syscalls
.max
= id
;
1573 sc
= trace
->syscalls
.table
+ id
;
1576 sc
->fmt
= syscall_fmt__find(sc
->name
);
1578 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->name
);
1579 sc
->tp_format
= trace_event__tp_format("syscalls", tp_name
);
1581 if (IS_ERR(sc
->tp_format
) && sc
->fmt
&& sc
->fmt
->alias
) {
1582 snprintf(tp_name
, sizeof(tp_name
), "sys_enter_%s", sc
->fmt
->alias
);
1583 sc
->tp_format
= trace_event__tp_format("syscalls", tp_name
);
1586 if (IS_ERR(sc
->tp_format
))
1589 sc
->args
= sc
->tp_format
->format
.fields
;
1590 sc
->nr_args
= sc
->tp_format
->format
.nr_fields
;
1592 * We need to check and discard the first variable '__syscall_nr'
1593 * or 'nr' that mean the syscall number. It is needless here.
1594 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1596 if (sc
->args
&& (!strcmp(sc
->args
->name
, "__syscall_nr") || !strcmp(sc
->args
->name
, "nr"))) {
1597 sc
->args
= sc
->args
->next
;
1601 sc
->is_exit
= !strcmp(name
, "exit_group") || !strcmp(name
, "exit");
1603 return syscall__set_arg_fmts(sc
);
1606 static int trace__validate_ev_qualifier(struct trace
*trace
)
1609 struct str_node
*pos
;
1611 trace
->ev_qualifier_ids
.nr
= strlist__nr_entries(trace
->ev_qualifier
);
1612 trace
->ev_qualifier_ids
.entries
= malloc(trace
->ev_qualifier_ids
.nr
*
1613 sizeof(trace
->ev_qualifier_ids
.entries
[0]));
1615 if (trace
->ev_qualifier_ids
.entries
== NULL
) {
1616 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1624 strlist__for_each(pos
, trace
->ev_qualifier
) {
1625 const char *sc
= pos
->s
;
1626 int id
= syscalltbl__id(trace
->sctbl
, sc
);
1630 fputs("Error:\tInvalid syscall ", trace
->output
);
1633 fputs(", ", trace
->output
);
1636 fputs(sc
, trace
->output
);
1639 trace
->ev_qualifier_ids
.entries
[i
++] = id
;
1643 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1644 "\nHint:\tand: 'man syscalls'\n", trace
->output
);
1645 zfree(&trace
->ev_qualifier_ids
.entries
);
1646 trace
->ev_qualifier_ids
.nr
= 0;
1653 * args is to be interpreted as a series of longs but we need to handle
1654 * 8-byte unaligned accesses. args points to raw_data within the event
1655 * and raw_data is guaranteed to be 8-byte unaligned because it is
1656 * preceded by raw_size which is a u32. So we need to copy args to a temp
1657 * variable to read it. Most notably this avoids extended load instructions
1658 * on unaligned addresses
1661 static size_t syscall__scnprintf_args(struct syscall
*sc
, char *bf
, size_t size
,
1662 unsigned char *args
, struct trace
*trace
,
1663 struct thread
*thread
)
1669 if (sc
->args
!= NULL
) {
1670 struct format_field
*field
;
1672 struct syscall_arg arg
= {
1679 for (field
= sc
->args
; field
;
1680 field
= field
->next
, ++arg
.idx
, bit
<<= 1) {
1684 /* special care for unaligned accesses */
1685 p
= args
+ sizeof(unsigned long) * arg
.idx
;
1686 memcpy(&val
, p
, sizeof(val
));
1689 * Suppress this argument if its value is zero and
1690 * and we don't have a string associated in an
1694 !(sc
->arg_scnprintf
&&
1695 sc
->arg_scnprintf
[arg
.idx
] == SCA_STRARRAY
&&
1696 sc
->arg_parm
[arg
.idx
]))
1699 printed
+= scnprintf(bf
+ printed
, size
- printed
,
1700 "%s%s: ", printed
? ", " : "", field
->name
);
1701 if (sc
->arg_scnprintf
&& sc
->arg_scnprintf
[arg
.idx
]) {
1704 arg
.parm
= sc
->arg_parm
[arg
.idx
];
1705 printed
+= sc
->arg_scnprintf
[arg
.idx
](bf
+ printed
,
1706 size
- printed
, &arg
);
1708 printed
+= scnprintf(bf
+ printed
, size
- printed
,
1716 /* special care for unaligned accesses */
1717 p
= args
+ sizeof(unsigned long) * i
;
1718 memcpy(&val
, p
, sizeof(val
));
1719 printed
+= scnprintf(bf
+ printed
, size
- printed
,
1721 printed
? ", " : "", i
, val
);
1729 typedef int (*tracepoint_handler
)(struct trace
*trace
, struct perf_evsel
*evsel
,
1730 union perf_event
*event
,
1731 struct perf_sample
*sample
);
1733 static struct syscall
*trace__syscall_info(struct trace
*trace
,
1734 struct perf_evsel
*evsel
, int id
)
1740 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1741 * before that, leaving at a higher verbosity level till that is
1742 * explained. Reproduced with plain ftrace with:
1744 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1745 * grep "NR -1 " /t/trace_pipe
1747 * After generating some load on the machine.
1751 fprintf(trace
->output
, "Invalid syscall %d id, skipping (%s, %" PRIu64
") ...\n",
1752 id
, perf_evsel__name(evsel
), ++n
);
1757 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
) &&
1758 trace__read_syscall_info(trace
, id
))
1761 if ((id
> trace
->syscalls
.max
|| trace
->syscalls
.table
[id
].name
== NULL
))
1764 return &trace
->syscalls
.table
[id
];
1768 fprintf(trace
->output
, "Problems reading syscall %d", id
);
1769 if (id
<= trace
->syscalls
.max
&& trace
->syscalls
.table
[id
].name
!= NULL
)
1770 fprintf(trace
->output
, "(%s)", trace
->syscalls
.table
[id
].name
);
1771 fputs(" information\n", trace
->output
);
1776 static void thread__update_stats(struct thread_trace
*ttrace
,
1777 int id
, struct perf_sample
*sample
)
1779 struct int_node
*inode
;
1780 struct stats
*stats
;
1783 inode
= intlist__findnew(ttrace
->syscall_stats
, id
);
1787 stats
= inode
->priv
;
1788 if (stats
== NULL
) {
1789 stats
= malloc(sizeof(struct stats
));
1793 inode
->priv
= stats
;
1796 if (ttrace
->entry_time
&& sample
->time
> ttrace
->entry_time
)
1797 duration
= sample
->time
- ttrace
->entry_time
;
1799 update_stats(stats
, duration
);
1802 static int trace__printf_interrupted_entry(struct trace
*trace
, struct perf_sample
*sample
)
1804 struct thread_trace
*ttrace
;
1808 if (trace
->current
== NULL
)
1811 ttrace
= thread__priv(trace
->current
);
1813 if (!ttrace
->entry_pending
)
1816 duration
= sample
->time
- ttrace
->entry_time
;
1818 printed
= trace__fprintf_entry_head(trace
, trace
->current
, duration
, sample
->time
, trace
->output
);
1819 printed
+= fprintf(trace
->output
, "%-70s) ...\n", ttrace
->entry_str
);
1820 ttrace
->entry_pending
= false;
1825 static int trace__sys_enter(struct trace
*trace
, struct perf_evsel
*evsel
,
1826 union perf_event
*event __maybe_unused
,
1827 struct perf_sample
*sample
)
1832 struct thread
*thread
;
1833 int id
= perf_evsel__sc_tp_uint(evsel
, id
, sample
), err
= -1;
1834 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, id
);
1835 struct thread_trace
*ttrace
;
1840 thread
= machine__findnew_thread(trace
->host
, sample
->pid
, sample
->tid
);
1841 ttrace
= thread__trace(thread
, trace
->output
);
1845 args
= perf_evsel__sc_tp_ptr(evsel
, args
, sample
);
1847 if (ttrace
->entry_str
== NULL
) {
1848 ttrace
->entry_str
= malloc(trace__entry_str_size
);
1849 if (!ttrace
->entry_str
)
1853 if (!(trace
->duration_filter
|| trace
->summary_only
|| trace
->min_stack
))
1854 trace__printf_interrupted_entry(trace
, sample
);
1856 ttrace
->entry_time
= sample
->time
;
1857 msg
= ttrace
->entry_str
;
1858 printed
+= scnprintf(msg
+ printed
, trace__entry_str_size
- printed
, "%s(", sc
->name
);
1860 printed
+= syscall__scnprintf_args(sc
, msg
+ printed
, trace__entry_str_size
- printed
,
1861 args
, trace
, thread
);
1864 if (!(trace
->duration_filter
|| trace
->summary_only
|| trace
->min_stack
)) {
1865 trace__fprintf_entry_head(trace
, thread
, 1, sample
->time
, trace
->output
);
1866 fprintf(trace
->output
, "%-70s\n", ttrace
->entry_str
);
1869 ttrace
->entry_pending
= true;
1870 /* See trace__vfs_getname & trace__sys_exit */
1871 ttrace
->filename
.pending_open
= false;
1874 if (trace
->current
!= thread
) {
1875 thread__put(trace
->current
);
1876 trace
->current
= thread__get(thread
);
1880 thread__put(thread
);
1884 static int trace__resolve_callchain(struct trace
*trace
, struct perf_evsel
*evsel
,
1885 struct perf_sample
*sample
,
1886 struct callchain_cursor
*cursor
)
1888 struct addr_location al
;
1890 if (machine__resolve(trace
->host
, &al
, sample
) < 0 ||
1891 thread__resolve_callchain(al
.thread
, cursor
, evsel
, sample
, NULL
, NULL
, trace
->max_stack
))
1897 static int trace__fprintf_callchain(struct trace
*trace
, struct perf_sample
*sample
)
1899 /* TODO: user-configurable print_opts */
1900 const unsigned int print_opts
= EVSEL__PRINT_SYM
|
1902 EVSEL__PRINT_UNKNOWN_AS_ADDR
;
1904 return sample__fprintf_callchain(sample
, 38, print_opts
, &callchain_cursor
, trace
->output
);
1907 static int trace__sys_exit(struct trace
*trace
, struct perf_evsel
*evsel
,
1908 union perf_event
*event __maybe_unused
,
1909 struct perf_sample
*sample
)
1913 struct thread
*thread
;
1914 int id
= perf_evsel__sc_tp_uint(evsel
, id
, sample
), err
= -1, callchain_ret
= 0;
1915 struct syscall
*sc
= trace__syscall_info(trace
, evsel
, id
);
1916 struct thread_trace
*ttrace
;
1921 thread
= machine__findnew_thread(trace
->host
, sample
->pid
, sample
->tid
);
1922 ttrace
= thread__trace(thread
, trace
->output
);
1927 thread__update_stats(ttrace
, id
, sample
);
1929 ret
= perf_evsel__sc_tp_uint(evsel
, ret
, sample
);
1931 if (id
== trace
->open_id
&& ret
>= 0 && ttrace
->filename
.pending_open
) {
1932 trace__set_fd_pathname(thread
, ret
, ttrace
->filename
.name
);
1933 ttrace
->filename
.pending_open
= false;
1934 ++trace
->stats
.vfs_getname
;
1937 ttrace
->exit_time
= sample
->time
;
1939 if (ttrace
->entry_time
) {
1940 duration
= sample
->time
- ttrace
->entry_time
;
1941 if (trace__filter_duration(trace
, duration
))
1943 } else if (trace
->duration_filter
)
1946 if (sample
->callchain
) {
1947 callchain_ret
= trace__resolve_callchain(trace
, evsel
, sample
, &callchain_cursor
);
1948 if (callchain_ret
== 0) {
1949 if (callchain_cursor
.nr
< trace
->min_stack
)
1955 if (trace
->summary_only
)
1958 trace__fprintf_entry_head(trace
, thread
, duration
, sample
->time
, trace
->output
);
1960 if (ttrace
->entry_pending
) {
1961 fprintf(trace
->output
, "%-70s", ttrace
->entry_str
);
1963 fprintf(trace
->output
, " ... [");
1964 color_fprintf(trace
->output
, PERF_COLOR_YELLOW
, "continued");
1965 fprintf(trace
->output
, "]: %s()", sc
->name
);
1968 if (sc
->fmt
== NULL
) {
1970 fprintf(trace
->output
, ") = %ld", ret
);
1971 } else if (ret
< 0 && (sc
->fmt
->errmsg
|| sc
->fmt
->errpid
)) {
1972 char bf
[STRERR_BUFSIZE
];
1973 const char *emsg
= strerror_r(-ret
, bf
, sizeof(bf
)),
1974 *e
= audit_errno_to_name(-ret
);
1976 fprintf(trace
->output
, ") = -1 %s %s", e
, emsg
);
1977 } else if (ret
== 0 && sc
->fmt
->timeout
)
1978 fprintf(trace
->output
, ") = 0 Timeout");
1979 else if (sc
->fmt
->hexret
)
1980 fprintf(trace
->output
, ") = %#lx", ret
);
1981 else if (sc
->fmt
->errpid
) {
1982 struct thread
*child
= machine__find_thread(trace
->host
, ret
, ret
);
1984 if (child
!= NULL
) {
1985 fprintf(trace
->output
, ") = %ld", ret
);
1986 if (child
->comm_set
)
1987 fprintf(trace
->output
, " (%s)", thread__comm_str(child
));
1993 fputc('\n', trace
->output
);
1995 if (callchain_ret
> 0)
1996 trace__fprintf_callchain(trace
, sample
);
1997 else if (callchain_ret
< 0)
1998 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel
));
2000 ttrace
->entry_pending
= false;
2003 thread__put(thread
);
2007 static int trace__vfs_getname(struct trace
*trace
, struct perf_evsel
*evsel
,
2008 union perf_event
*event __maybe_unused
,
2009 struct perf_sample
*sample
)
2011 struct thread
*thread
= machine__findnew_thread(trace
->host
, sample
->pid
, sample
->tid
);
2012 struct thread_trace
*ttrace
;
2013 size_t filename_len
, entry_str_len
, to_move
;
2014 ssize_t remaining_space
;
2016 const char *filename
= perf_evsel__rawptr(evsel
, sample
, "pathname");
2021 ttrace
= thread__priv(thread
);
2025 filename_len
= strlen(filename
);
2027 if (ttrace
->filename
.namelen
< filename_len
) {
2028 char *f
= realloc(ttrace
->filename
.name
, filename_len
+ 1);
2033 ttrace
->filename
.namelen
= filename_len
;
2034 ttrace
->filename
.name
= f
;
2037 strcpy(ttrace
->filename
.name
, filename
);
2038 ttrace
->filename
.pending_open
= true;
2040 if (!ttrace
->filename
.ptr
)
2043 entry_str_len
= strlen(ttrace
->entry_str
);
2044 remaining_space
= trace__entry_str_size
- entry_str_len
- 1; /* \0 */
2045 if (remaining_space
<= 0)
2048 if (filename_len
> (size_t)remaining_space
) {
2049 filename
+= filename_len
- remaining_space
;
2050 filename_len
= remaining_space
;
2053 to_move
= entry_str_len
- ttrace
->filename
.entry_str_pos
+ 1; /* \0 */
2054 pos
= ttrace
->entry_str
+ ttrace
->filename
.entry_str_pos
;
2055 memmove(pos
+ filename_len
, pos
, to_move
);
2056 memcpy(pos
, filename
, filename_len
);
2058 ttrace
->filename
.ptr
= 0;
2059 ttrace
->filename
.entry_str_pos
= 0;
2064 static int trace__sched_stat_runtime(struct trace
*trace
, struct perf_evsel
*evsel
,
2065 union perf_event
*event __maybe_unused
,
2066 struct perf_sample
*sample
)
2068 u64 runtime
= perf_evsel__intval(evsel
, sample
, "runtime");
2069 double runtime_ms
= (double)runtime
/ NSEC_PER_MSEC
;
2070 struct thread
*thread
= machine__findnew_thread(trace
->host
,
2073 struct thread_trace
*ttrace
= thread__trace(thread
, trace
->output
);
2078 ttrace
->runtime_ms
+= runtime_ms
;
2079 trace
->runtime_ms
+= runtime_ms
;
2080 thread__put(thread
);
2084 fprintf(trace
->output
, "%s: comm=%s,pid=%u,runtime=%" PRIu64
",vruntime=%" PRIu64
")\n",
2086 perf_evsel__strval(evsel
, sample
, "comm"),
2087 (pid_t
)perf_evsel__intval(evsel
, sample
, "pid"),
2089 perf_evsel__intval(evsel
, sample
, "vruntime"));
2090 thread__put(thread
);
2094 static void bpf_output__printer(enum binary_printer_ops op
,
2095 unsigned int val
, void *extra
)
2097 FILE *output
= extra
;
2098 unsigned char ch
= (unsigned char)val
;
2101 case BINARY_PRINT_CHAR_DATA
:
2102 fprintf(output
, "%c", isprint(ch
) ? ch
: '.');
2104 case BINARY_PRINT_DATA_BEGIN
:
2105 case BINARY_PRINT_LINE_BEGIN
:
2106 case BINARY_PRINT_ADDR
:
2107 case BINARY_PRINT_NUM_DATA
:
2108 case BINARY_PRINT_NUM_PAD
:
2109 case BINARY_PRINT_SEP
:
2110 case BINARY_PRINT_CHAR_PAD
:
2111 case BINARY_PRINT_LINE_END
:
2112 case BINARY_PRINT_DATA_END
:
2118 static void bpf_output__fprintf(struct trace
*trace
,
2119 struct perf_sample
*sample
)
2121 print_binary(sample
->raw_data
, sample
->raw_size
, 8,
2122 bpf_output__printer
, trace
->output
);
2125 static int trace__event_handler(struct trace
*trace
, struct perf_evsel
*evsel
,
2126 union perf_event
*event __maybe_unused
,
2127 struct perf_sample
*sample
)
2129 trace__printf_interrupted_entry(trace
, sample
);
2130 trace__fprintf_tstamp(trace
, sample
->time
, trace
->output
);
2132 if (trace
->trace_syscalls
)
2133 fprintf(trace
->output
, "( ): ");
2135 fprintf(trace
->output
, "%s:", evsel
->name
);
2137 if (perf_evsel__is_bpf_output(evsel
)) {
2138 bpf_output__fprintf(trace
, sample
);
2139 } else if (evsel
->tp_format
) {
2140 event_format__fprintf(evsel
->tp_format
, sample
->cpu
,
2141 sample
->raw_data
, sample
->raw_size
,
2145 fprintf(trace
->output
, ")\n");
2147 if (sample
->callchain
) {
2148 if (trace__resolve_callchain(trace
, evsel
, sample
, &callchain_cursor
) == 0)
2149 trace__fprintf_callchain(trace
, sample
);
2155 static void print_location(FILE *f
, struct perf_sample
*sample
,
2156 struct addr_location
*al
,
2157 bool print_dso
, bool print_sym
)
2160 if ((verbose
|| print_dso
) && al
->map
)
2161 fprintf(f
, "%s@", al
->map
->dso
->long_name
);
2163 if ((verbose
|| print_sym
) && al
->sym
)
2164 fprintf(f
, "%s+0x%" PRIx64
, al
->sym
->name
,
2165 al
->addr
- al
->sym
->start
);
2167 fprintf(f
, "0x%" PRIx64
, al
->addr
);
2169 fprintf(f
, "0x%" PRIx64
, sample
->addr
);
2172 static int trace__pgfault(struct trace
*trace
,
2173 struct perf_evsel
*evsel
,
2174 union perf_event
*event __maybe_unused
,
2175 struct perf_sample
*sample
)
2177 struct thread
*thread
;
2178 struct addr_location al
;
2179 char map_type
= 'd';
2180 struct thread_trace
*ttrace
;
2183 thread
= machine__findnew_thread(trace
->host
, sample
->pid
, sample
->tid
);
2184 ttrace
= thread__trace(thread
, trace
->output
);
2188 if (evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MAJ
)
2193 if (trace
->summary_only
)
2196 thread__find_addr_location(thread
, sample
->cpumode
, MAP__FUNCTION
,
2199 trace__fprintf_entry_head(trace
, thread
, 0, sample
->time
, trace
->output
);
2201 fprintf(trace
->output
, "%sfault [",
2202 evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MAJ
?
2205 print_location(trace
->output
, sample
, &al
, false, true);
2207 fprintf(trace
->output
, "] => ");
2209 thread__find_addr_location(thread
, sample
->cpumode
, MAP__VARIABLE
,
2213 thread__find_addr_location(thread
, sample
->cpumode
,
2214 MAP__FUNCTION
, sample
->addr
, &al
);
2222 print_location(trace
->output
, sample
, &al
, true, false);
2224 fprintf(trace
->output
, " (%c%c)\n", map_type
, al
.level
);
2228 thread__put(thread
);
2232 static bool skip_sample(struct trace
*trace
, struct perf_sample
*sample
)
2234 if ((trace
->pid_list
&& intlist__find(trace
->pid_list
, sample
->pid
)) ||
2235 (trace
->tid_list
&& intlist__find(trace
->tid_list
, sample
->tid
)))
2238 if (trace
->pid_list
|| trace
->tid_list
)
2244 static void trace__set_base_time(struct trace
*trace
,
2245 struct perf_evsel
*evsel
,
2246 struct perf_sample
*sample
)
2249 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2250 * and don't use sample->time unconditionally, we may end up having
2251 * some other event in the future without PERF_SAMPLE_TIME for good
2252 * reason, i.e. we may not be interested in its timestamps, just in
2253 * it taking place, picking some piece of information when it
2254 * appears in our event stream (vfs_getname comes to mind).
2256 if (trace
->base_time
== 0 && !trace
->full_time
&&
2257 (evsel
->attr
.sample_type
& PERF_SAMPLE_TIME
))
2258 trace
->base_time
= sample
->time
;
2261 static int trace__process_sample(struct perf_tool
*tool
,
2262 union perf_event
*event
,
2263 struct perf_sample
*sample
,
2264 struct perf_evsel
*evsel
,
2265 struct machine
*machine __maybe_unused
)
2267 struct trace
*trace
= container_of(tool
, struct trace
, tool
);
2270 tracepoint_handler handler
= evsel
->handler
;
2272 if (skip_sample(trace
, sample
))
2275 trace__set_base_time(trace
, evsel
, sample
);
2279 handler(trace
, evsel
, event
, sample
);
2285 static int parse_target_str(struct trace
*trace
)
2287 if (trace
->opts
.target
.pid
) {
2288 trace
->pid_list
= intlist__new(trace
->opts
.target
.pid
);
2289 if (trace
->pid_list
== NULL
) {
2290 pr_err("Error parsing process id string\n");
2295 if (trace
->opts
.target
.tid
) {
2296 trace
->tid_list
= intlist__new(trace
->opts
.target
.tid
);
2297 if (trace
->tid_list
== NULL
) {
2298 pr_err("Error parsing thread id string\n");
2306 static int trace__record(struct trace
*trace
, int argc
, const char **argv
)
2308 unsigned int rec_argc
, i
, j
;
2309 const char **rec_argv
;
2310 const char * const record_args
[] = {
2317 const char * const sc_args
[] = { "-e", };
2318 unsigned int sc_args_nr
= ARRAY_SIZE(sc_args
);
2319 const char * const majpf_args
[] = { "-e", "major-faults" };
2320 unsigned int majpf_args_nr
= ARRAY_SIZE(majpf_args
);
2321 const char * const minpf_args
[] = { "-e", "minor-faults" };
2322 unsigned int minpf_args_nr
= ARRAY_SIZE(minpf_args
);
2324 /* +1 is for the event string below */
2325 rec_argc
= ARRAY_SIZE(record_args
) + sc_args_nr
+ 1 +
2326 majpf_args_nr
+ minpf_args_nr
+ argc
;
2327 rec_argv
= calloc(rec_argc
+ 1, sizeof(char *));
2329 if (rec_argv
== NULL
)
2333 for (i
= 0; i
< ARRAY_SIZE(record_args
); i
++)
2334 rec_argv
[j
++] = record_args
[i
];
2336 if (trace
->trace_syscalls
) {
2337 for (i
= 0; i
< sc_args_nr
; i
++)
2338 rec_argv
[j
++] = sc_args
[i
];
2340 /* event string may be different for older kernels - e.g., RHEL6 */
2341 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2342 rec_argv
[j
++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2343 else if (is_valid_tracepoint("syscalls:sys_enter"))
2344 rec_argv
[j
++] = "syscalls:sys_enter,syscalls:sys_exit";
2346 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2351 if (trace
->trace_pgfaults
& TRACE_PFMAJ
)
2352 for (i
= 0; i
< majpf_args_nr
; i
++)
2353 rec_argv
[j
++] = majpf_args
[i
];
2355 if (trace
->trace_pgfaults
& TRACE_PFMIN
)
2356 for (i
= 0; i
< minpf_args_nr
; i
++)
2357 rec_argv
[j
++] = minpf_args
[i
];
2359 for (i
= 0; i
< (unsigned int)argc
; i
++)
2360 rec_argv
[j
++] = argv
[i
];
2362 return cmd_record(j
, rec_argv
, NULL
);
2365 static size_t trace__fprintf_thread_summary(struct trace
*trace
, FILE *fp
);
2367 static bool perf_evlist__add_vfs_getname(struct perf_evlist
*evlist
)
2369 struct perf_evsel
*evsel
= perf_evsel__newtp("probe", "vfs_getname");
2374 if (perf_evsel__field(evsel
, "pathname") == NULL
) {
2375 perf_evsel__delete(evsel
);
2379 evsel
->handler
= trace__vfs_getname
;
2380 perf_evlist__add(evlist
, evsel
);
2384 static int perf_evlist__add_pgfault(struct perf_evlist
*evlist
,
2387 struct perf_evsel
*evsel
;
2388 struct perf_event_attr attr
= {
2389 .type
= PERF_TYPE_SOFTWARE
,
2393 attr
.config
= config
;
2394 attr
.sample_period
= 1;
2396 event_attr_init(&attr
);
2398 evsel
= perf_evsel__new(&attr
);
2402 evsel
->handler
= trace__pgfault
;
2403 perf_evlist__add(evlist
, evsel
);
2408 static void trace__handle_event(struct trace
*trace
, union perf_event
*event
, struct perf_sample
*sample
)
2410 const u32 type
= event
->header
.type
;
2411 struct perf_evsel
*evsel
;
2413 if (type
!= PERF_RECORD_SAMPLE
) {
2414 trace__process_event(trace
, trace
->host
, event
, sample
);
2418 evsel
= perf_evlist__id2evsel(trace
->evlist
, sample
->id
);
2419 if (evsel
== NULL
) {
2420 fprintf(trace
->output
, "Unknown tp ID %" PRIu64
", skipping...\n", sample
->id
);
2424 trace__set_base_time(trace
, evsel
, sample
);
2426 if (evsel
->attr
.type
== PERF_TYPE_TRACEPOINT
&&
2427 sample
->raw_data
== NULL
) {
2428 fprintf(trace
->output
, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2429 perf_evsel__name(evsel
), sample
->tid
,
2430 sample
->cpu
, sample
->raw_size
);
2432 tracepoint_handler handler
= evsel
->handler
;
2433 handler(trace
, evsel
, event
, sample
);
2437 static int trace__add_syscall_newtp(struct trace
*trace
)
2440 struct perf_evlist
*evlist
= trace
->evlist
;
2441 struct perf_evsel
*sys_enter
, *sys_exit
;
2443 sys_enter
= perf_evsel__syscall_newtp("sys_enter", trace__sys_enter
);
2444 if (sys_enter
== NULL
)
2447 if (perf_evsel__init_sc_tp_ptr_field(sys_enter
, args
))
2448 goto out_delete_sys_enter
;
2450 sys_exit
= perf_evsel__syscall_newtp("sys_exit", trace__sys_exit
);
2451 if (sys_exit
== NULL
)
2452 goto out_delete_sys_enter
;
2454 if (perf_evsel__init_sc_tp_uint_field(sys_exit
, ret
))
2455 goto out_delete_sys_exit
;
2457 perf_evlist__add(evlist
, sys_enter
);
2458 perf_evlist__add(evlist
, sys_exit
);
2460 if (trace
->opts
.callgraph_set
&& !trace
->kernel_syscallchains
) {
2462 * We're interested only in the user space callchain
2463 * leading to the syscall, allow overriding that for
2464 * debugging reasons using --kernel_syscall_callchains
2466 sys_exit
->attr
.exclude_callchain_kernel
= 1;
2469 trace
->syscalls
.events
.sys_enter
= sys_enter
;
2470 trace
->syscalls
.events
.sys_exit
= sys_exit
;
2476 out_delete_sys_exit
:
2477 perf_evsel__delete_priv(sys_exit
);
2478 out_delete_sys_enter
:
2479 perf_evsel__delete_priv(sys_enter
);
2483 static int trace__set_ev_qualifier_filter(struct trace
*trace
)
2486 char *filter
= asprintf_expr_inout_ints("id", !trace
->not_ev_qualifier
,
2487 trace
->ev_qualifier_ids
.nr
,
2488 trace
->ev_qualifier_ids
.entries
);
2493 if (!perf_evsel__append_filter(trace
->syscalls
.events
.sys_enter
, "&&", filter
))
2494 err
= perf_evsel__append_filter(trace
->syscalls
.events
.sys_exit
, "&&", filter
);
/*
 * Live tracing: set up the events on trace->evlist, open and mmap them,
 * start the workload (when argc > 0 a command was given, see 'forks') and
 * then loop reading records from every mmap, parsing each one and passing it
 * to trace__handle_event(). After the loop the evlist is disabled, the
 * thread summary and tool stats may be printed, and the evlist is deleted.
 *
 * The labels near the end are the error paths: each formats a message
 * (mostly into errbuf) and writes it to trace->output before jumping to
 * out_delete_evlist.
 */
2504 static int trace__run(struct trace
*trace
, int argc
, const char **argv
)
2506 struct perf_evlist
*evlist
= trace
->evlist
;
2507 struct perf_evsel
*evsel
;
2509 unsigned long before
;
2510 const bool forks
= argc
> 0;
2511 bool draining
= false;
/* Add the events to trace: syscalls, vfs_getname, page faults, sched_stat_runtime. */
2515 if (trace
->trace_syscalls
&& trace__add_syscall_newtp(trace
))
2516 goto out_error_raw_syscalls
;
2518 if (trace
->trace_syscalls
)
2519 trace
->vfs_getname
= perf_evlist__add_vfs_getname(evlist
);
2521 if ((trace
->trace_pgfaults
& TRACE_PFMAJ
) &&
2522 perf_evlist__add_pgfault(evlist
, PERF_COUNT_SW_PAGE_FAULTS_MAJ
)) {
2526 if ((trace
->trace_pgfaults
& TRACE_PFMIN
) &&
2527 perf_evlist__add_pgfault(evlist
, PERF_COUNT_SW_PAGE_FAULTS_MIN
))
2531 perf_evlist__add_newtp(evlist
, "sched", "sched_stat_runtime",
2532 trace__sched_stat_runtime
))
2533 goto out_error_sched_stat_runtime
;
2535 err
= perf_evlist__create_maps(evlist
, &trace
->opts
.target
);
2537 fprintf(trace
->output
, "Problems parsing the target to trace, check your options!\n");
2538 goto out_delete_evlist
;
2541 err
= trace__symbols_init(trace
, evlist
);
2543 fprintf(trace
->output
, "Problems initializing symbol libraries!\n");
2544 goto out_delete_evlist
;
2547 perf_evlist__config(evlist
, &trace
->opts
, NULL
);
2549 if (trace
->opts
.callgraph_set
&& trace
->syscalls
.events
.sys_exit
) {
2550 perf_evsel__config_callchain(trace
->syscalls
.events
.sys_exit
,
2551 &trace
->opts
, &callchain_param
);
2553 * Now we have evsels with different sample_ids, use
2554 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2555 * from a fixed position in each ring buffer record.
2557 * As of this the changeset introducing this comment, this
2558 * isn't strictly needed, as the fields that can come before
2559 * PERF_SAMPLE_ID are all used, but we'll probably disable
2560 * some of those for things like copying the payload of
2561 * pointer syscall arguments, and for vfs_getname we don't
2562 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2563 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2565 perf_evlist__set_sample_bit(evlist
, IDENTIFIER
);
2566 perf_evlist__reset_sample_bit(evlist
, ID
);
2569 signal(SIGCHLD
, sig_handler
);
2570 signal(SIGINT
, sig_handler
);
2573 err
= perf_evlist__prepare_workload(evlist
, &trace
->opts
.target
,
2576 fprintf(trace
->output
, "Couldn't run the workload!\n");
2577 goto out_delete_evlist
;
2581 err
= perf_evlist__open(evlist
);
2583 goto out_error_open
;
2585 err
= bpf__apply_obj_config();
2587 char errbuf
[BUFSIZ
];
2589 bpf__strerror_apply_obj_config(err
, errbuf
, sizeof(errbuf
));
2590 pr_err("ERROR: Apply config to BPF failed: %s\n",
2592 goto out_error_open
;
2596 * Better not use !target__has_task() here because we need to cover the
2597 * case where no threads were specified in the command line, but a
2598 * workload was, and in that case we will fill in the thread_map when
2599 * we fork the workload in perf_evlist__prepare_workload.
2601 if (trace
->filter_pids
.nr
> 0)
2602 err
= perf_evlist__set_filter_pids(evlist
, trace
->filter_pids
.nr
, trace
->filter_pids
.entries
);
2603 else if (thread_map__pid(evlist
->threads
, 0) == -1)
2604 err
= perf_evlist__set_filter_pid(evlist
, getpid());
2609 if (trace
->ev_qualifier_ids
.nr
> 0) {
2610 err
= trace__set_ev_qualifier_filter(trace
);
2614 pr_debug("event qualifier tracepoint filter: %s\n",
2615 trace
->syscalls
.events
.sys_exit
->filter
);
2618 err
= perf_evlist__apply_filters(evlist
, &evsel
);
2620 goto out_error_apply_filters
;
2622 err
= perf_evlist__mmap(evlist
, trace
->opts
.mmap_pages
, false);
2624 goto out_error_mmap
;
2626 if (!target__none(&trace
->opts
.target
))
2627 perf_evlist__enable(evlist
);
2630 perf_evlist__start_workload(evlist
);
2632 trace
->multiple_threads
= thread_map__pid(evlist
->threads
, 0) == -1 ||
2633 evlist
->threads
->nr
> 1 ||
2634 perf_evlist__first(evlist
)->attr
.inherit
;
2636 before
= trace
->nr_events
;
/* Walk every mmap, parsing and dispatching each record before consuming it. */
2638 for (i
= 0; i
< evlist
->nr_mmaps
; i
++) {
2639 union perf_event
*event
;
2641 while ((event
= perf_evlist__mmap_read(evlist
, i
)) != NULL
) {
2642 struct perf_sample sample
;
2646 err
= perf_evlist__parse_sample(evlist
, event
, &sample
);
2648 fprintf(trace
->output
, "Can't parse sample, err = %d, skipping...\n", err
);
2652 trace__handle_event(trace
, event
, &sample
);
2654 perf_evlist__mmap_consume(evlist
, i
);
2659 if (done
&& !draining
) {
2660 perf_evlist__disable(evlist
);
2666 if (trace
->nr_events
== before
) {
2667 int timeout
= done
? 100 : -1;
2669 if (!draining
&& perf_evlist__poll(evlist
, timeout
) > 0) {
2670 if (perf_evlist__filter_pollfd(evlist
, POLLERR
| POLLHUP
) == 0)
2680 thread__zput(trace
->current
);
2682 perf_evlist__disable(evlist
);
2686 trace__fprintf_thread_summary(trace
, trace
->output
);
2688 if (trace
->show_tool_stats
) {
2689 fprintf(trace
->output
, "Stats:\n "
2690 " vfs_getname : %" PRIu64
"\n"
2691 " proc_getname: %" PRIu64
"\n",
2692 trace
->stats
.vfs_getname
,
2693 trace
->stats
.proc_getname
);
2698 perf_evlist__delete(evlist
);
2699 trace
->evlist
= NULL
;
2700 trace
->live
= false;
2703 char errbuf
[BUFSIZ
];
/* Error reporting labels; each ends by jumping to out_delete_evlist. */
2705 out_error_sched_stat_runtime
:
2706 tracing_path__strerror_open_tp(errno
, errbuf
, sizeof(errbuf
), "sched", "sched_stat_runtime");
2709 out_error_raw_syscalls
:
2710 tracing_path__strerror_open_tp(errno
, errbuf
, sizeof(errbuf
), "raw_syscalls", "sys_(enter|exit)");
2714 perf_evlist__strerror_mmap(evlist
, errno
, errbuf
, sizeof(errbuf
));
2718 perf_evlist__strerror_open(evlist
, errno
, errbuf
, sizeof(errbuf
));
2721 fprintf(trace
->output
, "%s\n", errbuf
);
2722 goto out_delete_evlist
;
2724 out_error_apply_filters
:
2725 fprintf(trace
->output
,
2726 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2727 evsel
->filter
, perf_evsel__name(evsel
), errno
,
2728 strerror_r(errno
, errbuf
, sizeof(errbuf
)));
2729 goto out_delete_evlist
;
2732 fprintf(trace
->output
, "Not enough memory to run!\n");
2733 goto out_delete_evlist
;
2736 fprintf(trace
->output
, "errno=%d,%s\n", errno
, strerror(errno
));
2737 goto out_delete_evlist
;
2740 static int trace__replay(struct trace
*trace
)
2742 const struct perf_evsel_str_handler handlers
[] = {
2743 { "probe:vfs_getname", trace__vfs_getname
, },
2745 struct perf_data_file file
= {
2747 .mode
= PERF_DATA_MODE_READ
,
2748 .force
= trace
->force
,
2750 struct perf_session
*session
;
2751 struct perf_evsel
*evsel
;
2754 trace
->tool
.sample
= trace__process_sample
;
2755 trace
->tool
.mmap
= perf_event__process_mmap
;
2756 trace
->tool
.mmap2
= perf_event__process_mmap2
;
2757 trace
->tool
.comm
= perf_event__process_comm
;
2758 trace
->tool
.exit
= perf_event__process_exit
;
2759 trace
->tool
.fork
= perf_event__process_fork
;
2760 trace
->tool
.attr
= perf_event__process_attr
;
2761 trace
->tool
.tracing_data
= perf_event__process_tracing_data
;
2762 trace
->tool
.build_id
= perf_event__process_build_id
;
2764 trace
->tool
.ordered_events
= true;
2765 trace
->tool
.ordering_requires_timestamps
= true;
2767 /* add tid to output */
2768 trace
->multiple_threads
= true;
2770 session
= perf_session__new(&file
, false, &trace
->tool
);
2771 if (session
== NULL
)
2774 if (symbol__init(&session
->header
.env
) < 0)
2777 trace
->host
= &session
->machines
.host
;
2779 err
= perf_session__set_tracepoints_handlers(session
, handlers
);
2783 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
2784 "raw_syscalls:sys_enter");
2785 /* older kernels have syscalls tp versus raw_syscalls */
2787 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
2788 "syscalls:sys_enter");
2791 (perf_evsel__init_syscall_tp(evsel
, trace__sys_enter
) < 0 ||
2792 perf_evsel__init_sc_tp_ptr_field(evsel
, args
))) {
2793 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2797 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
2798 "raw_syscalls:sys_exit");
2800 evsel
= perf_evlist__find_tracepoint_by_name(session
->evlist
,
2801 "syscalls:sys_exit");
2803 (perf_evsel__init_syscall_tp(evsel
, trace__sys_exit
) < 0 ||
2804 perf_evsel__init_sc_tp_uint_field(evsel
, ret
))) {
2805 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
2809 evlist__for_each(session
->evlist
, evsel
) {
2810 if (evsel
->attr
.type
== PERF_TYPE_SOFTWARE
&&
2811 (evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MAJ
||
2812 evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS_MIN
||
2813 evsel
->attr
.config
== PERF_COUNT_SW_PAGE_FAULTS
))
2814 evsel
->handler
= trace__pgfault
;
2817 err
= parse_target_str(trace
);
2823 err
= perf_session__process_events(session
);
2825 pr_err("Failed to process events, error %d", err
);
2827 else if (trace
->summary
)
2828 trace__fprintf_thread_summary(trace
, trace
->output
);
2831 perf_session__delete(session
);
/*
 * Write the heading that precedes the per-thread summary to @fp.
 *
 * Returns the value fprintf() reported for the heading, as a size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
/*
 * Print the per-syscall statistics table of one thread to fp.
 *
 * Iteration starts at the first node of ttrace->syscall_stats and advances
 * with intlist__next(). For each node, ->priv is taken as a struct stats and
 * ->i indexes trace->syscalls.table to obtain the syscall name. min, max and
 * avg are divided by NSEC_PER_MSEC before being printed under the "(msec)"
 * columns, the "total" column is avg * n, and the stddev column is
 * stddev_stats() expressed as a percentage of the average (0.0 when the
 * average is zero).
 *
 * The fprintf() return values are accumulated in 'printed'
 * (presumably the function's return value -- the return statement is not
 * visible here, confirm).
 */
2845 static size_t thread__dump_stats(struct thread_trace
*ttrace
,
2846 struct trace
*trace
, FILE *fp
)
2848 struct stats
*stats
;
2851 struct int_node
*inode
= intlist__first(ttrace
->syscall_stats
);
2856 printed
+= fprintf(fp
, "\n");
2858 printed
+= fprintf(fp
, " syscall calls total min avg max stddev\n");
2859 printed
+= fprintf(fp
, " (msec) (msec) (msec) (msec) (%%)\n");
2860 printed
+= fprintf(fp
, " --------------- -------- --------- --------- --------- --------- ------\n");
2862 /* each int_node is a syscall */
2864 stats
= inode
->priv
;
2866 double min
= (double)(stats
->min
) / NSEC_PER_MSEC
;
2867 double max
= (double)(stats
->max
) / NSEC_PER_MSEC
;
2868 double avg
= avg_stats(stats
);
2870 u64 n
= (u64
) stats
->n
;
/* Relative standard deviation in percent, computed before avg is rescaled. */
2872 pct
= avg
? 100.0 * stddev_stats(stats
)/avg
: 0.0;
2873 avg
/= NSEC_PER_MSEC
;
2875 sc
= &trace
->syscalls
.table
[inode
->i
];
2876 printed
+= fprintf(fp
, " %-15s", sc
->name
);
2877 printed
+= fprintf(fp
, " %8" PRIu64
" %9.3f %9.3f %9.3f",
2878 n
, avg
* n
, min
, avg
);
2879 printed
+= fprintf(fp
, " %9.3f %9.2f%%\n", max
, pct
);
2882 inode
= intlist__next(inode
);
2885 printed
+= fprintf(fp
, "\n\n");
/*
 * Passed as the opaque 'priv' pointer to trace__fprintf_one_thread(), which
 * reads ->fp, ->trace and ->printed from it and adds to ->printed.
 */
2890 /* struct used to pass data to per-thread function */
2891 struct summary_data
{
2893 struct trace
*trace
;
2897 static int trace__fprintf_one_thread(struct thread
*thread
, void *priv
)
2899 struct summary_data
*data
= priv
;
2900 FILE *fp
= data
->fp
;
2901 size_t printed
= data
->printed
;
2902 struct trace
*trace
= data
->trace
;
2903 struct thread_trace
*ttrace
= thread__priv(thread
);
2909 ratio
= (double)ttrace
->nr_events
/ trace
->nr_events
* 100.0;
2911 printed
+= fprintf(fp
, " %s (%d), ", thread__comm_str(thread
), thread
->tid
);
2912 printed
+= fprintf(fp
, "%lu events, ", ttrace
->nr_events
);
2913 printed
+= fprintf(fp
, "%.1f%%", ratio
);
2915 printed
+= fprintf(fp
, ", %lu majfaults", ttrace
->pfmaj
);
2917 printed
+= fprintf(fp
, ", %lu minfaults", ttrace
->pfmin
);
2918 printed
+= fprintf(fp
, ", %.3f msec\n", ttrace
->runtime_ms
);
2919 printed
+= thread__dump_stats(ttrace
, trace
, fp
);
2921 data
->printed
+= printed
;
2926 static size_t trace__fprintf_thread_summary(struct trace
*trace
, FILE *fp
)
2928 struct summary_data data
= {
2932 data
.printed
= trace__fprintf_threads_header(fp
);
2934 machine__for_each_thread(trace
->host
, trace__fprintf_one_thread
, &data
);
2936 return data
.printed
;
2939 static int trace__set_duration(const struct option
*opt
, const char *str
,
2940 int unset __maybe_unused
)
2942 struct trace
*trace
= opt
->value
;
2944 trace
->duration_filter
= atof(str
);
2948 static int trace__set_filter_pids(const struct option
*opt
, const char *str
,
2949 int unset __maybe_unused
)
2953 struct trace
*trace
= opt
->value
;
2955 * FIXME: introduce a intarray class, plain parse csv and create a
2956 * { int nr, int entries[] } struct...
2958 struct intlist
*list
= intlist__new(str
);
2963 i
= trace
->filter_pids
.nr
= intlist__nr_entries(list
) + 1;
2964 trace
->filter_pids
.entries
= calloc(i
, sizeof(pid_t
));
2966 if (trace
->filter_pids
.entries
== NULL
)
2969 trace
->filter_pids
.entries
[0] = getpid();
2971 for (i
= 1; i
< trace
->filter_pids
.nr
; ++i
)
2972 trace
->filter_pids
.entries
[i
] = intlist__entry(list
, i
- 1)->i
;
2974 intlist__delete(list
);
2980 static int trace__open_output(struct trace
*trace
, const char *filename
)
2984 if (!stat(filename
, &st
) && st
.st_size
) {
2985 char oldname
[PATH_MAX
];
2987 scnprintf(oldname
, sizeof(oldname
), "%s.old", filename
);
2989 rename(filename
, oldname
);
2992 trace
->output
= fopen(filename
, "w");
2994 return trace
->output
== NULL
? -errno
: 0;
2997 static int parse_pagefaults(const struct option
*opt
, const char *str
,
2998 int unset __maybe_unused
)
3000 int *trace_pgfaults
= opt
->value
;
3002 if (strcmp(str
, "all") == 0)
3003 *trace_pgfaults
|= TRACE_PFMAJ
| TRACE_PFMIN
;
3004 else if (strcmp(str
, "maj") == 0)
3005 *trace_pgfaults
|= TRACE_PFMAJ
;
3006 else if (strcmp(str
, "min") == 0)
3007 *trace_pgfaults
|= TRACE_PFMIN
;
3014 static void evlist__set_evsel_handler(struct perf_evlist
*evlist
, void *handler
)
3016 struct perf_evsel
*evsel
;
3018 evlist__for_each(evlist
, evsel
)
3019 evsel
->handler
= handler
;
3022 int cmd_trace(int argc
, const char **argv
, const char *prefix __maybe_unused
)
3024 const char *trace_usage
[] = {
3025 "perf trace [<options>] [<command>]",
3026 "perf trace [<options>] -- <command> [<options>]",
3027 "perf trace record [<options>] [<command>]",
3028 "perf trace record [<options>] -- <command> [<options>]",
3031 struct trace trace
= {
3040 .user_freq
= UINT_MAX
,
3041 .user_interval
= ULLONG_MAX
,
3042 .no_buffering
= true,
3043 .mmap_pages
= UINT_MAX
,
3044 .proc_map_timeout
= 500,
3048 .trace_syscalls
= true,
3049 .kernel_syscallchains
= false,
3050 .max_stack
= UINT_MAX
,
3052 const char *output_name
= NULL
;
3053 const char *ev_qualifier_str
= NULL
;
3054 const struct option trace_options
[] = {
3055 OPT_CALLBACK(0, "event", &trace
.evlist
, "event",
3056 "event selector. use 'perf list' to list available events",
3057 parse_events_option
),
3058 OPT_BOOLEAN(0, "comm", &trace
.show_comm
,
3059 "show the thread COMM next to its id"),
3060 OPT_BOOLEAN(0, "tool_stats", &trace
.show_tool_stats
, "show tool stats"),
3061 OPT_STRING('e', "expr", &ev_qualifier_str
, "expr", "list of syscalls to trace"),
3062 OPT_STRING('o', "output", &output_name
, "file", "output file name"),
3063 OPT_STRING('i', "input", &input_name
, "file", "Analyze events in file"),
3064 OPT_STRING('p', "pid", &trace
.opts
.target
.pid
, "pid",
3065 "trace events on existing process id"),
3066 OPT_STRING('t', "tid", &trace
.opts
.target
.tid
, "tid",
3067 "trace events on existing thread id"),
3068 OPT_CALLBACK(0, "filter-pids", &trace
, "CSV list of pids",
3069 "pids to filter (by the kernel)", trace__set_filter_pids
),
3070 OPT_BOOLEAN('a', "all-cpus", &trace
.opts
.target
.system_wide
,
3071 "system-wide collection from all CPUs"),
3072 OPT_STRING('C', "cpu", &trace
.opts
.target
.cpu_list
, "cpu",
3073 "list of cpus to monitor"),
3074 OPT_BOOLEAN(0, "no-inherit", &trace
.opts
.no_inherit
,
3075 "child tasks do not inherit counters"),
3076 OPT_CALLBACK('m', "mmap-pages", &trace
.opts
.mmap_pages
, "pages",
3077 "number of mmap data pages",
3078 perf_evlist__parse_mmap_pages
),
3079 OPT_STRING('u', "uid", &trace
.opts
.target
.uid_str
, "user",
3081 OPT_CALLBACK(0, "duration", &trace
, "float",
3082 "show only events with duration > N.M ms",
3083 trace__set_duration
),
3084 OPT_BOOLEAN(0, "sched", &trace
.sched
, "show blocking scheduler events"),
3085 OPT_INCR('v', "verbose", &verbose
, "be more verbose"),
3086 OPT_BOOLEAN('T', "time", &trace
.full_time
,
3087 "Show full timestamp, not time relative to first start"),
3088 OPT_BOOLEAN('s', "summary", &trace
.summary_only
,
3089 "Show only syscall summary with statistics"),
3090 OPT_BOOLEAN('S', "with-summary", &trace
.summary
,
3091 "Show all syscalls and summary with statistics"),
3092 OPT_CALLBACK_DEFAULT('F', "pf", &trace
.trace_pgfaults
, "all|maj|min",
3093 "Trace pagefaults", parse_pagefaults
, "maj"),
3094 OPT_BOOLEAN(0, "syscalls", &trace
.trace_syscalls
, "Trace syscalls"),
3095 OPT_BOOLEAN('f', "force", &trace
.force
, "don't complain, do it"),
3096 OPT_CALLBACK(0, "call-graph", &trace
.opts
,
3097 "record_mode[,record_size]", record_callchain_help
,
3098 &record_parse_callchain_opt
),
3099 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace
.kernel_syscallchains
,
3100 "Show the kernel callchains on the syscall exit path"),
3101 OPT_UINTEGER(0, "min-stack", &trace
.min_stack
,
3102 "Set the minimum stack depth when parsing the callchain, "
3103 "anything below the specified depth will be ignored."),
3104 OPT_UINTEGER(0, "max-stack", &trace
.max_stack
,
3105 "Set the maximum stack depth when parsing the callchain, "
3106 "anything beyond the specified depth will be ignored. "
3107 "Default: " __stringify(PERF_MAX_STACK_DEPTH
)),
3108 OPT_UINTEGER(0, "proc-map-timeout", &trace
.opts
.proc_map_timeout
,
3109 "per thread proc mmap processing timeout in ms"),
3112 bool __maybe_unused max_stack_user_set
= true;
3113 bool mmap_pages_user_set
= true;
3114 const char * const trace_subcommands
[] = { "record", NULL
};
3118 signal(SIGSEGV
, sighandler_dump_stack
);
3119 signal(SIGFPE
, sighandler_dump_stack
);
3121 trace
.evlist
= perf_evlist__new();
3122 trace
.sctbl
= syscalltbl__new();
3124 if (trace
.evlist
== NULL
|| trace
.sctbl
== NULL
) {
3125 pr_err("Not enough memory to run!\n");
3130 argc
= parse_options_subcommand(argc
, argv
, trace_options
, trace_subcommands
,
3131 trace_usage
, PARSE_OPT_STOP_AT_NON_OPTION
);
3133 err
= bpf__setup_stdout(trace
.evlist
);
3135 bpf__strerror_setup_stdout(trace
.evlist
, err
, bf
, sizeof(bf
));
3136 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf
);
3142 if (trace
.trace_pgfaults
) {
3143 trace
.opts
.sample_address
= true;
3144 trace
.opts
.sample_time
= true;
3147 if (trace
.opts
.mmap_pages
== UINT_MAX
)
3148 mmap_pages_user_set
= false;
3150 if (trace
.max_stack
== UINT_MAX
) {
3151 trace
.max_stack
= PERF_MAX_STACK_DEPTH
;
3152 max_stack_user_set
= false;
3155 #ifdef HAVE_DWARF_UNWIND_SUPPORT
3156 if ((trace
.min_stack
|| max_stack_user_set
) && !trace
.opts
.callgraph_set
)
3157 record_opts__parse_callchain(&trace
.opts
, &callchain_param
, "dwarf", false);
3160 if (trace
.opts
.callgraph_set
) {
3161 if (!mmap_pages_user_set
&& geteuid() == 0)
3162 trace
.opts
.mmap_pages
= perf_event_mlock_kb_in_pages() * 4;
3164 symbol_conf
.use_callchain
= true;
3167 if (trace
.evlist
->nr_entries
> 0)
3168 evlist__set_evsel_handler(trace
.evlist
, trace__event_handler
);
3170 if ((argc
>= 1) && (strcmp(argv
[0], "record") == 0))
3171 return trace__record(&trace
, argc
-1, &argv
[1]);
3173 /* summary_only implies summary option, but don't overwrite summary if set */
3174 if (trace
.summary_only
)
3175 trace
.summary
= trace
.summary_only
;
3177 if (!trace
.trace_syscalls
&& !trace
.trace_pgfaults
&&
3178 trace
.evlist
->nr_entries
== 0 /* Was --events used? */) {
3179 pr_err("Please specify something to trace.\n");
3183 if (!trace
.trace_syscalls
&& ev_qualifier_str
) {
3184 pr_err("The -e option can't be used with --no-syscalls.\n");
3188 if (output_name
!= NULL
) {
3189 err
= trace__open_output(&trace
, output_name
);
3191 perror("failed to create output file");
3196 trace
.open_id
= syscalltbl__id(trace
.sctbl
, "open");
3198 if (ev_qualifier_str
!= NULL
) {
3199 const char *s
= ev_qualifier_str
;
3200 struct strlist_config slist_config
= {
3201 .dirname
= system_path(STRACE_GROUPS_DIR
),
3204 trace
.not_ev_qualifier
= *s
== '!';
3205 if (trace
.not_ev_qualifier
)
3207 trace
.ev_qualifier
= strlist__new(s
, &slist_config
);
3208 if (trace
.ev_qualifier
== NULL
) {
3209 fputs("Not enough memory to parse event qualifier",
3215 err
= trace__validate_ev_qualifier(&trace
);
3220 err
= target__validate(&trace
.opts
.target
);
3222 target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
3223 fprintf(trace
.output
, "%s", bf
);
3227 err
= target__parse_uid(&trace
.opts
.target
);
3229 target__strerror(&trace
.opts
.target
, err
, bf
, sizeof(bf
));
3230 fprintf(trace
.output
, "%s", bf
);
3234 if (!argc
&& target__none(&trace
.opts
.target
))
3235 trace
.opts
.target
.system_wide
= true;
3238 err
= trace__replay(&trace
);
3240 err
= trace__run(&trace
, argc
, argv
);
3243 if (output_name
!= NULL
)
3244 fclose(trace
.output
);