]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - tools/perf/builtin-trace.c
perf unwind: Provide only forward declarations for pointer types
[mirror_ubuntu-bionic-kernel.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
514f1c67 24#include "util/evlist.h"
4b6ab94e 25#include <subcmd/exec-cmd.h>
752fde44 26#include "util/machine.h"
9a3993d4 27#include "util/path.h"
6810fc91 28#include "util/session.h"
752fde44 29#include "util/thread.h"
4b6ab94e 30#include <subcmd/parse-options.h>
2ae3a312 31#include "util/strlist.h"
bdc89661 32#include "util/intlist.h"
514f1c67 33#include "util/thread_map.h"
bf2575c1 34#include "util/stat.h"
fd5cead2 35#include "trace/beauty/beauty.h"
97978b3e 36#include "trace-event.h"
9aca7f17 37#include "util/parse-events.h"
ba504235 38#include "util/bpf-loader.h"
566a0885 39#include "callchain.h"
fea01392 40#include "print_binary.h"
a067558e 41#include "string2.h"
fd0db102 42#include "syscalltbl.h"
96c14451 43#include "rb_resort.h"
514f1c67 44
a43783ae 45#include <errno.h>
fd20e811 46#include <inttypes.h>
fd0db102 47#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
514f1c67 48#include <stdlib.h>
017037ff 49#include <string.h>
8dd2a131 50#include <linux/err.h>
997bba8c
ACM
51#include <linux/filter.h>
52#include <linux/audit.h>
877a7a11 53#include <linux/kernel.h>
39878d49 54#include <linux/random.h>
c6d4a494 55#include <linux/stringify.h>
bd48c63e 56#include <linux/time64.h>
514f1c67 57
3d689ed6
ACM
58#include "sane_ctype.h"
59
c188e7ac
ACM
60#ifndef O_CLOEXEC
61# define O_CLOEXEC 02000000
62#endif
63
d1d438a3
ACM
64struct trace {
65 struct perf_tool tool;
fd0db102 66 struct syscalltbl *sctbl;
d1d438a3
ACM
67 struct {
68 int max;
69 struct syscall *table;
70 struct {
71 struct perf_evsel *sys_enter,
72 *sys_exit;
73 } events;
74 } syscalls;
75 struct record_opts opts;
76 struct perf_evlist *evlist;
77 struct machine *host;
78 struct thread *current;
79 u64 base_time;
80 FILE *output;
81 unsigned long nr_events;
82 struct strlist *ev_qualifier;
83 struct {
84 size_t nr;
85 int *entries;
86 } ev_qualifier_ids;
d1d438a3
ACM
87 struct {
88 size_t nr;
89 pid_t *entries;
90 } filter_pids;
91 double duration_filter;
92 double runtime_ms;
93 struct {
94 u64 vfs_getname,
95 proc_getname;
96 } stats;
c6d4a494 97 unsigned int max_stack;
5cf9c84e 98 unsigned int min_stack;
d1d438a3
ACM
99 bool not_ev_qualifier;
100 bool live;
101 bool full_time;
102 bool sched;
103 bool multiple_threads;
104 bool summary;
105 bool summary_only;
106 bool show_comm;
107 bool show_tool_stats;
108 bool trace_syscalls;
44621819 109 bool kernel_syscallchains;
d1d438a3
ACM
110 bool force;
111 bool vfs_getname;
112 int trace_pgfaults;
fd0db102 113 int open_id;
d1d438a3 114};
a1c2552d 115
77170988
ACM
116struct tp_field {
117 int offset;
118 union {
119 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
120 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
121 };
122};
123
124#define TP_UINT_FIELD(bits) \
125static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
126{ \
55d43bca
DA
127 u##bits value; \
128 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
129 return value; \
77170988
ACM
130}
131
132TP_UINT_FIELD(8);
133TP_UINT_FIELD(16);
134TP_UINT_FIELD(32);
135TP_UINT_FIELD(64);
136
137#define TP_UINT_FIELD__SWAPPED(bits) \
138static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
139{ \
55d43bca
DA
140 u##bits value; \
141 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
142 return bswap_##bits(value);\
143}
144
145TP_UINT_FIELD__SWAPPED(16);
146TP_UINT_FIELD__SWAPPED(32);
147TP_UINT_FIELD__SWAPPED(64);
148
149static int tp_field__init_uint(struct tp_field *field,
150 struct format_field *format_field,
151 bool needs_swap)
152{
153 field->offset = format_field->offset;
154
155 switch (format_field->size) {
156 case 1:
157 field->integer = tp_field__u8;
158 break;
159 case 2:
160 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
161 break;
162 case 4:
163 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
164 break;
165 case 8:
166 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
167 break;
168 default:
169 return -1;
170 }
171
172 return 0;
173}
174
175static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
176{
177 return sample->raw_data + field->offset;
178}
179
180static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
181{
182 field->offset = format_field->offset;
183 field->pointer = tp_field__ptr;
184 return 0;
185}
186
187struct syscall_tp {
188 struct tp_field id;
189 union {
190 struct tp_field args, ret;
191 };
192};
193
194static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
195 struct tp_field *field,
196 const char *name)
197{
198 struct format_field *format_field = perf_evsel__field(evsel, name);
199
200 if (format_field == NULL)
201 return -1;
202
203 return tp_field__init_uint(field, format_field, evsel->needs_swap);
204}
205
206#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
207 ({ struct syscall_tp *sc = evsel->priv;\
208 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
209
/*
 * Look up tracepoint field @name in @evsel and initialise @field as a
 * pointer accessor for it.
 *
 * Returns -1 if the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt ? tp_field__init_ptr(field, fmt) : -1;
}
221
222#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
223 ({ struct syscall_tp *sc = evsel->priv;\
224 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
225
226static void perf_evsel__delete_priv(struct perf_evsel *evsel)
227{
04662523 228 zfree(&evsel->priv);
77170988
ACM
229 perf_evsel__delete(evsel);
230}
231
96695d44
NK
232static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
233{
234 evsel->priv = malloc(sizeof(struct syscall_tp));
235 if (evsel->priv != NULL) {
236 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
237 goto out_delete;
238
239 evsel->handler = handler;
240 return 0;
241 }
242
243 return -ENOMEM;
244
245out_delete:
04662523 246 zfree(&evsel->priv);
96695d44
NK
247 return -ENOENT;
248}
249
ef503831 250static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
77170988 251{
ef503831 252 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
77170988 253
9aca7f17 254 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
8dd2a131 255 if (IS_ERR(evsel))
9aca7f17
DA
256 evsel = perf_evsel__newtp("syscalls", direction);
257
8dd2a131
JO
258 if (IS_ERR(evsel))
259 return NULL;
260
261 if (perf_evsel__init_syscall_tp(evsel, handler))
262 goto out_delete;
77170988
ACM
263
264 return evsel;
265
266out_delete:
267 perf_evsel__delete_priv(evsel);
268 return NULL;
269}
270
271#define perf_evsel__sc_tp_uint(evsel, name, sample) \
272 ({ struct syscall_tp *fields = evsel->priv; \
273 fields->name.integer(&fields->name, sample); })
274
275#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
276 ({ struct syscall_tp *fields = evsel->priv; \
277 fields->name.pointer(&fields->name, sample); })
278
/*
 * Table mapping small integers to names.
 *
 * @offset:     value subtracted from the integer before indexing @entries.
 * @nr_entries: number of slots in @entries.
 * @entries:    the names, indexed by (value - @offset).
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char  **entries;
};
284
285#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
286 .nr_entries = ARRAY_SIZE(array), \
287 .entries = array, \
288}
289
03e3adc9
ACM
290#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
291 .offset = off, \
292 .nr_entries = ARRAY_SIZE(array), \
293 .entries = array, \
294}
295
975b7c2f
ACM
296static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
297 const char *intfmt,
298 struct syscall_arg *arg)
1f115cb7 299{
1f115cb7 300 struct strarray *sa = arg->parm;
03e3adc9 301 int idx = arg->val - sa->offset;
1f115cb7
ACM
302
303 if (idx < 0 || idx >= sa->nr_entries)
975b7c2f 304 return scnprintf(bf, size, intfmt, arg->val);
1f115cb7
ACM
305
306 return scnprintf(bf, size, "%s", sa->entries[idx]);
307}
308
975b7c2f
ACM
/* strarray formatter whose fallback for unnamed values is decimal ("%d"). */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
314
1f115cb7
ACM
315#define SCA_STRARRAY syscall_arg__scnprintf_strarray
316
844ae5b4
ACM
317#if defined(__i386__) || defined(__x86_64__)
318/*
319 * FIXME: Make this available to all arches as soon as the ioctl beautifier
320 * gets rewritten to support all arches.
321 */
78645cf3
ACM
/* strarray formatter whose fallback for unnamed values is hex ("%#x"). */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
327
328#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
844ae5b4 329#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 330
75b757ca
ACM
331static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
332 struct syscall_arg *arg);
333
334#define SCA_FD syscall_arg__scnprintf_fd
335
48e1f91a
ACM
336#ifndef AT_FDCWD
337#define AT_FDCWD -100
338#endif
339
75b757ca
ACM
340static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
341 struct syscall_arg *arg)
342{
343 int fd = arg->val;
344
345 if (fd == AT_FDCWD)
346 return scnprintf(bf, size, "CWD");
347
348 return syscall_arg__scnprintf_fd(bf, size, arg);
349}
350
351#define SCA_FDAT syscall_arg__scnprintf_fd_at
352
353static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
354 struct syscall_arg *arg);
355
356#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
357
6e7eeb51 358static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
01533e97 359 struct syscall_arg *arg)
13d4ff3e 360{
01533e97 361 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
362}
363
beccb2b5
ACM
364#define SCA_HEX syscall_arg__scnprintf_hex
365
a1c2552d
ACM
366static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
367 struct syscall_arg *arg)
368{
369 return scnprintf(bf, size, "%d", arg->val);
370}
371
372#define SCA_INT syscall_arg__scnprintf_int
373
729a7841
ACM
374static const char *bpf_cmd[] = {
375 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
376 "MAP_GET_NEXT_KEY", "PROG_LOAD",
377};
378static DEFINE_STRARRAY(bpf_cmd);
379
03e3adc9
ACM
380static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
381static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 382
1f115cb7
ACM
383static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
384static DEFINE_STRARRAY(itimers);
385
b62bee1b
ACM
386static const char *keyctl_options[] = {
387 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
388 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
389 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
390 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
391 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
392};
393static DEFINE_STRARRAY(keyctl_options);
394
efe6b882
ACM
395static const char *whences[] = { "SET", "CUR", "END",
396#ifdef SEEK_DATA
397"DATA",
398#endif
399#ifdef SEEK_HOLE
400"HOLE",
401#endif
402};
403static DEFINE_STRARRAY(whences);
f9da0b0c 404
80f587d5
ACM
405static const char *fcntl_cmds[] = {
406 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
407 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
408 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
409 "F_GETOWNER_UIDS",
410};
411static DEFINE_STRARRAY(fcntl_cmds);
412
c045bf02
ACM
413static const char *rlimit_resources[] = {
414 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
415 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
416 "RTTIME",
417};
418static DEFINE_STRARRAY(rlimit_resources);
419
eb5b1b14
ACM
420static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
421static DEFINE_STRARRAY(sighow);
422
4f8c1b74
DA
423static const char *clockid[] = {
424 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
28ebb87c
ACM
425 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
426 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
4f8c1b74
DA
427};
428static DEFINE_STRARRAY(clockid);
429
e10bce81
ACM
430static const char *socket_families[] = {
431 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
432 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
433 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
434 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
435 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
436 "ALG", "NFC", "VSOCK",
437};
438static DEFINE_STRARRAY(socket_families);
439
51108999
ACM
440static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
441 struct syscall_arg *arg)
442{
443 size_t printed = 0;
444 int mode = arg->val;
445
446 if (mode == F_OK) /* 0 */
447 return scnprintf(bf, size, "F");
448#define P_MODE(n) \
449 if (mode & n##_OK) { \
450 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
451 mode &= ~n##_OK; \
452 }
453
454 P_MODE(R);
455 P_MODE(W);
456 P_MODE(X);
457#undef P_MODE
458
459 if (mode)
460 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
461
462 return printed;
463}
464
465#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
466
f994592d
ACM
467static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
468 struct syscall_arg *arg);
469
470#define SCA_FILENAME syscall_arg__scnprintf_filename
471
46cce19b
ACM
472static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
473 struct syscall_arg *arg)
474{
475 int printed = 0, flags = arg->val;
476
477#define P_FLAG(n) \
478 if (flags & O_##n) { \
479 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
480 flags &= ~O_##n; \
481 }
482
483 P_FLAG(CLOEXEC);
484 P_FLAG(NONBLOCK);
485#undef P_FLAG
486
487 if (flags)
488 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
489
490 return printed;
491}
492
493#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
494
844ae5b4
ACM
495#if defined(__i386__) || defined(__x86_64__)
496/*
497 * FIXME: Make this available to all arches.
498 */
78645cf3
ACM
499#define TCGETS 0x5401
500
/*
 * Names of the tty ioctls, indexed by (cmd - 0x5401), i.e. relative to
 * TCGETS, which is the offset the matching strarray is declared with.
 *
 * Because slot 0 is command number 0x01, a designator for command number
 * NR must be written as [NR - 1]; using the bare number shifts every
 * following name up by one.  Gaps in the numbering are left as NULL.
 */
static const char *tioctls[] = {
	"TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
	"TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
	"TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
	"TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
	"TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
	"TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
	"TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
	[0x27 - 1] = "TIOCSBRK",
	"TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
	"TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
	"TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
	"TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK",
	[0x40 - 1] = "TIOCGEXCL",
	[0x50 - 1] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
	"TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
	"TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
	"TIOCMIWAIT", "TIOCGICOUNT",
	[0x60 - 1] = "FIOQSIZE",
};
518
519static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
844ae5b4 520#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 521
a355a61e
ACM
522#ifndef GRND_NONBLOCK
523#define GRND_NONBLOCK 0x0001
524#endif
525#ifndef GRND_RANDOM
526#define GRND_RANDOM 0x0002
527#endif
528
39878d49
ACM
529static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
530 struct syscall_arg *arg)
531{
532 int printed = 0, flags = arg->val;
533
534#define P_FLAG(n) \
535 if (flags & GRND_##n) { \
536 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
537 flags &= ~GRND_##n; \
538 }
539
540 P_FLAG(RANDOM);
541 P_FLAG(NONBLOCK);
542#undef P_FLAG
543
544 if (flags)
545 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
546
547 return printed;
548}
549
550#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
551
453350dd
ACM
552#define STRARRAY(arg, name, array) \
553 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
554 .arg_parm = { [arg] = &strarray__##array, }
555
ea8dc3ce 556#include "trace/beauty/eventfd.c"
8bf382ce 557#include "trace/beauty/flock.c"
d5d71e86 558#include "trace/beauty/futex_op.c"
df4cb167 559#include "trace/beauty/mmap.c"
ba2f22cf 560#include "trace/beauty/mode_t.c"
a30e6259 561#include "trace/beauty/msg_flags.c"
8f48df69 562#include "trace/beauty/open_flags.c"
62de344e 563#include "trace/beauty/perf_event_open.c"
d5d71e86 564#include "trace/beauty/pid.c"
a3bca91f 565#include "trace/beauty/sched_policy.c"
f5cd95ea 566#include "trace/beauty/seccomp.c"
12199d8e 567#include "trace/beauty/signum.c"
bbf86c43 568#include "trace/beauty/socket_type.c"
7206b900 569#include "trace/beauty/waitid_options.c"
a3bca91f 570
514f1c67
ACM
571static struct syscall_fmt {
572 const char *name;
aec1930b 573 const char *alias;
01533e97 574 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1f115cb7 575 void *arg_parm[6];
514f1c67 576 bool errmsg;
11c8e39f 577 bool errpid;
514f1c67 578 bool timeout;
04b34729 579 bool hexret;
514f1c67 580} syscall_fmts[] = {
51108999 581 { .name = "access", .errmsg = true,
12f3ca4f 582 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
aec1930b 583 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
729a7841 584 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
beccb2b5
ACM
585 { .name = "brk", .hexret = true,
586 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
12f3ca4f
ACM
587 { .name = "chdir", .errmsg = true, },
588 { .name = "chmod", .errmsg = true, },
589 { .name = "chroot", .errmsg = true, },
4f8c1b74 590 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
11c8e39f 591 { .name = "clone", .errpid = true, },
75b757ca 592 { .name = "close", .errmsg = true,
48000a1a 593 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
a14bb860 594 { .name = "connect", .errmsg = true, },
12f3ca4f 595 { .name = "creat", .errmsg = true, },
b6565c90
ACM
596 { .name = "dup", .errmsg = true, },
597 { .name = "dup2", .errmsg = true, },
598 { .name = "dup3", .errmsg = true, },
453350dd 599 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
49af9e93
ACM
600 { .name = "eventfd2", .errmsg = true,
601 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
12f3ca4f 602 { .name = "faccessat", .errmsg = true, },
b6565c90
ACM
603 { .name = "fadvise64", .errmsg = true, },
604 { .name = "fallocate", .errmsg = true, },
605 { .name = "fchdir", .errmsg = true, },
606 { .name = "fchmod", .errmsg = true, },
75b757ca 607 { .name = "fchmodat", .errmsg = true,
12f3ca4f 608 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90 609 { .name = "fchown", .errmsg = true, },
75b757ca 610 { .name = "fchownat", .errmsg = true,
12f3ca4f 611 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
75b757ca 612 { .name = "fcntl", .errmsg = true,
b6565c90 613 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
75b757ca 614 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
b6565c90 615 { .name = "fdatasync", .errmsg = true, },
5cea6ff2 616 { .name = "flock", .errmsg = true,
b6565c90
ACM
617 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
618 { .name = "fsetxattr", .errmsg = true, },
619 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
12f3ca4f 620 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
b6565c90
ACM
621 { .name = "fstatfs", .errmsg = true, },
622 { .name = "fsync", .errmsg = true, },
623 { .name = "ftruncate", .errmsg = true, },
f9da0b0c
ACM
624 { .name = "futex", .errmsg = true,
625 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
75b757ca 626 { .name = "futimesat", .errmsg = true,
12f3ca4f 627 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90
ACM
628 { .name = "getdents", .errmsg = true, },
629 { .name = "getdents64", .errmsg = true, },
453350dd 630 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
c65f1070 631 { .name = "getpid", .errpid = true, },
d1d438a3 632 { .name = "getpgid", .errpid = true, },
c65f1070 633 { .name = "getppid", .errpid = true, },
39878d49
ACM
634 { .name = "getrandom", .errmsg = true,
635 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
453350dd 636 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f
ACM
637 { .name = "getxattr", .errmsg = true, },
638 { .name = "inotify_add_watch", .errmsg = true, },
beccb2b5 639 { .name = "ioctl", .errmsg = true,
b6565c90 640 .arg_scnprintf = {
844ae5b4
ACM
641#if defined(__i386__) || defined(__x86_64__)
642/*
643 * FIXME: Make this available to all arches.
644 */
78645cf3
ACM
645 [1] = SCA_STRHEXARRAY, /* cmd */
646 [2] = SCA_HEX, /* arg */ },
647 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
844ae5b4
ACM
648#else
649 [2] = SCA_HEX, /* arg */ }, },
650#endif
b62bee1b 651 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
8bad5b0a
ACM
652 { .name = "kill", .errmsg = true,
653 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f
ACM
654 { .name = "lchown", .errmsg = true, },
655 { .name = "lgetxattr", .errmsg = true, },
75b757ca 656 { .name = "linkat", .errmsg = true,
48000a1a 657 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
12f3ca4f
ACM
658 { .name = "listxattr", .errmsg = true, },
659 { .name = "llistxattr", .errmsg = true, },
660 { .name = "lremovexattr", .errmsg = true, },
75b757ca 661 { .name = "lseek", .errmsg = true,
b6565c90 662 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
75b757ca 663 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
12f3ca4f
ACM
664 { .name = "lsetxattr", .errmsg = true, },
665 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
666 { .name = "lsxattr", .errmsg = true, },
9e9716d1
ACM
667 { .name = "madvise", .errmsg = true,
668 .arg_scnprintf = { [0] = SCA_HEX, /* start */
669 [2] = SCA_MADV_BHV, /* behavior */ }, },
12f3ca4f 670 { .name = "mkdir", .errmsg = true, },
75b757ca 671 { .name = "mkdirat", .errmsg = true,
12f3ca4f
ACM
672 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
673 { .name = "mknod", .errmsg = true, },
75b757ca 674 { .name = "mknodat", .errmsg = true,
12f3ca4f 675 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
3d903aa7
ACM
676 { .name = "mlock", .errmsg = true,
677 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
678 { .name = "mlockall", .errmsg = true,
679 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5 680 { .name = "mmap", .hexret = true,
ae685380 681 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
941557e0 682 [2] = SCA_MMAP_PROT, /* prot */
b6565c90 683 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
beccb2b5 684 { .name = "mprotect", .errmsg = true,
ae685380
ACM
685 .arg_scnprintf = { [0] = SCA_HEX, /* start */
686 [2] = SCA_MMAP_PROT, /* prot */ }, },
090389b6
ACM
687 { .name = "mq_unlink", .errmsg = true,
688 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
ae685380
ACM
689 { .name = "mremap", .hexret = true,
690 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
86998dda 691 [3] = SCA_MREMAP_FLAGS, /* flags */
ae685380 692 [4] = SCA_HEX, /* new_addr */ }, },
3d903aa7
ACM
693 { .name = "munlock", .errmsg = true,
694 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5
ACM
695 { .name = "munmap", .errmsg = true,
696 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
75b757ca 697 { .name = "name_to_handle_at", .errmsg = true,
48000a1a 698 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
75b757ca 699 { .name = "newfstatat", .errmsg = true,
12f3ca4f 700 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
be65a89a 701 { .name = "open", .errmsg = true,
12f3ca4f 702 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 703 { .name = "open_by_handle_at", .errmsg = true,
75b757ca
ACM
704 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
705 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 706 { .name = "openat", .errmsg = true,
75b757ca
ACM
707 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
708 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
a1c2552d 709 { .name = "perf_event_open", .errmsg = true,
ccd9b2a7 710 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
a1c2552d
ACM
711 [3] = SCA_FD, /* group_fd */
712 [4] = SCA_PERF_FLAGS, /* flags */ }, },
46cce19b
ACM
713 { .name = "pipe2", .errmsg = true,
714 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
aec1930b
ACM
715 { .name = "poll", .errmsg = true, .timeout = true, },
716 { .name = "ppoll", .errmsg = true, .timeout = true, },
b6565c90
ACM
717 { .name = "pread", .errmsg = true, .alias = "pread64", },
718 { .name = "preadv", .errmsg = true, .alias = "pread", },
453350dd 719 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
b6565c90
ACM
720 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
721 { .name = "pwritev", .errmsg = true, },
722 { .name = "read", .errmsg = true, },
12f3ca4f 723 { .name = "readlink", .errmsg = true, },
75b757ca 724 { .name = "readlinkat", .errmsg = true,
12f3ca4f 725 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
b6565c90 726 { .name = "readv", .errmsg = true, },
b2cc99fd 727 { .name = "recvfrom", .errmsg = true,
b6565c90 728 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 729 { .name = "recvmmsg", .errmsg = true,
b6565c90 730 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 731 { .name = "recvmsg", .errmsg = true,
b6565c90 732 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
12f3ca4f 733 { .name = "removexattr", .errmsg = true, },
75b757ca 734 { .name = "renameat", .errmsg = true,
48000a1a 735 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
12f3ca4f 736 { .name = "rmdir", .errmsg = true, },
8bad5b0a
ACM
737 { .name = "rt_sigaction", .errmsg = true,
738 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
453350dd 739 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
8bad5b0a
ACM
740 { .name = "rt_sigqueueinfo", .errmsg = true,
741 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
742 { .name = "rt_tgsigqueueinfo", .errmsg = true,
743 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
f0bbd602
ACM
744 { .name = "sched_getattr", .errmsg = true, },
745 { .name = "sched_setattr", .errmsg = true, },
a3bca91f
ACM
746 { .name = "sched_setscheduler", .errmsg = true,
747 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
997bba8c
ACM
748 { .name = "seccomp", .errmsg = true,
749 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
750 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
aec1930b 751 { .name = "select", .errmsg = true, .timeout = true, },
b2cc99fd 752 { .name = "sendmmsg", .errmsg = true,
b6565c90 753 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 754 { .name = "sendmsg", .errmsg = true,
b6565c90 755 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 756 { .name = "sendto", .errmsg = true,
b6565c90 757 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
c65f1070 758 { .name = "set_tid_address", .errpid = true, },
453350dd 759 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
d1d438a3 760 { .name = "setpgid", .errmsg = true, },
453350dd 761 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f 762 { .name = "setxattr", .errmsg = true, },
b6565c90 763 { .name = "shutdown", .errmsg = true, },
e10bce81 764 { .name = "socket", .errmsg = true,
a28b24b2
ACM
765 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
766 [1] = SCA_SK_TYPE, /* type */ },
07120aa5
ACM
767 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
768 { .name = "socketpair", .errmsg = true,
769 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
770 [1] = SCA_SK_TYPE, /* type */ },
e10bce81 771 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
12f3ca4f
ACM
772 { .name = "stat", .errmsg = true, .alias = "newstat", },
773 { .name = "statfs", .errmsg = true, },
fd5cead2
ACM
774 { .name = "statx", .errmsg = true,
775 .arg_scnprintf = { [0] = SCA_FDAT, /* flags */
776 [2] = SCA_STATX_FLAGS, /* flags */
777 [3] = SCA_STATX_MASK, /* mask */ }, },
34221118
ACM
778 { .name = "swapoff", .errmsg = true,
779 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
780 { .name = "swapon", .errmsg = true,
781 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
75b757ca 782 { .name = "symlinkat", .errmsg = true,
48000a1a 783 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
8bad5b0a
ACM
784 { .name = "tgkill", .errmsg = true,
785 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
786 { .name = "tkill", .errmsg = true,
787 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f 788 { .name = "truncate", .errmsg = true, },
e5959683 789 { .name = "uname", .errmsg = true, .alias = "newuname", },
75b757ca 790 { .name = "unlinkat", .errmsg = true,
12f3ca4f
ACM
791 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
792 { .name = "utime", .errmsg = true, },
75b757ca 793 { .name = "utimensat", .errmsg = true,
12f3ca4f
ACM
794 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
795 { .name = "utimes", .errmsg = true, },
b6565c90 796 { .name = "vmsplice", .errmsg = true, },
11c8e39f 797 { .name = "wait4", .errpid = true,
7206b900 798 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
11c8e39f 799 { .name = "waitid", .errpid = true,
7206b900 800 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
b6565c90
ACM
801 { .name = "write", .errmsg = true, },
802 { .name = "writev", .errmsg = true, },
514f1c67
ACM
803};
804
805static int syscall_fmt__cmp(const void *name, const void *fmtp)
806{
807 const struct syscall_fmt *fmt = fmtp;
808 return strcmp(name, fmt->name);
809}
810
811static struct syscall_fmt *syscall_fmt__find(const char *name)
812{
813 const int nmemb = ARRAY_SIZE(syscall_fmts);
814 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
815}
816
817struct syscall {
818 struct event_format *tp_format;
f208bd8d
ACM
819 int nr_args;
820 struct format_field *args;
514f1c67 821 const char *name;
5089f20e 822 bool is_exit;
514f1c67 823 struct syscall_fmt *fmt;
01533e97 824 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1f115cb7 825 void **arg_parm;
514f1c67
ACM
826};
827
fd2b2975
ACM
828/*
829 * We need to have this 'calculated' boolean because in some cases we really
830 * don't know what is the duration of a syscall, for instance, when we start
831 * a session and some threads are waiting for a syscall to finish, say 'poll',
832 * in which case all we can do is to print "( ? ) for duration and for the
833 * start timestamp.
834 */
835static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
836{
837 double duration = (double)t / NSEC_PER_MSEC;
838 size_t printed = fprintf(fp, "(");
839
fd2b2975
ACM
840 if (!calculated)
841 printed += fprintf(fp, " ? ");
842 else if (duration >= 1.0)
60c907ab
ACM
843 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
844 else if (duration >= 0.01)
845 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
846 else
847 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 848 return printed + fprintf(fp, "): ");
60c907ab
ACM
849}
850
f994592d
ACM
851/**
852 * filename.ptr: The filename char pointer that will be vfs_getname'd
853 * filename.entry_str_pos: Where to insert the string translated from
854 * filename.ptr by the vfs_getname tracepoint/kprobe.
855 */
752fde44
ACM
856struct thread_trace {
857 u64 entry_time;
752fde44 858 bool entry_pending;
efd5745e 859 unsigned long nr_events;
a2ea67d7 860 unsigned long pfmaj, pfmin;
752fde44 861 char *entry_str;
1302d88e 862 double runtime_ms;
f994592d
ACM
863 struct {
864 unsigned long ptr;
7f4f8001
ACM
865 short int entry_str_pos;
866 bool pending_open;
867 unsigned int namelen;
868 char *name;
f994592d 869 } filename;
75b757ca
ACM
870 struct {
871 int max;
872 char **table;
873 } paths;
bf2575c1
DA
874
875 struct intlist *syscall_stats;
752fde44
ACM
876};
877
878static struct thread_trace *thread_trace__new(void)
879{
75b757ca
ACM
880 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
881
882 if (ttrace)
883 ttrace->paths.max = -1;
884
bf2575c1
DA
885 ttrace->syscall_stats = intlist__new(NULL);
886
75b757ca 887 return ttrace;
752fde44
ACM
888}
889
c24ff998 890static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 891{
efd5745e
ACM
892 struct thread_trace *ttrace;
893
752fde44
ACM
894 if (thread == NULL)
895 goto fail;
896
89dceb22
NK
897 if (thread__priv(thread) == NULL)
898 thread__set_priv(thread, thread_trace__new());
48000a1a 899
89dceb22 900 if (thread__priv(thread) == NULL)
752fde44
ACM
901 goto fail;
902
89dceb22 903 ttrace = thread__priv(thread);
efd5745e
ACM
904 ++ttrace->nr_events;
905
906 return ttrace;
752fde44 907fail:
c24ff998 908 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
909 "WARNING: not enough memory, dropping samples!\n");
910 return NULL;
911}
912
598d02c5
SF
/* Bit flags; presumably select major/minor page fault tracing - confirm at the users */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size in bytes of the per thread buffer holding a formatted sys_enter line */
static const size_t trace__entry_str_size = 2048;
917
97119f37 918static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 919{
89dceb22 920 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
921
922 if (fd > ttrace->paths.max) {
923 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
924
925 if (npath == NULL)
926 return -1;
927
928 if (ttrace->paths.max != -1) {
929 memset(npath + ttrace->paths.max + 1, 0,
930 (fd - ttrace->paths.max) * sizeof(char *));
931 } else {
932 memset(npath, 0, (fd + 1) * sizeof(char *));
933 }
934
935 ttrace->paths.table = npath;
936 ttrace->paths.max = fd;
937 }
938
939 ttrace->paths.table[fd] = strdup(pathname);
940
941 return ttrace->paths.table[fd] != NULL ? 0 : -1;
942}
943
97119f37
ACM
944static int thread__read_fd_path(struct thread *thread, int fd)
945{
946 char linkname[PATH_MAX], pathname[PATH_MAX];
947 struct stat st;
948 int ret;
949
950 if (thread->pid_ == thread->tid) {
951 scnprintf(linkname, sizeof(linkname),
952 "/proc/%d/fd/%d", thread->pid_, fd);
953 } else {
954 scnprintf(linkname, sizeof(linkname),
955 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
956 }
957
958 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
959 return -1;
960
961 ret = readlink(linkname, pathname, sizeof(pathname));
962
963 if (ret < 0 || ret > st.st_size)
964 return -1;
965
966 pathname[ret] = '\0';
967 return trace__set_fd_pathname(thread, fd, pathname);
968}
969
c522739d
ACM
970static const char *thread__fd_path(struct thread *thread, int fd,
971 struct trace *trace)
75b757ca 972{
89dceb22 973 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
974
975 if (ttrace == NULL)
976 return NULL;
977
978 if (fd < 0)
979 return NULL;
980
cdcd1e6b 981 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
982 if (!trace->live)
983 return NULL;
984 ++trace->stats.proc_getname;
cdcd1e6b 985 if (thread__read_fd_path(thread, fd))
c522739d
ACM
986 return NULL;
987 }
75b757ca
ACM
988
989 return ttrace->paths.table[fd];
990}
991
992static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
993 struct syscall_arg *arg)
994{
995 int fd = arg->val;
996 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 997 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
998
999 if (path)
1000 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1001
1002 return printed;
1003}
1004
1005static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1006 struct syscall_arg *arg)
1007{
1008 int fd = arg->val;
1009 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1010 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1011
04662523
ACM
1012 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1013 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1014
1015 return printed;
1016}
1017
f994592d
ACM
1018static void thread__set_filename_pos(struct thread *thread, const char *bf,
1019 unsigned long ptr)
1020{
1021 struct thread_trace *ttrace = thread__priv(thread);
1022
1023 ttrace->filename.ptr = ptr;
1024 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1025}
1026
1027static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1028 struct syscall_arg *arg)
1029{
1030 unsigned long ptr = arg->val;
1031
1032 if (!arg->trace->vfs_getname)
1033 return scnprintf(bf, size, "%#x", ptr);
1034
1035 thread__set_filename_pos(arg->thread, bf, ptr);
1036 return 0;
1037}
1038
ae9ed035
ACM
1039static bool trace__filter_duration(struct trace *trace, double t)
1040{
1041 return t < (trace->duration_filter * NSEC_PER_MSEC);
1042}
1043
fd2b2975 1044static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1045{
1046 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1047
60c907ab 1048 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1049}
1050
fd2b2975
ACM
1051/*
1052 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1053 * using ttrace->entry_time for a thread that receives a sys_exit without
1054 * first having received a sys_enter ("poll" issued before tracing session
1055 * starts, lost sys_enter exit due to ring buffer overflow).
1056 */
1057static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1058{
1059 if (tstamp > 0)
1060 return __trace__fprintf_tstamp(trace, tstamp, fp);
1061
1062 return fprintf(fp, " ? ");
1063}
1064
/* Set by sig_handler(): a signal asked us to stop */
static bool done = false;
/* Set by sig_handler(): the signal that stopped us was SIGINT */
static bool interrupted = false;

/* Signal handler: request termination and note if it was an interrupt */
static void sig_handler(int sig)
{
	interrupted = (sig == SIGINT);
	done = true;
}
1073
752fde44 1074static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
fd2b2975 1075 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
752fde44
ACM
1076{
1077 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
fd2b2975 1078 printed += fprintf_duration(duration, duration_calculated, fp);
752fde44 1079
50c95cbd
ACM
1080 if (trace->multiple_threads) {
1081 if (trace->show_comm)
1902efe7 1082 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1083 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1084 }
752fde44
ACM
1085
1086 return printed;
1087}
1088
c24ff998 1089static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1090 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1091{
1092 int ret = 0;
1093
1094 switch (event->header.type) {
1095 case PERF_RECORD_LOST:
c24ff998 1096 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1097 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1098 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1099 break;
752fde44 1100 default:
162f0bef 1101 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1102 break;
1103 }
1104
1105 return ret;
1106}
1107
c24ff998 1108static int trace__tool_process(struct perf_tool *tool,
752fde44 1109 union perf_event *event,
162f0bef 1110 struct perf_sample *sample,
752fde44
ACM
1111 struct machine *machine)
1112{
c24ff998 1113 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1114 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1115}
1116
caf8a0d0
ACM
1117static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1118{
1119 struct machine *machine = vmachine;
1120
1121 if (machine->kptr_restrict_warned)
1122 return NULL;
1123
1124 if (symbol_conf.kptr_restrict) {
1125 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1126 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1127 "Kernel samples will not be resolved.\n");
1128 machine->kptr_restrict_warned = true;
1129 return NULL;
1130 }
1131
1132 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1133}
1134
752fde44
ACM
1135static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1136{
0a7e6d1b 1137 int err = symbol__init(NULL);
752fde44
ACM
1138
1139 if (err)
1140 return err;
1141
8fb598e5
DA
1142 trace->host = machine__new_host();
1143 if (trace->host == NULL)
1144 return -ENOMEM;
752fde44 1145
caf8a0d0 1146 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
706c3da4
ACM
1147 return -errno;
1148
a33fbd56 1149 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76
KL
1150 evlist->threads, trace__tool_process, false,
1151 trace->opts.proc_map_timeout);
752fde44
ACM
1152 if (err)
1153 symbol__exit();
1154
1155 return err;
1156}
1157
13d4ff3e
ACM
1158static int syscall__set_arg_fmts(struct syscall *sc)
1159{
1160 struct format_field *field;
b6565c90 1161 int idx = 0, len;
13d4ff3e 1162
f208bd8d 1163 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
13d4ff3e
ACM
1164 if (sc->arg_scnprintf == NULL)
1165 return -1;
1166
1f115cb7
ACM
1167 if (sc->fmt)
1168 sc->arg_parm = sc->fmt->arg_parm;
1169
f208bd8d 1170 for (field = sc->args; field; field = field->next) {
beccb2b5
ACM
1171 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1172 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
12f3ca4f
ACM
1173 else if (strcmp(field->type, "const char *") == 0 &&
1174 (strcmp(field->name, "filename") == 0 ||
1175 strcmp(field->name, "path") == 0 ||
1176 strcmp(field->name, "pathname") == 0))
1177 sc->arg_scnprintf[idx] = SCA_FILENAME;
beccb2b5 1178 else if (field->flags & FIELD_IS_POINTER)
13d4ff3e 1179 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
d1d438a3
ACM
1180 else if (strcmp(field->type, "pid_t") == 0)
1181 sc->arg_scnprintf[idx] = SCA_PID;
ba2f22cf
ACM
1182 else if (strcmp(field->type, "umode_t") == 0)
1183 sc->arg_scnprintf[idx] = SCA_MODE_T;
b6565c90
ACM
1184 else if ((strcmp(field->type, "int") == 0 ||
1185 strcmp(field->type, "unsigned int") == 0 ||
1186 strcmp(field->type, "long") == 0) &&
1187 (len = strlen(field->name)) >= 2 &&
1188 strcmp(field->name + len - 2, "fd") == 0) {
1189 /*
1190 * /sys/kernel/tracing/events/syscalls/sys_enter*
1191 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1192 * 65 int
1193 * 23 unsigned int
1194 * 7 unsigned long
1195 */
1196 sc->arg_scnprintf[idx] = SCA_FD;
1197 }
13d4ff3e
ACM
1198 ++idx;
1199 }
1200
1201 return 0;
1202}
1203
514f1c67
ACM
1204static int trace__read_syscall_info(struct trace *trace, int id)
1205{
1206 char tp_name[128];
1207 struct syscall *sc;
fd0db102 1208 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1209
1210 if (name == NULL)
1211 return -1;
514f1c67
ACM
1212
1213 if (id > trace->syscalls.max) {
1214 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1215
1216 if (nsyscalls == NULL)
1217 return -1;
1218
1219 if (trace->syscalls.max != -1) {
1220 memset(nsyscalls + trace->syscalls.max + 1, 0,
1221 (id - trace->syscalls.max) * sizeof(*sc));
1222 } else {
1223 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1224 }
1225
1226 trace->syscalls.table = nsyscalls;
1227 trace->syscalls.max = id;
1228 }
1229
1230 sc = trace->syscalls.table + id;
3a531260 1231 sc->name = name;
2ae3a312 1232
3a531260 1233 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1234
aec1930b 1235 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1236 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1237
8dd2a131 1238 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1239 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1240 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1241 }
514f1c67 1242
8dd2a131 1243 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1244 return -1;
1245
f208bd8d
ACM
1246 sc->args = sc->tp_format->format.fields;
1247 sc->nr_args = sc->tp_format->format.nr_fields;
c42de706
TS
1248 /*
1249 * We need to check and discard the first variable '__syscall_nr'
1250 * or 'nr' that mean the syscall number. It is needless here.
1251 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1252 */
1253 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1254 sc->args = sc->args->next;
1255 --sc->nr_args;
1256 }
1257
5089f20e
ACM
1258 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1259
13d4ff3e 1260 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1261}
1262
d0cc439b
ACM
1263static int trace__validate_ev_qualifier(struct trace *trace)
1264{
8b3ce757 1265 int err = 0, i;
d0cc439b
ACM
1266 struct str_node *pos;
1267
8b3ce757
ACM
1268 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1269 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1270 sizeof(trace->ev_qualifier_ids.entries[0]));
1271
1272 if (trace->ev_qualifier_ids.entries == NULL) {
1273 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1274 trace->output);
1275 err = -EINVAL;
1276 goto out;
1277 }
1278
1279 i = 0;
1280
602a1f4d 1281 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1282 const char *sc = pos->s;
fd0db102 1283 int id = syscalltbl__id(trace->sctbl, sc);
d0cc439b 1284
8b3ce757 1285 if (id < 0) {
d0cc439b
ACM
1286 if (err == 0) {
1287 fputs("Error:\tInvalid syscall ", trace->output);
1288 err = -EINVAL;
1289 } else {
1290 fputs(", ", trace->output);
1291 }
1292
1293 fputs(sc, trace->output);
1294 }
8b3ce757
ACM
1295
1296 trace->ev_qualifier_ids.entries[i++] = id;
d0cc439b
ACM
1297 }
1298
1299 if (err < 0) {
1300 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1301 "\nHint:\tand: 'man syscalls'\n", trace->output);
8b3ce757
ACM
1302 zfree(&trace->ev_qualifier_ids.entries);
1303 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1304 }
8b3ce757 1305out:
d0cc439b
ACM
1306 return err;
1307}
1308
55d43bca
DA
1309/*
1310 * args is to be interpreted as a series of longs but we need to handle
1311 * 8-byte unaligned accesses. args points to raw_data within the event
1312 * and raw_data is guaranteed to be 8-byte unaligned because it is
1313 * preceded by raw_size which is a u32. So we need to copy args to a temp
1314 * variable to read it. Most notably this avoids extended load instructions
1315 * on unaligned addresses
1316 */
1317
752fde44 1318static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1319 unsigned char *args, struct trace *trace,
75b757ca 1320 struct thread *thread)
514f1c67 1321{
514f1c67 1322 size_t printed = 0;
55d43bca
DA
1323 unsigned char *p;
1324 unsigned long val;
514f1c67 1325
f208bd8d 1326 if (sc->args != NULL) {
514f1c67 1327 struct format_field *field;
01533e97
ACM
1328 u8 bit = 1;
1329 struct syscall_arg arg = {
75b757ca
ACM
1330 .idx = 0,
1331 .mask = 0,
1332 .trace = trace,
1333 .thread = thread,
01533e97 1334 };
6e7eeb51 1335
f208bd8d 1336 for (field = sc->args; field;
01533e97
ACM
1337 field = field->next, ++arg.idx, bit <<= 1) {
1338 if (arg.mask & bit)
6e7eeb51 1339 continue;
55d43bca
DA
1340
1341 /* special care for unaligned accesses */
1342 p = args + sizeof(unsigned long) * arg.idx;
1343 memcpy(&val, p, sizeof(val));
1344
4aa58232
ACM
1345 /*
1346 * Suppress this argument if its value is zero and
1347 * and we don't have a string associated in an
1348 * strarray for it.
1349 */
55d43bca 1350 if (val == 0 &&
4aa58232
ACM
1351 !(sc->arg_scnprintf &&
1352 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1353 sc->arg_parm[arg.idx]))
22ae5cf1
ACM
1354 continue;
1355
752fde44 1356 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1357 "%s%s: ", printed ? ", " : "", field->name);
01533e97 1358 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
55d43bca 1359 arg.val = val;
1f115cb7
ACM
1360 if (sc->arg_parm)
1361 arg.parm = sc->arg_parm[arg.idx];
01533e97
ACM
1362 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1363 size - printed, &arg);
6e7eeb51 1364 } else {
13d4ff3e 1365 printed += scnprintf(bf + printed, size - printed,
55d43bca 1366 "%ld", val);
6e7eeb51 1367 }
514f1c67 1368 }
4c4d6e51
ACM
1369 } else if (IS_ERR(sc->tp_format)) {
1370 /*
1371 * If we managed to read the tracepoint /format file, then we
1372 * may end up not having any args, like with gettid(), so only
1373 * print the raw args when we didn't manage to read it.
1374 */
01533e97
ACM
1375 int i = 0;
1376
514f1c67 1377 while (i < 6) {
55d43bca
DA
1378 /* special care for unaligned accesses */
1379 p = args + sizeof(unsigned long) * i;
1380 memcpy(&val, p, sizeof(val));
752fde44
ACM
1381 printed += scnprintf(bf + printed, size - printed,
1382 "%sarg%d: %ld",
55d43bca 1383 printed ? ", " : "", i, val);
514f1c67
ACM
1384 ++i;
1385 }
1386 }
1387
1388 return printed;
1389}
1390
/* Signature shared by the per tracepoint sample handlers in this file */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1394
1395static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1396 struct perf_evsel *evsel, int id)
ba3d7dee 1397{
ba3d7dee
ACM
1398
1399 if (id < 0) {
adaa18bf
ACM
1400
1401 /*
1402 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1403 * before that, leaving at a higher verbosity level till that is
1404 * explained. Reproduced with plain ftrace with:
1405 *
1406 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1407 * grep "NR -1 " /t/trace_pipe
1408 *
1409 * After generating some load on the machine.
1410 */
1411 if (verbose > 1) {
1412 static u64 n;
1413 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1414 id, perf_evsel__name(evsel), ++n);
1415 }
ba3d7dee
ACM
1416 return NULL;
1417 }
1418
1419 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1420 trace__read_syscall_info(trace, id))
1421 goto out_cant_read;
1422
1423 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1424 goto out_cant_read;
1425
1426 return &trace->syscalls.table[id];
1427
1428out_cant_read:
bb963e16 1429 if (verbose > 0) {
7c304ee0
ACM
1430 fprintf(trace->output, "Problems reading syscall %d", id);
1431 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1432 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1433 fputs(" information\n", trace->output);
1434 }
ba3d7dee
ACM
1435 return NULL;
1436}
1437
bf2575c1
DA
1438static void thread__update_stats(struct thread_trace *ttrace,
1439 int id, struct perf_sample *sample)
1440{
1441 struct int_node *inode;
1442 struct stats *stats;
1443 u64 duration = 0;
1444
1445 inode = intlist__findnew(ttrace->syscall_stats, id);
1446 if (inode == NULL)
1447 return;
1448
1449 stats = inode->priv;
1450 if (stats == NULL) {
1451 stats = malloc(sizeof(struct stats));
1452 if (stats == NULL)
1453 return;
1454 init_stats(stats);
1455 inode->priv = stats;
1456 }
1457
1458 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1459 duration = sample->time - ttrace->entry_time;
1460
1461 update_stats(stats, duration);
1462}
1463
e596663e
ACM
1464static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1465{
1466 struct thread_trace *ttrace;
1467 u64 duration;
1468 size_t printed;
1469
1470 if (trace->current == NULL)
1471 return 0;
1472
1473 ttrace = thread__priv(trace->current);
1474
1475 if (!ttrace->entry_pending)
1476 return 0;
1477
1478 duration = sample->time - ttrace->entry_time;
1479
fd2b2975 1480 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
e596663e
ACM
1481 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1482 ttrace->entry_pending = false;
1483
1484 return printed;
1485}
1486
ba3d7dee 1487static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1488 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1489 struct perf_sample *sample)
1490{
752fde44 1491 char *msg;
ba3d7dee 1492 void *args;
752fde44 1493 size_t printed = 0;
2ae3a312 1494 struct thread *thread;
b91fc39f 1495 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1496 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1497 struct thread_trace *ttrace;
1498
1499 if (sc == NULL)
1500 return -1;
ba3d7dee 1501
8fb598e5 1502 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1503 ttrace = thread__trace(thread, trace->output);
2ae3a312 1504 if (ttrace == NULL)
b91fc39f 1505 goto out_put;
ba3d7dee 1506
77170988 1507 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1508
1509 if (ttrace->entry_str == NULL) {
e4d44e83 1510 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1511 if (!ttrace->entry_str)
b91fc39f 1512 goto out_put;
752fde44
ACM
1513 }
1514
5cf9c84e 1515 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1516 trace__printf_interrupted_entry(trace, sample);
e596663e 1517
752fde44
ACM
1518 ttrace->entry_time = sample->time;
1519 msg = ttrace->entry_str;
e4d44e83 1520 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1521
e4d44e83 1522 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1523 args, trace, thread);
752fde44 1524
5089f20e 1525 if (sc->is_exit) {
5cf9c84e 1526 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
fd2b2975 1527 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1528 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1529 }
7f4f8001 1530 } else {
752fde44 1531 ttrace->entry_pending = true;
7f4f8001
ACM
1532 /* See trace__vfs_getname & trace__sys_exit */
1533 ttrace->filename.pending_open = false;
1534 }
ba3d7dee 1535
f3b623b8
ACM
1536 if (trace->current != thread) {
1537 thread__put(trace->current);
1538 trace->current = thread__get(thread);
1539 }
b91fc39f
ACM
1540 err = 0;
1541out_put:
1542 thread__put(thread);
1543 return err;
ba3d7dee
ACM
1544}
1545
5cf9c84e
ACM
1546static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1547 struct perf_sample *sample,
1548 struct callchain_cursor *cursor)
202ff968
ACM
1549{
1550 struct addr_location al;
5cf9c84e
ACM
1551
1552 if (machine__resolve(trace->host, &al, sample) < 0 ||
1553 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1554 return -1;
1555
1556 return 0;
1557}
1558
1559static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1560{
202ff968 1561 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1562 const unsigned int print_opts = EVSEL__PRINT_SYM |
1563 EVSEL__PRINT_DSO |
1564 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1565
d327e60c 1566 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1567}
1568
ba3d7dee 1569static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1570 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1571 struct perf_sample *sample)
1572{
2c82c3ad 1573 long ret;
60c907ab 1574 u64 duration = 0;
fd2b2975 1575 bool duration_calculated = false;
2ae3a312 1576 struct thread *thread;
5cf9c84e 1577 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1578 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1579 struct thread_trace *ttrace;
1580
1581 if (sc == NULL)
1582 return -1;
ba3d7dee 1583
8fb598e5 1584 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1585 ttrace = thread__trace(thread, trace->output);
2ae3a312 1586 if (ttrace == NULL)
b91fc39f 1587 goto out_put;
ba3d7dee 1588
bf2575c1
DA
1589 if (trace->summary)
1590 thread__update_stats(ttrace, id, sample);
1591
77170988 1592 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1593
fd0db102 1594 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1595 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1596 ttrace->filename.pending_open = false;
c522739d
ACM
1597 ++trace->stats.vfs_getname;
1598 }
1599
ae9ed035 1600 if (ttrace->entry_time) {
60c907ab 1601 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1602 if (trace__filter_duration(trace, duration))
1603 goto out;
fd2b2975 1604 duration_calculated = true;
ae9ed035
ACM
1605 } else if (trace->duration_filter)
1606 goto out;
60c907ab 1607
5cf9c84e
ACM
1608 if (sample->callchain) {
1609 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1610 if (callchain_ret == 0) {
1611 if (callchain_cursor.nr < trace->min_stack)
1612 goto out;
1613 callchain_ret = 1;
1614 }
1615 }
1616
fd2eabaf
DA
1617 if (trace->summary_only)
1618 goto out;
1619
fd2b2975 1620 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1621
1622 if (ttrace->entry_pending) {
c24ff998 1623 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1624 } else {
c24ff998
ACM
1625 fprintf(trace->output, " ... [");
1626 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1627 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1628 }
1629
da3c9a44
ACM
1630 if (sc->fmt == NULL) {
1631signed_print:
2c82c3ad 1632 fprintf(trace->output, ") = %ld", ret);
11c8e39f 1633 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
942a91ed 1634 char bf[STRERR_BUFSIZE];
c8b5f2c9 1635 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
ba3d7dee
ACM
1636 *e = audit_errno_to_name(-ret);
1637
c24ff998 1638 fprintf(trace->output, ") = -1 %s %s", e, emsg);
da3c9a44 1639 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1640 fprintf(trace->output, ") = 0 Timeout");
04b34729 1641 else if (sc->fmt->hexret)
2c82c3ad 1642 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1643 else if (sc->fmt->errpid) {
1644 struct thread *child = machine__find_thread(trace->host, ret, ret);
1645
1646 if (child != NULL) {
1647 fprintf(trace->output, ") = %ld", ret);
1648 if (child->comm_set)
1649 fprintf(trace->output, " (%s)", thread__comm_str(child));
1650 thread__put(child);
1651 }
1652 } else
da3c9a44 1653 goto signed_print;
ba3d7dee 1654
c24ff998 1655 fputc('\n', trace->output);
566a0885 1656
5cf9c84e
ACM
1657 if (callchain_ret > 0)
1658 trace__fprintf_callchain(trace, sample);
1659 else if (callchain_ret < 0)
1660 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1661out:
752fde44 1662 ttrace->entry_pending = false;
b91fc39f
ACM
1663 err = 0;
1664out_put:
1665 thread__put(thread);
1666 return err;
ba3d7dee
ACM
1667}
1668
c522739d 1669static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1670 union perf_event *event __maybe_unused,
c522739d
ACM
1671 struct perf_sample *sample)
1672{
f994592d
ACM
1673 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1674 struct thread_trace *ttrace;
1675 size_t filename_len, entry_str_len, to_move;
1676 ssize_t remaining_space;
1677 char *pos;
7f4f8001 1678 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1679
1680 if (!thread)
1681 goto out;
1682
1683 ttrace = thread__priv(thread);
1684 if (!ttrace)
ef65e96e 1685 goto out_put;
f994592d 1686
7f4f8001 1687 filename_len = strlen(filename);
39f0e7a8 1688 if (filename_len == 0)
ef65e96e 1689 goto out_put;
7f4f8001
ACM
1690
1691 if (ttrace->filename.namelen < filename_len) {
1692 char *f = realloc(ttrace->filename.name, filename_len + 1);
1693
1694 if (f == NULL)
ef65e96e 1695 goto out_put;
7f4f8001
ACM
1696
1697 ttrace->filename.namelen = filename_len;
1698 ttrace->filename.name = f;
1699 }
1700
1701 strcpy(ttrace->filename.name, filename);
1702 ttrace->filename.pending_open = true;
1703
f994592d 1704 if (!ttrace->filename.ptr)
ef65e96e 1705 goto out_put;
f994592d
ACM
1706
1707 entry_str_len = strlen(ttrace->entry_str);
1708 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1709 if (remaining_space <= 0)
ef65e96e 1710 goto out_put;
f994592d 1711
f994592d
ACM
1712 if (filename_len > (size_t)remaining_space) {
1713 filename += filename_len - remaining_space;
1714 filename_len = remaining_space;
1715 }
1716
1717 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1718 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1719 memmove(pos + filename_len, pos, to_move);
1720 memcpy(pos, filename, filename_len);
1721
1722 ttrace->filename.ptr = 0;
1723 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1724out_put:
1725 thread__put(thread);
f994592d 1726out:
c522739d
ACM
1727 return 0;
1728}
1729
1302d88e 1730static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1731 union perf_event *event __maybe_unused,
1302d88e
ACM
1732 struct perf_sample *sample)
1733{
1734 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1735 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1736 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1737 sample->pid,
1738 sample->tid);
c24ff998 1739 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1740
1741 if (ttrace == NULL)
1742 goto out_dump;
1743
1744 ttrace->runtime_ms += runtime_ms;
1745 trace->runtime_ms += runtime_ms;
ef65e96e 1746out_put:
b91fc39f 1747 thread__put(thread);
1302d88e
ACM
1748 return 0;
1749
1750out_dump:
c24ff998 1751 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1752 evsel->name,
1753 perf_evsel__strval(evsel, sample, "comm"),
1754 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1755 runtime,
1756 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 1757 goto out_put;
1302d88e
ACM
1758}
1759
1d6c9407
WN
1760static void bpf_output__printer(enum binary_printer_ops op,
1761 unsigned int val, void *extra)
1762{
1763 FILE *output = extra;
1764 unsigned char ch = (unsigned char)val;
1765
1766 switch (op) {
1767 case BINARY_PRINT_CHAR_DATA:
1768 fprintf(output, "%c", isprint(ch) ? ch : '.');
1769 break;
1770 case BINARY_PRINT_DATA_BEGIN:
1771 case BINARY_PRINT_LINE_BEGIN:
1772 case BINARY_PRINT_ADDR:
1773 case BINARY_PRINT_NUM_DATA:
1774 case BINARY_PRINT_NUM_PAD:
1775 case BINARY_PRINT_SEP:
1776 case BINARY_PRINT_CHAR_PAD:
1777 case BINARY_PRINT_LINE_END:
1778 case BINARY_PRINT_DATA_END:
1779 default:
1780 break;
1781 }
1782}
1783
1784static void bpf_output__fprintf(struct trace *trace,
1785 struct perf_sample *sample)
1786{
1787 print_binary(sample->raw_data, sample->raw_size, 8,
1788 bpf_output__printer, trace->output);
1789}
1790
14a052df
ACM
1791static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1792 union perf_event *event __maybe_unused,
1793 struct perf_sample *sample)
1794{
7ad35615
ACM
1795 int callchain_ret = 0;
1796
1797 if (sample->callchain) {
1798 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1799 if (callchain_ret == 0) {
1800 if (callchain_cursor.nr < trace->min_stack)
1801 goto out;
1802 callchain_ret = 1;
1803 }
1804 }
1805
14a052df
ACM
1806 trace__printf_interrupted_entry(trace, sample);
1807 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1808
1809 if (trace->trace_syscalls)
1810 fprintf(trace->output, "( ): ");
1811
1812 fprintf(trace->output, "%s:", evsel->name);
14a052df 1813
1d6c9407
WN
1814 if (perf_evsel__is_bpf_output(evsel)) {
1815 bpf_output__fprintf(trace, sample);
1816 } else if (evsel->tp_format) {
14a052df
ACM
1817 event_format__fprintf(evsel->tp_format, sample->cpu,
1818 sample->raw_data, sample->raw_size,
1819 trace->output);
1820 }
1821
1822 fprintf(trace->output, ")\n");
202ff968 1823
7ad35615
ACM
1824 if (callchain_ret > 0)
1825 trace__fprintf_callchain(trace, sample);
1826 else if (callchain_ret < 0)
1827 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1828out:
14a052df
ACM
1829 return 0;
1830}
1831
598d02c5
SF
1832static void print_location(FILE *f, struct perf_sample *sample,
1833 struct addr_location *al,
1834 bool print_dso, bool print_sym)
1835{
1836
bb963e16 1837 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
1838 fprintf(f, "%s@", al->map->dso->long_name);
1839
bb963e16 1840 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 1841 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1842 al->addr - al->sym->start);
1843 else if (al->map)
4414a3c5 1844 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1845 else
4414a3c5 1846 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1847}
1848
1849static int trace__pgfault(struct trace *trace,
1850 struct perf_evsel *evsel,
473398a2 1851 union perf_event *event __maybe_unused,
598d02c5
SF
1852 struct perf_sample *sample)
1853{
1854 struct thread *thread;
598d02c5
SF
1855 struct addr_location al;
1856 char map_type = 'd';
a2ea67d7 1857 struct thread_trace *ttrace;
b91fc39f 1858 int err = -1;
1df54290 1859 int callchain_ret = 0;
598d02c5
SF
1860
1861 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1862
1863 if (sample->callchain) {
1864 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1865 if (callchain_ret == 0) {
1866 if (callchain_cursor.nr < trace->min_stack)
1867 goto out_put;
1868 callchain_ret = 1;
1869 }
1870 }
1871
a2ea67d7
SF
1872 ttrace = thread__trace(thread, trace->output);
1873 if (ttrace == NULL)
b91fc39f 1874 goto out_put;
a2ea67d7
SF
1875
1876 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1877 ttrace->pfmaj++;
1878 else
1879 ttrace->pfmin++;
1880
1881 if (trace->summary_only)
b91fc39f 1882 goto out;
598d02c5 1883
473398a2 1884 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
1885 sample->ip, &al);
1886
fd2b2975 1887 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
1888
1889 fprintf(trace->output, "%sfault [",
1890 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1891 "maj" : "min");
1892
1893 print_location(trace->output, sample, &al, false, true);
1894
1895 fprintf(trace->output, "] => ");
1896
473398a2 1897 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
1898 sample->addr, &al);
1899
1900 if (!al.map) {
473398a2 1901 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
1902 MAP__FUNCTION, sample->addr, &al);
1903
1904 if (al.map)
1905 map_type = 'x';
1906 else
1907 map_type = '?';
1908 }
1909
1910 print_location(trace->output, sample, &al, true, false);
1911
1912 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 1913
1df54290
ACM
1914 if (callchain_ret > 0)
1915 trace__fprintf_callchain(trace, sample);
1916 else if (callchain_ret < 0)
1917 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
1918out:
1919 err = 0;
1920out_put:
1921 thread__put(thread);
1922 return err;
598d02c5
SF
1923}
1924
e6001980 1925static void trace__set_base_time(struct trace *trace,
8a07a809 1926 struct perf_evsel *evsel,
e6001980
ACM
1927 struct perf_sample *sample)
1928{
8a07a809
ACM
1929 /*
1930 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1931 * and don't use sample->time unconditionally, we may end up having
1932 * some other event in the future without PERF_SAMPLE_TIME for good
1933 * reason, i.e. we may not be interested in its timestamps, just in
1934 * it taking place, picking some piece of information when it
1935 * appears in our event stream (vfs_getname comes to mind).
1936 */
1937 if (trace->base_time == 0 && !trace->full_time &&
1938 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
1939 trace->base_time = sample->time;
1940}
1941
6810fc91 1942static int trace__process_sample(struct perf_tool *tool,
0c82adcf 1943 union perf_event *event,
6810fc91
DA
1944 struct perf_sample *sample,
1945 struct perf_evsel *evsel,
1946 struct machine *machine __maybe_unused)
1947{
1948 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 1949 struct thread *thread;
6810fc91
DA
1950 int err = 0;
1951
744a9719 1952 tracepoint_handler handler = evsel->handler;
6810fc91 1953
aa07df6e
DA
1954 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1955 if (thread && thread__is_filtered(thread))
ef65e96e 1956 goto out;
bdc89661 1957
e6001980 1958 trace__set_base_time(trace, evsel, sample);
6810fc91 1959
3160565f
DA
1960 if (handler) {
1961 ++trace->nr_events;
0c82adcf 1962 handler(trace, evsel, event, sample);
3160565f 1963 }
ef65e96e
ACM
1964out:
1965 thread__put(thread);
6810fc91
DA
1966 return err;
1967}
1968
1e28fe0a 1969static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
1970{
1971 unsigned int rec_argc, i, j;
1972 const char **rec_argv;
1973 const char * const record_args[] = {
1974 "record",
1975 "-R",
1976 "-m", "1024",
1977 "-c", "1",
5e2485b1
DA
1978 };
1979
1e28fe0a
SF
1980 const char * const sc_args[] = { "-e", };
1981 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1982 const char * const majpf_args[] = { "-e", "major-faults" };
1983 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1984 const char * const minpf_args[] = { "-e", "minor-faults" };
1985 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1986
9aca7f17 1987 /* +1 is for the event string below */
1e28fe0a
SF
1988 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1989 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
1990 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1991
1992 if (rec_argv == NULL)
1993 return -ENOMEM;
1994
1e28fe0a 1995 j = 0;
5e2485b1 1996 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
1997 rec_argv[j++] = record_args[i];
1998
e281a960
SF
1999 if (trace->trace_syscalls) {
2000 for (i = 0; i < sc_args_nr; i++)
2001 rec_argv[j++] = sc_args[i];
2002
2003 /* event string may be different for older kernels - e.g., RHEL6 */
2004 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2005 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2006 else if (is_valid_tracepoint("syscalls:sys_enter"))
2007 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2008 else {
2009 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2010 return -1;
2011 }
9aca7f17 2012 }
9aca7f17 2013
1e28fe0a
SF
2014 if (trace->trace_pgfaults & TRACE_PFMAJ)
2015 for (i = 0; i < majpf_args_nr; i++)
2016 rec_argv[j++] = majpf_args[i];
2017
2018 if (trace->trace_pgfaults & TRACE_PFMIN)
2019 for (i = 0; i < minpf_args_nr; i++)
2020 rec_argv[j++] = minpf_args[i];
2021
2022 for (i = 0; i < (unsigned int)argc; i++)
2023 rec_argv[j++] = argv[i];
5e2485b1 2024
b0ad8ea6 2025 return cmd_record(j, rec_argv);
5e2485b1
DA
2026}
2027
bf2575c1
DA
2028static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2029
08c98776 2030static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2031{
ef503831 2032 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2033
2034 if (IS_ERR(evsel))
08c98776 2035 return false;
c522739d
ACM
2036
2037 if (perf_evsel__field(evsel, "pathname") == NULL) {
2038 perf_evsel__delete(evsel);
08c98776 2039 return false;
c522739d
ACM
2040 }
2041
744a9719 2042 evsel->handler = trace__vfs_getname;
c522739d 2043 perf_evlist__add(evlist, evsel);
08c98776 2044 return true;
c522739d
ACM
2045}
2046
0ae537cb 2047static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2048{
2049 struct perf_evsel *evsel;
2050 struct perf_event_attr attr = {
2051 .type = PERF_TYPE_SOFTWARE,
2052 .mmap_data = 1,
598d02c5
SF
2053 };
2054
2055 attr.config = config;
0524798c 2056 attr.sample_period = 1;
598d02c5
SF
2057
2058 event_attr_init(&attr);
2059
2060 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2061 if (evsel)
2062 evsel->handler = trace__pgfault;
598d02c5 2063
0ae537cb 2064 return evsel;
598d02c5
SF
2065}
2066
ddbb1b13
ACM
2067static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2068{
2069 const u32 type = event->header.type;
2070 struct perf_evsel *evsel;
2071
ddbb1b13
ACM
2072 if (type != PERF_RECORD_SAMPLE) {
2073 trace__process_event(trace, trace->host, event, sample);
2074 return;
2075 }
2076
2077 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2078 if (evsel == NULL) {
2079 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2080 return;
2081 }
2082
e6001980
ACM
2083 trace__set_base_time(trace, evsel, sample);
2084
ddbb1b13
ACM
2085 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2086 sample->raw_data == NULL) {
2087 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2088 perf_evsel__name(evsel), sample->tid,
2089 sample->cpu, sample->raw_size);
2090 } else {
2091 tracepoint_handler handler = evsel->handler;
2092 handler(trace, evsel, event, sample);
2093 }
2094}
2095
c27366f0
ACM
2096static int trace__add_syscall_newtp(struct trace *trace)
2097{
2098 int ret = -1;
2099 struct perf_evlist *evlist = trace->evlist;
2100 struct perf_evsel *sys_enter, *sys_exit;
2101
2102 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2103 if (sys_enter == NULL)
2104 goto out;
2105
2106 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2107 goto out_delete_sys_enter;
2108
2109 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2110 if (sys_exit == NULL)
2111 goto out_delete_sys_enter;
2112
2113 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2114 goto out_delete_sys_exit;
2115
2116 perf_evlist__add(evlist, sys_enter);
2117 perf_evlist__add(evlist, sys_exit);
2118
2ddd5c04 2119 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2120 /*
2121 * We're interested only in the user space callchain
2122 * leading to the syscall, allow overriding that for
2123 * debugging reasons using --kernel_syscall_callchains
2124 */
2125 sys_exit->attr.exclude_callchain_kernel = 1;
2126 }
2127
8b3ce757
ACM
2128 trace->syscalls.events.sys_enter = sys_enter;
2129 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2130
2131 ret = 0;
2132out:
2133 return ret;
2134
2135out_delete_sys_exit:
2136 perf_evsel__delete_priv(sys_exit);
2137out_delete_sys_enter:
2138 perf_evsel__delete_priv(sys_enter);
2139 goto out;
2140}
2141
19867b61
ACM
2142static int trace__set_ev_qualifier_filter(struct trace *trace)
2143{
2144 int err = -1;
b15d0a4c 2145 struct perf_evsel *sys_exit;
19867b61
ACM
2146 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2147 trace->ev_qualifier_ids.nr,
2148 trace->ev_qualifier_ids.entries);
2149
2150 if (filter == NULL)
2151 goto out_enomem;
2152
3541c034
MP
2153 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2154 filter)) {
b15d0a4c 2155 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2156 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2157 }
19867b61
ACM
2158
2159 free(filter);
2160out:
2161 return err;
2162out_enomem:
2163 errno = ENOMEM;
2164 goto out;
2165}
c27366f0 2166
/*
 * Live tracing entry point.
 *
 * In order, this function: marks the trace as live, adds the events selected
 * by the trace options to the evlist (syscall tracepoints, the vfs_getname
 * probe, major/minor page faults, sched_stat_runtime), creates the target
 * maps, initializes symbols, configures the evlist (and callchains when
 * enabled), installs sig_handler for SIGCHLD and SIGINT, prepares the workload
 * when argc > 0, opens the events, applies the BPF object config, sets up
 * pid/event-qualifier filters, mmaps the ring buffers, enables the events and
 * starts the workload.  It then loops reading the mmaps and passing every
 * parsed sample to trace__handle_event() until interrupted or drained.
 *
 * On the way out the events are disabled, the optional summary and tool stats
 * are printed when err is 0, the evlist is deleted and err is returned.
 * Failures jump to the out_error_* labels at the bottom, which print a message
 * on trace->output and then fall back to out_delete_evlist.
 */
f15eb531 2167static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2168{
14a052df 2169 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2170 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2171 int err = -1, i;
2172 unsigned long before;
f15eb531 2173 const bool forks = argc > 0;
46fb3c21 2174 bool draining = false;
514f1c67 2175
75b757ca
ACM
2176 trace->live = true;
2177
/* Populate the evlist according to the trace options. */
c27366f0 2178 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2179 goto out_error_raw_syscalls;
514f1c67 2180
e281a960 2181 if (trace->trace_syscalls)
08c98776 2182 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2183
0ae537cb
ACM
2184 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2185 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2186 if (pgfault_maj == NULL)
2187 goto out_error_mem;
2188 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2189 }
598d02c5 2190
0ae537cb
ACM
2191 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2192 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2193 if (pgfault_min == NULL)
2194 goto out_error_mem;
2195 perf_evlist__add(evlist, pgfault_min);
2196 }
598d02c5 2197
1302d88e 2198 if (trace->sched &&
2cc990ba
ACM
2199 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2200 trace__sched_stat_runtime))
2201 goto out_error_sched_stat_runtime;
1302d88e 2202
514f1c67
ACM
2203 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2204 if (err < 0) {
c24ff998 2205 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2206 goto out_delete_evlist;
2207 }
2208
752fde44
ACM
2209 err = trace__symbols_init(trace, evlist);
2210 if (err < 0) {
c24ff998 2211 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2212 goto out_delete_evlist;
752fde44
ACM
2213 }
2214
fde54b78
ACM
2215 perf_evlist__config(evlist, &trace->opts, NULL);
2216
/* Callchains are configured only on sys_exit and the page fault events. */
0c3a6ef4
ACM
2217 if (callchain_param.enabled) {
2218 bool use_identifier = false;
2219
2220 if (trace->syscalls.events.sys_exit) {
2221 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2222 &trace->opts, &callchain_param);
2223 use_identifier = true;
2224 }
2225
2226 if (pgfault_maj) {
2227 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2228 use_identifier = true;
2229 }
2230
2231 if (pgfault_min) {
2232 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2233 use_identifier = true;
2234 }
2235
2236 if (use_identifier) {
2237 /*
2238 * Now we have evsels with different sample_ids, use
2239 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2240 * from a fixed position in each ring buffer record.
2241 *
2242 * As of this the changeset introducing this comment, this
2243 * isn't strictly needed, as the fields that can come before
2244 * PERF_SAMPLE_ID are all used, but we'll probably disable
2245 * some of those for things like copying the payload of
2246 * pointer syscall arguments, and for vfs_getname we don't
2247 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2248 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2249 */
2250 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2251 perf_evlist__reset_sample_bit(evlist, ID);
2252 }
fde54b78 2253 }
514f1c67 2254
/* The same sig_handler is installed for both SIGCHLD and SIGINT. */
f15eb531
NK
2255 signal(SIGCHLD, sig_handler);
2256 signal(SIGINT, sig_handler);
2257
/* A workload is only prepared when arguments were given (argc > 0). */
2258 if (forks) {
6ef73ec4 2259 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2260 argv, false, NULL);
f15eb531 2261 if (err < 0) {
c24ff998 2262 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2263 goto out_delete_evlist;
f15eb531
NK
2264 }
2265 }
2266
514f1c67 2267 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2268 if (err < 0)
2269 goto out_error_open;
514f1c67 2270
/*
 * NOTE(review): a BPF config failure prints its own message and then also
 * goes through out_error_open, which prints a second, open-related message.
 */
ba504235
WN
2271 err = bpf__apply_obj_config();
2272 if (err) {
2273 char errbuf[BUFSIZ];
2274
2275 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2276 pr_err("ERROR: Apply config to BPF failed: %s\n",
2277 errbuf);
2278 goto out_error_open;
2279 }
2280
241b057c
ACM
2281 /*
2282 * Better not use !target__has_task() here because we need to cover the
2283 * case where no threads were specified in the command line, but a
2284 * workload was, and in that case we will fill in the thread_map when
2285 * we fork the workload in perf_evlist__prepare_workload.
2286 */
f078c385
ACM
2287 if (trace->filter_pids.nr > 0)
2288 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2289 else if (thread_map__pid(evlist->threads, 0) == -1)
f078c385
ACM
2290 err = perf_evlist__set_filter_pid(evlist, getpid());
2291
94ad89bc
ACM
2292 if (err < 0)
2293 goto out_error_mem;
2294
19867b61
ACM
2295 if (trace->ev_qualifier_ids.nr > 0) {
2296 err = trace__set_ev_qualifier_filter(trace);
2297 if (err < 0)
2298 goto out_errno;
19867b61 2299
2e5e5f87
ACM
2300 pr_debug("event qualifier tracepoint filter: %s\n",
2301 trace->syscalls.events.sys_exit->filter);
2302 }
19867b61 2303
/* On failure evsel is used by out_error_apply_filters to name the event. */
94ad89bc
ACM
2304 err = perf_evlist__apply_filters(evlist, &evsel);
2305 if (err < 0)
2306 goto out_error_apply_filters;
241b057c 2307
f885037e 2308 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
e09b18d4
ACM
2309 if (err < 0)
2310 goto out_error_mmap;
514f1c67 2311
/* With an initial delay the events are enabled only after the usleep() below. */
e36b7821 2312 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2313 perf_evlist__enable(evlist);
2314
f15eb531
NK
2315 if (forks)
2316 perf_evlist__start_workload(evlist);
2317
e36b7821
AB
2318 if (trace->opts.initial_delay) {
2319 usleep(trace->opts.initial_delay * 1000);
2320 perf_evlist__enable(evlist);
2321 }
2322
e13798c7 2323 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2324 evlist->threads->nr > 1 ||
2325 perf_evlist__first(evlist)->attr.inherit;
/*
 * Main loop: walk every mmap, count each event read in trace->nr_events and
 * hand successfully parsed ones to trace__handle_event().  'before' lets the
 * code after the loop tell whether this pass read anything.
 */
514f1c67 2326again:
efd5745e 2327 before = trace->nr_events;
514f1c67
ACM
2328
2329 for (i = 0; i < evlist->nr_mmaps; i++) {
2330 union perf_event *event;
2331
2332 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2333 struct perf_sample sample;
514f1c67 2334
efd5745e 2335 ++trace->nr_events;
514f1c67 2336
514f1c67
ACM
2337 err = perf_evlist__parse_sample(evlist, event, &sample);
2338 if (err) {
c24ff998 2339 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2340 goto next_event;
514f1c67
ACM
2341 }
2342
ddbb1b13 2343 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2344next_event:
2345 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2346
ba209f85
ACM
2347 if (interrupted)
2348 goto out_disable;
02ac5421
ACM
2349
/* Once 'done' is set, disable the events a single time and keep reading what is left. */
2350 if (done && !draining) {
2351 perf_evlist__disable(evlist);
2352 draining = true;
2353 }
514f1c67
ACM
2354 }
2355 }
2356
/*
 * Nothing was read in this pass: unless already draining, poll the evlist
 * (timeout 100 once 'done' is set, -1 otherwise) and go around again when the
 * poll reports activity.  If events were read, simply go around again.
 */
efd5745e 2357 if (trace->nr_events == before) {
ba209f85 2358 int timeout = done ? 100 : -1;
f15eb531 2359
46fb3c21
ACM
2360 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2361 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2362 draining = true;
2363
ba209f85 2364 goto again;
46fb3c21 2365 }
ba209f85
ACM
2366 } else {
2367 goto again;
f15eb531
NK
2368 }
2369
ba209f85 2370out_disable:
f3b623b8
ACM
2371 thread__zput(trace->current);
2372
ba209f85 2373 perf_evlist__disable(evlist);
514f1c67 2374
/* The summary and tool stats are printed only when err is 0 at this point. */
c522739d
ACM
2375 if (!err) {
2376 if (trace->summary)
2377 trace__fprintf_thread_summary(trace, trace->output);
2378
2379 if (trace->show_tool_stats) {
2380 fprintf(trace->output, "Stats:\n "
2381 " vfs_getname : %" PRIu64 "\n"
2382 " proc_getname: %" PRIu64 "\n",
2383 trace->stats.vfs_getname,
2384 trace->stats.proc_getname);
2385 }
2386 }
bf2575c1 2387
514f1c67
ACM
2388out_delete_evlist:
2389 perf_evlist__delete(evlist);
14a052df 2390 trace->evlist = NULL;
75b757ca 2391 trace->live = false;
514f1c67 2392 return err;
/*
 * Everything below follows the return above and is reachable only through
 * goto.  The braces scope errbuf for the labels that format a message into it.
 */
6ef068cb
ACM
2393{
2394 char errbuf[BUFSIZ];
a8f23d8f 2395
2cc990ba 2396out_error_sched_stat_runtime:
988bdb31 2397 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2398 goto out_error;
2399
801c67b0 2400out_error_raw_syscalls:
988bdb31 2401 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2402 goto out_error;
2403
e09b18d4
ACM
2404out_error_mmap:
2405 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2406 goto out_error;
2407
a8f23d8f
ACM
2408out_error_open:
2409 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2410
2411out_error:
6ef068cb 2412 fprintf(trace->output, "%s\n", errbuf);
87f91868 2413 goto out_delete_evlist;
94ad89bc
ACM
2414
2415out_error_apply_filters:
2416 fprintf(trace->output,
2417 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2418 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2419 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2420 goto out_delete_evlist;
514f1c67 2421}
5ed08dae
ACM
2422out_error_mem:
2423 fprintf(trace->output, "Not enough memory to run!\n");
2424 goto out_delete_evlist;
19867b61
ACM
2425
2426out_errno:
2427 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2428 goto out_delete_evlist;
a8f23d8f 2429}
514f1c67 2430
6810fc91
DA
2431static int trace__replay(struct trace *trace)
2432{
2433 const struct perf_evsel_str_handler handlers[] = {
c522739d 2434 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2435 };
f5fc1412
JO
2436 struct perf_data_file file = {
2437 .path = input_name,
2438 .mode = PERF_DATA_MODE_READ,
e366a6d8 2439 .force = trace->force,
f5fc1412 2440 };
6810fc91 2441 struct perf_session *session;
003824e8 2442 struct perf_evsel *evsel;
6810fc91
DA
2443 int err = -1;
2444
2445 trace->tool.sample = trace__process_sample;
2446 trace->tool.mmap = perf_event__process_mmap;
384c671e 2447 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2448 trace->tool.comm = perf_event__process_comm;
2449 trace->tool.exit = perf_event__process_exit;
2450 trace->tool.fork = perf_event__process_fork;
2451 trace->tool.attr = perf_event__process_attr;
f3b3614a 2452 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2453 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2454 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2455
0a8cb85c 2456 trace->tool.ordered_events = true;
6810fc91
DA
2457 trace->tool.ordering_requires_timestamps = true;
2458
2459 /* add tid to output */
2460 trace->multiple_threads = true;
2461
f5fc1412 2462 session = perf_session__new(&file, false, &trace->tool);
6810fc91 2463 if (session == NULL)
52e02834 2464 return -1;
6810fc91 2465
aa07df6e
DA
2466 if (trace->opts.target.pid)
2467 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2468
2469 if (trace->opts.target.tid)
2470 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2471
0a7e6d1b 2472 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2473 goto out;
2474
8fb598e5
DA
2475 trace->host = &session->machines.host;
2476
6810fc91
DA
2477 err = perf_session__set_tracepoints_handlers(session, handlers);
2478 if (err)
2479 goto out;
2480
003824e8
NK
2481 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2482 "raw_syscalls:sys_enter");
9aca7f17
DA
2483 /* older kernels have syscalls tp versus raw_syscalls */
2484 if (evsel == NULL)
2485 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2486 "syscalls:sys_enter");
003824e8 2487
e281a960
SF
2488 if (evsel &&
2489 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2490 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2491 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2492 goto out;
2493 }
2494
2495 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2496 "raw_syscalls:sys_exit");
9aca7f17
DA
2497 if (evsel == NULL)
2498 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2499 "syscalls:sys_exit");
e281a960
SF
2500 if (evsel &&
2501 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2502 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2503 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2504 goto out;
2505 }
2506
e5cadb93 2507 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2508 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2509 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2510 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2511 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2512 evsel->handler = trace__pgfault;
2513 }
2514
6810fc91
DA
2515 setup_pager();
2516
b7b61cbe 2517 err = perf_session__process_events(session);
6810fc91
DA
2518 if (err)
2519 pr_err("Failed to process events, error %d", err);
2520
bf2575c1
DA
2521 else if (trace->summary)
2522 trace__fprintf_thread_summary(trace, trace->output);
2523
6810fc91
DA
2524out:
2525 perf_session__delete(session);
2526
2527 return err;
2528}
2529
1302d88e
ACM
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns the value reported by fprintf() for that line, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2538
b535d523
ACM
2539DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2540 struct stats *stats;
2541 double msecs;
2542 int syscall;
2543)
2544{
2545 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2546 struct stats *stats = source->priv;
2547
2548 entry->syscall = source->i;
2549 entry->stats = stats;
2550 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2551}
2552
bf2575c1
DA
2553static size_t thread__dump_stats(struct thread_trace *ttrace,
2554 struct trace *trace, FILE *fp)
2555{
bf2575c1
DA
2556 size_t printed = 0;
2557 struct syscall *sc;
b535d523
ACM
2558 struct rb_node *nd;
2559 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2560
b535d523 2561 if (syscall_stats == NULL)
bf2575c1
DA
2562 return 0;
2563
2564 printed += fprintf(fp, "\n");
2565
834fd46d
MW
2566 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2567 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2568 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2569
98a91837 2570 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2571 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2572 if (stats) {
2573 double min = (double)(stats->min) / NSEC_PER_MSEC;
2574 double max = (double)(stats->max) / NSEC_PER_MSEC;
2575 double avg = avg_stats(stats);
2576 double pct;
2577 u64 n = (u64) stats->n;
2578
2579 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2580 avg /= NSEC_PER_MSEC;
2581
b535d523 2582 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2583 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2584 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2585 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2586 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2587 }
bf2575c1
DA
2588 }
2589
b535d523 2590 resort_rb__delete(syscall_stats);
bf2575c1 2591 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2592
2593 return printed;
2594}
2595
96c14451 2596static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2597{
96c14451 2598 size_t printed = 0;
89dceb22 2599 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2600 double ratio;
2601
2602 if (ttrace == NULL)
2603 return 0;
2604
2605 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2606
15e65c69 2607 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2608 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2609 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2610 if (ttrace->pfmaj)
2611 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2612 if (ttrace->pfmin)
2613 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2614 if (trace->sched)
2615 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2616 else if (fputc('\n', fp) != EOF)
2617 ++printed;
2618
bf2575c1 2619 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2620
96c14451
ACM
2621 return printed;
2622}
896cbb56 2623
96c14451
ACM
2624static unsigned long thread__nr_events(struct thread_trace *ttrace)
2625{
2626 return ttrace ? ttrace->nr_events : 0;
2627}
2628
2629DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2630 struct thread *thread;
2631)
2632{
2633 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2634}
2635
1302d88e
ACM
2636static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2637{
96c14451
ACM
2638 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2639 size_t printed = trace__fprintf_threads_header(fp);
2640 struct rb_node *nd;
1302d88e 2641
96c14451
ACM
2642 if (threads == NULL) {
2643 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2644 return 0;
2645 }
2646
98a91837 2647 resort_rb__for_each_entry(nd, threads)
96c14451 2648 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
896cbb56 2649
96c14451
ACM
2650 resort_rb__delete(threads);
2651
2652 return printed;
1302d88e
ACM
2653}
2654
ae9ed035
ACM
2655static int trace__set_duration(const struct option *opt, const char *str,
2656 int unset __maybe_unused)
2657{
2658 struct trace *trace = opt->value;
2659
2660 trace->duration_filter = atof(str);
2661 return 0;
2662}
2663
f078c385
ACM
2664static int trace__set_filter_pids(const struct option *opt, const char *str,
2665 int unset __maybe_unused)
2666{
2667 int ret = -1;
2668 size_t i;
2669 struct trace *trace = opt->value;
2670 /*
2671 * FIXME: introduce a intarray class, plain parse csv and create a
2672 * { int nr, int entries[] } struct...
2673 */
2674 struct intlist *list = intlist__new(str);
2675
2676 if (list == NULL)
2677 return -1;
2678
2679 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2680 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2681
2682 if (trace->filter_pids.entries == NULL)
2683 goto out;
2684
2685 trace->filter_pids.entries[0] = getpid();
2686
2687 for (i = 1; i < trace->filter_pids.nr; ++i)
2688 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2689
2690 intlist__delete(list);
2691 ret = 0;
2692out:
2693 return ret;
2694}
2695
c24ff998
ACM
2696static int trace__open_output(struct trace *trace, const char *filename)
2697{
2698 struct stat st;
2699
2700 if (!stat(filename, &st) && st.st_size) {
2701 char oldname[PATH_MAX];
2702
2703 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2704 unlink(oldname);
2705 rename(filename, oldname);
2706 }
2707
2708 trace->output = fopen(filename, "w");
2709
2710 return trace->output == NULL ? -errno : 0;
2711}
2712
598d02c5
SF
2713static int parse_pagefaults(const struct option *opt, const char *str,
2714 int unset __maybe_unused)
2715{
2716 int *trace_pgfaults = opt->value;
2717
2718 if (strcmp(str, "all") == 0)
2719 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2720 else if (strcmp(str, "maj") == 0)
2721 *trace_pgfaults |= TRACE_PFMAJ;
2722 else if (strcmp(str, "min") == 0)
2723 *trace_pgfaults |= TRACE_PFMIN;
2724 else
2725 return -1;
2726
2727 return 0;
2728}
2729
14a052df
ACM
2730static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2731{
2732 struct perf_evsel *evsel;
2733
e5cadb93 2734 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2735 evsel->handler = handler;
2736}
2737
017037ff
ACM
2738/*
2739 * XXX: Hackish, just splitting the combined -e+--event (syscalls
2740 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2741 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2742 *
2743 * It'd be better to introduce a parse_options() variant that would return a
2744 * list with the terms it didn't match to an event...
2745 */
2746static int trace__parse_events_option(const struct option *opt, const char *str,
2747 int unset __maybe_unused)
2748{
2749 struct trace *trace = (struct trace *)opt->value;
2750 const char *s = str;
2751 char *sep = NULL, *lists[2] = { NULL, NULL, };
2752 int len = strlen(str), err = -1, list;
2753 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2754 char group_name[PATH_MAX];
2755
2756 if (strace_groups_dir == NULL)
2757 return -1;
2758
2759 if (*s == '!') {
2760 ++s;
2761 trace->not_ev_qualifier = true;
2762 }
2763
2764 while (1) {
2765 if ((sep = strchr(s, ',')) != NULL)
2766 *sep = '\0';
2767
2768 list = 0;
2769 if (syscalltbl__id(trace->sctbl, s) >= 0) {
2770 list = 1;
2771 } else {
2772 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2773 if (access(group_name, R_OK) == 0)
2774 list = 1;
2775 }
2776
2777 if (lists[list]) {
2778 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
2779 } else {
2780 lists[list] = malloc(len);
2781 if (lists[list] == NULL)
2782 goto out;
2783 strcpy(lists[list], s);
2784 }
2785
2786 if (!sep)
2787 break;
2788
2789 *sep = ',';
2790 s = sep + 1;
2791 }
2792
2793 if (lists[1] != NULL) {
2794 struct strlist_config slist_config = {
2795 .dirname = strace_groups_dir,
2796 };
2797
2798 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2799 if (trace->ev_qualifier == NULL) {
2800 fputs("Not enough memory to parse event qualifier", trace->output);
2801 goto out;
2802 }
2803
2804 if (trace__validate_ev_qualifier(trace))
2805 goto out;
2806 }
2807
2808 err = 0;
2809
2810 if (lists[0]) {
2811 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2812 "event selector. use 'perf list' to list available events",
2813 parse_events_option);
2814 err = parse_events_option(&o, lists[0], 0);
2815 }
2816out:
2817 if (sep)
2818 *sep = ',';
2819
2820 return err;
2821}
2822
b0ad8ea6 2823int cmd_trace(int argc, const char **argv)
514f1c67 2824{
6fdd9cb7 2825 const char *trace_usage[] = {
f15eb531
NK
2826 "perf trace [<options>] [<command>]",
2827 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2828 "perf trace record [<options>] [<command>]",
2829 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2830 NULL
2831 };
2832 struct trace trace = {
514f1c67
ACM
2833 .syscalls = {
2834 . max = -1,
2835 },
2836 .opts = {
2837 .target = {
2838 .uid = UINT_MAX,
2839 .uses_mmap = true,
2840 },
2841 .user_freq = UINT_MAX,
2842 .user_interval = ULLONG_MAX,
509051ea 2843 .no_buffering = true,
38d5447d 2844 .mmap_pages = UINT_MAX,
9d9cad76 2845 .proc_map_timeout = 500,
514f1c67 2846 },
007d66a0 2847 .output = stderr,
50c95cbd 2848 .show_comm = true,
e281a960 2849 .trace_syscalls = true,
44621819 2850 .kernel_syscallchains = false,
05614993 2851 .max_stack = UINT_MAX,
514f1c67 2852 };
c24ff998 2853 const char *output_name = NULL;
514f1c67 2854 const struct option trace_options[] = {
017037ff
ACM
2855 OPT_CALLBACK('e', "event", &trace, "event",
2856 "event/syscall selector. use 'perf list' to list available events",
2857 trace__parse_events_option),
50c95cbd
ACM
2858 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2859 "show the thread COMM next to its id"),
c522739d 2860 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
2861 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2862 trace__parse_events_option),
c24ff998 2863 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 2864 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
2865 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2866 "trace events on existing process id"),
ac9be8ee 2867 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 2868 "trace events on existing thread id"),
fa0e4ffe
ACM
2869 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2870 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 2871 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 2872 "system-wide collection from all CPUs"),
ac9be8ee 2873 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 2874 "list of cpus to monitor"),
6810fc91 2875 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 2876 "child tasks do not inherit counters"),
994a1f78
JO
2877 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2878 "number of mmap data pages",
2879 perf_evlist__parse_mmap_pages),
ac9be8ee 2880 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 2881 "user to profile"),
ae9ed035
ACM
2882 OPT_CALLBACK(0, "duration", &trace, "float",
2883 "show only events with duration > N.M ms",
2884 trace__set_duration),
1302d88e 2885 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 2886 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
2887 OPT_BOOLEAN('T', "time", &trace.full_time,
2888 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
2889 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2890 "Show only syscall summary with statistics"),
2891 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2892 "Show all syscalls and summary with statistics"),
598d02c5
SF
2893 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2894 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 2895 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 2896 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
2897 OPT_CALLBACK(0, "call-graph", &trace.opts,
2898 "record_mode[,record_size]", record_callchain_help,
2899 &record_parse_callchain_opt),
44621819
ACM
2900 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2901 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
2902 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2903 "Set the minimum stack depth when parsing the callchain, "
2904 "anything below the specified depth will be ignored."),
c6d4a494
ACM
2905 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2906 "Set the maximum stack depth when parsing the callchain, "
2907 "anything beyond the specified depth will be ignored. "
4cb93446 2908 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
9d9cad76
KL
2909 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2910 "per thread proc mmap processing timeout in ms"),
e36b7821
AB
2911 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2912 "ms to wait before starting measurement after program "
2913 "start"),
514f1c67
ACM
2914 OPT_END()
2915 };
ccd62a89 2916 bool __maybe_unused max_stack_user_set = true;
f3e459d1 2917 bool mmap_pages_user_set = true;
6fdd9cb7 2918 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 2919 int err;
32caf0d1 2920 char bf[BUFSIZ];
514f1c67 2921
4d08cb80
ACM
2922 signal(SIGSEGV, sighandler_dump_stack);
2923 signal(SIGFPE, sighandler_dump_stack);
2924
14a052df 2925 trace.evlist = perf_evlist__new();
fd0db102 2926 trace.sctbl = syscalltbl__new();
14a052df 2927
fd0db102 2928 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 2929 pr_err("Not enough memory to run!\n");
ff8f695c 2930 err = -ENOMEM;
14a052df
ACM
2931 goto out;
2932 }
2933
6fdd9cb7
YS
2934 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2935 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 2936
d7888573
WN
2937 err = bpf__setup_stdout(trace.evlist);
2938 if (err) {
2939 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2940 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2941 goto out;
2942 }
2943
59247e33
ACM
2944 err = -1;
2945
598d02c5
SF
2946 if (trace.trace_pgfaults) {
2947 trace.opts.sample_address = true;
2948 trace.opts.sample_time = true;
2949 }
2950
f3e459d1
ACM
2951 if (trace.opts.mmap_pages == UINT_MAX)
2952 mmap_pages_user_set = false;
2953
05614993 2954 if (trace.max_stack == UINT_MAX) {
fe176085 2955 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
2956 max_stack_user_set = false;
2957 }
2958
2959#ifdef HAVE_DWARF_UNWIND_SUPPORT
caa36ed7 2960 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
05614993
ACM
2961 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2962#endif
2963
2ddd5c04 2964 if (callchain_param.enabled) {
f3e459d1
ACM
2965 if (!mmap_pages_user_set && geteuid() == 0)
2966 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2967
566a0885 2968 symbol_conf.use_callchain = true;
f3e459d1 2969 }
566a0885 2970
14a052df
ACM
2971 if (trace.evlist->nr_entries > 0)
2972 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2973
1e28fe0a
SF
2974 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2975 return trace__record(&trace, argc-1, &argv[1]);
2976
2977 /* summary_only implies summary option, but don't overwrite summary if set */
2978 if (trace.summary_only)
2979 trace.summary = trace.summary_only;
2980
726f3234
ACM
2981 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2982 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
2983 pr_err("Please specify something to trace.\n");
2984 return -1;
2985 }
2986
017037ff 2987 if (!trace.trace_syscalls && trace.ev_qualifier) {
59247e33
ACM
2988 pr_err("The -e option can't be used with --no-syscalls.\n");
2989 goto out;
2990 }
2991
c24ff998
ACM
2992 if (output_name != NULL) {
2993 err = trace__open_output(&trace, output_name);
2994 if (err < 0) {
2995 perror("failed to create output file");
2996 goto out;
2997 }
2998 }
2999
fd0db102
ACM
3000 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3001
602ad878 3002 err = target__validate(&trace.opts.target);
32caf0d1 3003 if (err) {
602ad878 3004 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3005 fprintf(trace.output, "%s", bf);
3006 goto out_close;
32caf0d1
NK
3007 }
3008
602ad878 3009 err = target__parse_uid(&trace.opts.target);
514f1c67 3010 if (err) {
602ad878 3011 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3012 fprintf(trace.output, "%s", bf);
3013 goto out_close;
514f1c67
ACM
3014 }
3015
602ad878 3016 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3017 trace.opts.target.system_wide = true;
3018
6810fc91
DA
3019 if (input_name)
3020 err = trace__replay(&trace);
3021 else
3022 err = trace__run(&trace, argc, argv);
1302d88e 3023
c24ff998
ACM
3024out_close:
3025 if (output_name != NULL)
3026 fclose(trace.output);
3027out:
1302d88e 3028 return err;
514f1c67 3029}