]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - tools/perf/builtin-trace.c
perf tools: Move extra string util functions to util/string2.h
[mirror_ubuntu-bionic-kernel.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
514f1c67 24#include "util/evlist.h"
4b6ab94e 25#include <subcmd/exec-cmd.h>
752fde44 26#include "util/machine.h"
6810fc91 27#include "util/session.h"
752fde44 28#include "util/thread.h"
4b6ab94e 29#include <subcmd/parse-options.h>
2ae3a312 30#include "util/strlist.h"
bdc89661 31#include "util/intlist.h"
514f1c67 32#include "util/thread_map.h"
bf2575c1 33#include "util/stat.h"
fd5cead2 34#include "trace/beauty/beauty.h"
97978b3e 35#include "trace-event.h"
9aca7f17 36#include "util/parse-events.h"
ba504235 37#include "util/bpf-loader.h"
566a0885 38#include "callchain.h"
fea01392 39#include "print_binary.h"
a067558e 40#include "string2.h"
fd0db102 41#include "syscalltbl.h"
96c14451 42#include "rb_resort.h"
514f1c67 43
fd20e811 44#include <inttypes.h>
fd0db102 45#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
514f1c67 46#include <stdlib.h>
017037ff 47#include <string.h>
8dd2a131 48#include <linux/err.h>
997bba8c
ACM
49#include <linux/filter.h>
50#include <linux/audit.h>
877a7a11 51#include <linux/kernel.h>
39878d49 52#include <linux/random.h>
c6d4a494 53#include <linux/stringify.h>
bd48c63e 54#include <linux/time64.h>
514f1c67 55
3d689ed6
ACM
56#include "sane_ctype.h"
57
c188e7ac
ACM
58#ifndef O_CLOEXEC
59# define O_CLOEXEC 02000000
60#endif
61
d1d438a3
ACM
62struct trace {
63 struct perf_tool tool;
fd0db102 64 struct syscalltbl *sctbl;
d1d438a3
ACM
65 struct {
66 int max;
67 struct syscall *table;
68 struct {
69 struct perf_evsel *sys_enter,
70 *sys_exit;
71 } events;
72 } syscalls;
73 struct record_opts opts;
74 struct perf_evlist *evlist;
75 struct machine *host;
76 struct thread *current;
77 u64 base_time;
78 FILE *output;
79 unsigned long nr_events;
80 struct strlist *ev_qualifier;
81 struct {
82 size_t nr;
83 int *entries;
84 } ev_qualifier_ids;
d1d438a3
ACM
85 struct {
86 size_t nr;
87 pid_t *entries;
88 } filter_pids;
89 double duration_filter;
90 double runtime_ms;
91 struct {
92 u64 vfs_getname,
93 proc_getname;
94 } stats;
c6d4a494 95 unsigned int max_stack;
5cf9c84e 96 unsigned int min_stack;
d1d438a3
ACM
97 bool not_ev_qualifier;
98 bool live;
99 bool full_time;
100 bool sched;
101 bool multiple_threads;
102 bool summary;
103 bool summary_only;
104 bool show_comm;
105 bool show_tool_stats;
106 bool trace_syscalls;
44621819 107 bool kernel_syscallchains;
d1d438a3
ACM
108 bool force;
109 bool vfs_getname;
110 int trace_pgfaults;
fd0db102 111 int open_id;
d1d438a3 112};
a1c2552d 113
77170988
ACM
114struct tp_field {
115 int offset;
116 union {
117 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
118 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
119 };
120};
121
122#define TP_UINT_FIELD(bits) \
123static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
124{ \
55d43bca
DA
125 u##bits value; \
126 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
127 return value; \
77170988
ACM
128}
129
130TP_UINT_FIELD(8);
131TP_UINT_FIELD(16);
132TP_UINT_FIELD(32);
133TP_UINT_FIELD(64);
134
135#define TP_UINT_FIELD__SWAPPED(bits) \
136static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
137{ \
55d43bca
DA
138 u##bits value; \
139 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
140 return bswap_##bits(value);\
141}
142
143TP_UINT_FIELD__SWAPPED(16);
144TP_UINT_FIELD__SWAPPED(32);
145TP_UINT_FIELD__SWAPPED(64);
146
147static int tp_field__init_uint(struct tp_field *field,
148 struct format_field *format_field,
149 bool needs_swap)
150{
151 field->offset = format_field->offset;
152
153 switch (format_field->size) {
154 case 1:
155 field->integer = tp_field__u8;
156 break;
157 case 2:
158 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
159 break;
160 case 4:
161 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
162 break;
163 case 8:
164 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
165 break;
166 default:
167 return -1;
168 }
169
170 return 0;
171}
172
173static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
174{
175 return sample->raw_data + field->offset;
176}
177
178static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
179{
180 field->offset = format_field->offset;
181 field->pointer = tp_field__ptr;
182 return 0;
183}
184
185struct syscall_tp {
186 struct tp_field id;
187 union {
188 struct tp_field args, ret;
189 };
190};
191
192static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
193 struct tp_field *field,
194 const char *name)
195{
196 struct format_field *format_field = perf_evsel__field(evsel, name);
197
198 if (format_field == NULL)
199 return -1;
200
201 return tp_field__init_uint(field, format_field, evsel->needs_swap);
202}
203
204#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
205 ({ struct syscall_tp *sc = evsel->priv;\
206 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
207
/*
 * Look up the tracepoint field called @name in @evsel and set @field up
 * to hand back a pointer into the raw sample payload.
 *
 * Returns -1 when the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt != NULL ? tp_field__init_ptr(field, fmt) : -1;
}
219
220#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
221 ({ struct syscall_tp *sc = evsel->priv;\
222 perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
223
224static void perf_evsel__delete_priv(struct perf_evsel *evsel)
225{
04662523 226 zfree(&evsel->priv);
77170988
ACM
227 perf_evsel__delete(evsel);
228}
229
96695d44
NK
230static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
231{
232 evsel->priv = malloc(sizeof(struct syscall_tp));
233 if (evsel->priv != NULL) {
234 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
235 goto out_delete;
236
237 evsel->handler = handler;
238 return 0;
239 }
240
241 return -ENOMEM;
242
243out_delete:
04662523 244 zfree(&evsel->priv);
96695d44
NK
245 return -ENOENT;
246}
247
/*
 * Create the syscall tracepoint evsel for @direction and wire it up
 * with @handler.
 *
 * "raw_syscalls:<direction>" is tried first, then "syscalls:<direction>".
 * Returns the new evsel, or NULL when neither tracepoint can be created
 * or the syscall_tp setup fails.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* Older kernels (e.g. RHEL6) use syscalls:{enter,exit} instead. */
	if (IS_ERR(evsel)) {
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
268
269#define perf_evsel__sc_tp_uint(evsel, name, sample) \
270 ({ struct syscall_tp *fields = evsel->priv; \
271 fields->name.integer(&fields->name, sample); })
272
273#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
274 ({ struct syscall_tp *fields = evsel->priv; \
275 fields->name.pointer(&fields->name, sample); })
276
1f115cb7 277struct strarray {
03e3adc9 278 int offset;
1f115cb7
ACM
279 int nr_entries;
280 const char **entries;
281};
282
283#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
284 .nr_entries = ARRAY_SIZE(array), \
285 .entries = array, \
286}
287
03e3adc9
ACM
288#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
289 .offset = off, \
290 .nr_entries = ARRAY_SIZE(array), \
291 .entries = array, \
292}
293
975b7c2f
ACM
294static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
295 const char *intfmt,
296 struct syscall_arg *arg)
1f115cb7 297{
1f115cb7 298 struct strarray *sa = arg->parm;
03e3adc9 299 int idx = arg->val - sa->offset;
1f115cb7
ACM
300
301 if (idx < 0 || idx >= sa->nr_entries)
975b7c2f 302 return scnprintf(bf, size, intfmt, arg->val);
1f115cb7
ACM
303
304 return scnprintf(bf, size, "%s", sa->entries[idx]);
305}
306
975b7c2f
ACM
307static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
308 struct syscall_arg *arg)
309{
310 return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
311}
312
1f115cb7
ACM
313#define SCA_STRARRAY syscall_arg__scnprintf_strarray
314
844ae5b4
ACM
315#if defined(__i386__) || defined(__x86_64__)
316/*
317 * FIXME: Make this available to all arches as soon as the ioctl beautifier
318 * gets rewritten to support all arches.
319 */
78645cf3
ACM
320static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
321 struct syscall_arg *arg)
322{
323 return __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);
324}
325
326#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
844ae5b4 327#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 328
75b757ca
ACM
329static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
330 struct syscall_arg *arg);
331
332#define SCA_FD syscall_arg__scnprintf_fd
333
48e1f91a
ACM
334#ifndef AT_FDCWD
335#define AT_FDCWD -100
336#endif
337
75b757ca
ACM
338static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
339 struct syscall_arg *arg)
340{
341 int fd = arg->val;
342
343 if (fd == AT_FDCWD)
344 return scnprintf(bf, size, "CWD");
345
346 return syscall_arg__scnprintf_fd(bf, size, arg);
347}
348
349#define SCA_FDAT syscall_arg__scnprintf_fd_at
350
351static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
352 struct syscall_arg *arg);
353
354#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
355
6e7eeb51 356static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
01533e97 357 struct syscall_arg *arg)
13d4ff3e 358{
01533e97 359 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
360}
361
beccb2b5
ACM
362#define SCA_HEX syscall_arg__scnprintf_hex
363
a1c2552d
ACM
364static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
365 struct syscall_arg *arg)
366{
367 return scnprintf(bf, size, "%d", arg->val);
368}
369
370#define SCA_INT syscall_arg__scnprintf_int
371
729a7841
ACM
372static const char *bpf_cmd[] = {
373 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
374 "MAP_GET_NEXT_KEY", "PROG_LOAD",
375};
376static DEFINE_STRARRAY(bpf_cmd);
377
03e3adc9
ACM
378static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
379static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 380
1f115cb7
ACM
381static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
382static DEFINE_STRARRAY(itimers);
383
b62bee1b
ACM
384static const char *keyctl_options[] = {
385 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
386 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
387 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
388 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
389 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
390};
391static DEFINE_STRARRAY(keyctl_options);
392
efe6b882
ACM
393static const char *whences[] = { "SET", "CUR", "END",
394#ifdef SEEK_DATA
395"DATA",
396#endif
397#ifdef SEEK_HOLE
398"HOLE",
399#endif
400};
401static DEFINE_STRARRAY(whences);
f9da0b0c 402
80f587d5
ACM
403static const char *fcntl_cmds[] = {
404 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
405 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
406 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
407 "F_GETOWNER_UIDS",
408};
409static DEFINE_STRARRAY(fcntl_cmds);
410
c045bf02
ACM
411static const char *rlimit_resources[] = {
412 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
413 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
414 "RTTIME",
415};
416static DEFINE_STRARRAY(rlimit_resources);
417
eb5b1b14
ACM
418static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
419static DEFINE_STRARRAY(sighow);
420
4f8c1b74
DA
421static const char *clockid[] = {
422 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
28ebb87c
ACM
423 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
424 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
4f8c1b74
DA
425};
426static DEFINE_STRARRAY(clockid);
427
e10bce81
ACM
428static const char *socket_families[] = {
429 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
430 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
431 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
432 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
433 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
434 "ALG", "NFC", "VSOCK",
435};
436static DEFINE_STRARRAY(socket_families);
437
51108999
ACM
438static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
439 struct syscall_arg *arg)
440{
441 size_t printed = 0;
442 int mode = arg->val;
443
444 if (mode == F_OK) /* 0 */
445 return scnprintf(bf, size, "F");
446#define P_MODE(n) \
447 if (mode & n##_OK) { \
448 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
449 mode &= ~n##_OK; \
450 }
451
452 P_MODE(R);
453 P_MODE(W);
454 P_MODE(X);
455#undef P_MODE
456
457 if (mode)
458 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
459
460 return printed;
461}
462
463#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
464
f994592d
ACM
465static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
466 struct syscall_arg *arg);
467
468#define SCA_FILENAME syscall_arg__scnprintf_filename
469
46cce19b
ACM
470static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
471 struct syscall_arg *arg)
472{
473 int printed = 0, flags = arg->val;
474
475#define P_FLAG(n) \
476 if (flags & O_##n) { \
477 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
478 flags &= ~O_##n; \
479 }
480
481 P_FLAG(CLOEXEC);
482 P_FLAG(NONBLOCK);
483#undef P_FLAG
484
485 if (flags)
486 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
487
488 return printed;
489}
490
491#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
492
844ae5b4
ACM
493#if defined(__i386__) || defined(__x86_64__)
494/*
495 * FIXME: Make this available to all arches.
496 */
78645cf3
ACM
497#define TCGETS 0x5401
498
499static const char *tioctls[] = {
500 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
501 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
502 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
503 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
504 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
505 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
506 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
507 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
508 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
509 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
510 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
511 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
512 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
513 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
514 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
515};
516
517static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
844ae5b4 518#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 519
a355a61e
ACM
520#ifndef GRND_NONBLOCK
521#define GRND_NONBLOCK 0x0001
522#endif
523#ifndef GRND_RANDOM
524#define GRND_RANDOM 0x0002
525#endif
526
39878d49
ACM
527static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
528 struct syscall_arg *arg)
529{
530 int printed = 0, flags = arg->val;
531
532#define P_FLAG(n) \
533 if (flags & GRND_##n) { \
534 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
535 flags &= ~GRND_##n; \
536 }
537
538 P_FLAG(RANDOM);
539 P_FLAG(NONBLOCK);
540#undef P_FLAG
541
542 if (flags)
543 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
544
545 return printed;
546}
547
548#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
549
453350dd
ACM
550#define STRARRAY(arg, name, array) \
551 .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
552 .arg_parm = { [arg] = &strarray__##array, }
553
ea8dc3ce 554#include "trace/beauty/eventfd.c"
8bf382ce 555#include "trace/beauty/flock.c"
d5d71e86 556#include "trace/beauty/futex_op.c"
df4cb167 557#include "trace/beauty/mmap.c"
ba2f22cf 558#include "trace/beauty/mode_t.c"
a30e6259 559#include "trace/beauty/msg_flags.c"
8f48df69 560#include "trace/beauty/open_flags.c"
62de344e 561#include "trace/beauty/perf_event_open.c"
d5d71e86 562#include "trace/beauty/pid.c"
a3bca91f 563#include "trace/beauty/sched_policy.c"
f5cd95ea 564#include "trace/beauty/seccomp.c"
12199d8e 565#include "trace/beauty/signum.c"
bbf86c43 566#include "trace/beauty/socket_type.c"
7206b900 567#include "trace/beauty/waitid_options.c"
a3bca91f 568
514f1c67
ACM
569static struct syscall_fmt {
570 const char *name;
aec1930b 571 const char *alias;
01533e97 572 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1f115cb7 573 void *arg_parm[6];
514f1c67 574 bool errmsg;
11c8e39f 575 bool errpid;
514f1c67 576 bool timeout;
04b34729 577 bool hexret;
514f1c67 578} syscall_fmts[] = {
51108999 579 { .name = "access", .errmsg = true,
12f3ca4f 580 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
aec1930b 581 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
729a7841 582 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
beccb2b5
ACM
583 { .name = "brk", .hexret = true,
584 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
12f3ca4f
ACM
585 { .name = "chdir", .errmsg = true, },
586 { .name = "chmod", .errmsg = true, },
587 { .name = "chroot", .errmsg = true, },
4f8c1b74 588 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
11c8e39f 589 { .name = "clone", .errpid = true, },
75b757ca 590 { .name = "close", .errmsg = true,
48000a1a 591 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
a14bb860 592 { .name = "connect", .errmsg = true, },
12f3ca4f 593 { .name = "creat", .errmsg = true, },
b6565c90
ACM
594 { .name = "dup", .errmsg = true, },
595 { .name = "dup2", .errmsg = true, },
596 { .name = "dup3", .errmsg = true, },
453350dd 597 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
49af9e93
ACM
598 { .name = "eventfd2", .errmsg = true,
599 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
12f3ca4f 600 { .name = "faccessat", .errmsg = true, },
b6565c90
ACM
601 { .name = "fadvise64", .errmsg = true, },
602 { .name = "fallocate", .errmsg = true, },
603 { .name = "fchdir", .errmsg = true, },
604 { .name = "fchmod", .errmsg = true, },
75b757ca 605 { .name = "fchmodat", .errmsg = true,
12f3ca4f 606 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90 607 { .name = "fchown", .errmsg = true, },
75b757ca 608 { .name = "fchownat", .errmsg = true,
12f3ca4f 609 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
75b757ca 610 { .name = "fcntl", .errmsg = true,
b6565c90 611 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
75b757ca 612 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
b6565c90 613 { .name = "fdatasync", .errmsg = true, },
5cea6ff2 614 { .name = "flock", .errmsg = true,
b6565c90
ACM
615 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
616 { .name = "fsetxattr", .errmsg = true, },
617 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
12f3ca4f 618 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
b6565c90
ACM
619 { .name = "fstatfs", .errmsg = true, },
620 { .name = "fsync", .errmsg = true, },
621 { .name = "ftruncate", .errmsg = true, },
f9da0b0c
ACM
622 { .name = "futex", .errmsg = true,
623 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
75b757ca 624 { .name = "futimesat", .errmsg = true,
12f3ca4f 625 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90
ACM
626 { .name = "getdents", .errmsg = true, },
627 { .name = "getdents64", .errmsg = true, },
453350dd 628 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
c65f1070 629 { .name = "getpid", .errpid = true, },
d1d438a3 630 { .name = "getpgid", .errpid = true, },
c65f1070 631 { .name = "getppid", .errpid = true, },
39878d49
ACM
632 { .name = "getrandom", .errmsg = true,
633 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
453350dd 634 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f
ACM
635 { .name = "getxattr", .errmsg = true, },
636 { .name = "inotify_add_watch", .errmsg = true, },
beccb2b5 637 { .name = "ioctl", .errmsg = true,
b6565c90 638 .arg_scnprintf = {
844ae5b4
ACM
639#if defined(__i386__) || defined(__x86_64__)
640/*
641 * FIXME: Make this available to all arches.
642 */
78645cf3
ACM
643 [1] = SCA_STRHEXARRAY, /* cmd */
644 [2] = SCA_HEX, /* arg */ },
645 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
844ae5b4
ACM
646#else
647 [2] = SCA_HEX, /* arg */ }, },
648#endif
b62bee1b 649 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
8bad5b0a
ACM
650 { .name = "kill", .errmsg = true,
651 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f
ACM
652 { .name = "lchown", .errmsg = true, },
653 { .name = "lgetxattr", .errmsg = true, },
75b757ca 654 { .name = "linkat", .errmsg = true,
48000a1a 655 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
12f3ca4f
ACM
656 { .name = "listxattr", .errmsg = true, },
657 { .name = "llistxattr", .errmsg = true, },
658 { .name = "lremovexattr", .errmsg = true, },
75b757ca 659 { .name = "lseek", .errmsg = true,
b6565c90 660 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
75b757ca 661 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
12f3ca4f
ACM
662 { .name = "lsetxattr", .errmsg = true, },
663 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
664 { .name = "lsxattr", .errmsg = true, },
9e9716d1
ACM
665 { .name = "madvise", .errmsg = true,
666 .arg_scnprintf = { [0] = SCA_HEX, /* start */
667 [2] = SCA_MADV_BHV, /* behavior */ }, },
12f3ca4f 668 { .name = "mkdir", .errmsg = true, },
75b757ca 669 { .name = "mkdirat", .errmsg = true,
12f3ca4f
ACM
670 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
671 { .name = "mknod", .errmsg = true, },
75b757ca 672 { .name = "mknodat", .errmsg = true,
12f3ca4f 673 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
3d903aa7
ACM
674 { .name = "mlock", .errmsg = true,
675 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
676 { .name = "mlockall", .errmsg = true,
677 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5 678 { .name = "mmap", .hexret = true,
ae685380 679 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
941557e0 680 [2] = SCA_MMAP_PROT, /* prot */
b6565c90 681 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
beccb2b5 682 { .name = "mprotect", .errmsg = true,
ae685380
ACM
683 .arg_scnprintf = { [0] = SCA_HEX, /* start */
684 [2] = SCA_MMAP_PROT, /* prot */ }, },
090389b6
ACM
685 { .name = "mq_unlink", .errmsg = true,
686 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
ae685380
ACM
687 { .name = "mremap", .hexret = true,
688 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
86998dda 689 [3] = SCA_MREMAP_FLAGS, /* flags */
ae685380 690 [4] = SCA_HEX, /* new_addr */ }, },
3d903aa7
ACM
691 { .name = "munlock", .errmsg = true,
692 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5
ACM
693 { .name = "munmap", .errmsg = true,
694 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
75b757ca 695 { .name = "name_to_handle_at", .errmsg = true,
48000a1a 696 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
75b757ca 697 { .name = "newfstatat", .errmsg = true,
12f3ca4f 698 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
be65a89a 699 { .name = "open", .errmsg = true,
12f3ca4f 700 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 701 { .name = "open_by_handle_at", .errmsg = true,
75b757ca
ACM
702 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
703 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 704 { .name = "openat", .errmsg = true,
75b757ca
ACM
705 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
706 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
a1c2552d 707 { .name = "perf_event_open", .errmsg = true,
ccd9b2a7 708 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
a1c2552d
ACM
709 [3] = SCA_FD, /* group_fd */
710 [4] = SCA_PERF_FLAGS, /* flags */ }, },
46cce19b
ACM
711 { .name = "pipe2", .errmsg = true,
712 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
aec1930b
ACM
713 { .name = "poll", .errmsg = true, .timeout = true, },
714 { .name = "ppoll", .errmsg = true, .timeout = true, },
b6565c90
ACM
715 { .name = "pread", .errmsg = true, .alias = "pread64", },
716 { .name = "preadv", .errmsg = true, .alias = "pread", },
453350dd 717 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
b6565c90
ACM
718 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
719 { .name = "pwritev", .errmsg = true, },
720 { .name = "read", .errmsg = true, },
12f3ca4f 721 { .name = "readlink", .errmsg = true, },
75b757ca 722 { .name = "readlinkat", .errmsg = true,
12f3ca4f 723 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
b6565c90 724 { .name = "readv", .errmsg = true, },
b2cc99fd 725 { .name = "recvfrom", .errmsg = true,
b6565c90 726 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 727 { .name = "recvmmsg", .errmsg = true,
b6565c90 728 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 729 { .name = "recvmsg", .errmsg = true,
b6565c90 730 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
12f3ca4f 731 { .name = "removexattr", .errmsg = true, },
75b757ca 732 { .name = "renameat", .errmsg = true,
48000a1a 733 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
12f3ca4f 734 { .name = "rmdir", .errmsg = true, },
8bad5b0a
ACM
735 { .name = "rt_sigaction", .errmsg = true,
736 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
453350dd 737 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
8bad5b0a
ACM
738 { .name = "rt_sigqueueinfo", .errmsg = true,
739 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
740 { .name = "rt_tgsigqueueinfo", .errmsg = true,
741 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
f0bbd602
ACM
742 { .name = "sched_getattr", .errmsg = true, },
743 { .name = "sched_setattr", .errmsg = true, },
a3bca91f
ACM
744 { .name = "sched_setscheduler", .errmsg = true,
745 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
997bba8c
ACM
746 { .name = "seccomp", .errmsg = true,
747 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
748 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
aec1930b 749 { .name = "select", .errmsg = true, .timeout = true, },
b2cc99fd 750 { .name = "sendmmsg", .errmsg = true,
b6565c90 751 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 752 { .name = "sendmsg", .errmsg = true,
b6565c90 753 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 754 { .name = "sendto", .errmsg = true,
b6565c90 755 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
c65f1070 756 { .name = "set_tid_address", .errpid = true, },
453350dd 757 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
d1d438a3 758 { .name = "setpgid", .errmsg = true, },
453350dd 759 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f 760 { .name = "setxattr", .errmsg = true, },
b6565c90 761 { .name = "shutdown", .errmsg = true, },
e10bce81 762 { .name = "socket", .errmsg = true,
a28b24b2
ACM
763 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
764 [1] = SCA_SK_TYPE, /* type */ },
07120aa5
ACM
765 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
766 { .name = "socketpair", .errmsg = true,
767 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
768 [1] = SCA_SK_TYPE, /* type */ },
e10bce81 769 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
12f3ca4f
ACM
770 { .name = "stat", .errmsg = true, .alias = "newstat", },
771 { .name = "statfs", .errmsg = true, },
fd5cead2
ACM
772 { .name = "statx", .errmsg = true,
773 .arg_scnprintf = { [0] = SCA_FDAT, /* flags */
774 [2] = SCA_STATX_FLAGS, /* flags */
775 [3] = SCA_STATX_MASK, /* mask */ }, },
34221118
ACM
776 { .name = "swapoff", .errmsg = true,
777 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
778 { .name = "swapon", .errmsg = true,
779 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
75b757ca 780 { .name = "symlinkat", .errmsg = true,
48000a1a 781 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
8bad5b0a
ACM
782 { .name = "tgkill", .errmsg = true,
783 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
784 { .name = "tkill", .errmsg = true,
785 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f 786 { .name = "truncate", .errmsg = true, },
e5959683 787 { .name = "uname", .errmsg = true, .alias = "newuname", },
75b757ca 788 { .name = "unlinkat", .errmsg = true,
12f3ca4f
ACM
789 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
790 { .name = "utime", .errmsg = true, },
75b757ca 791 { .name = "utimensat", .errmsg = true,
12f3ca4f
ACM
792 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
793 { .name = "utimes", .errmsg = true, },
b6565c90 794 { .name = "vmsplice", .errmsg = true, },
11c8e39f 795 { .name = "wait4", .errpid = true,
7206b900 796 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
11c8e39f 797 { .name = "waitid", .errpid = true,
7206b900 798 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
b6565c90
ACM
799 { .name = "write", .errmsg = true, },
800 { .name = "writev", .errmsg = true, },
514f1c67
ACM
801};
802
803static int syscall_fmt__cmp(const void *name, const void *fmtp)
804{
805 const struct syscall_fmt *fmt = fmtp;
806 return strcmp(name, fmt->name);
807}
808
809static struct syscall_fmt *syscall_fmt__find(const char *name)
810{
811 const int nmemb = ARRAY_SIZE(syscall_fmts);
812 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
813}
814
815struct syscall {
816 struct event_format *tp_format;
f208bd8d
ACM
817 int nr_args;
818 struct format_field *args;
514f1c67 819 const char *name;
5089f20e 820 bool is_exit;
514f1c67 821 struct syscall_fmt *fmt;
01533e97 822 size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
1f115cb7 823 void **arg_parm;
514f1c67
ACM
824};
825
fd2b2975
ACM
826/*
827 * We need to have this 'calculated' boolean because in some cases we really
828 * don't know what is the duration of a syscall, for instance, when we start
829 * a session and some threads are waiting for a syscall to finish, say 'poll',
830 * in which case all we can do is to print "( ? )" for duration and for the
831 * start timestamp.
832 */
833static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
834{
835 double duration = (double)t / NSEC_PER_MSEC;
836 size_t printed = fprintf(fp, "(");
837
fd2b2975
ACM
838 if (!calculated)
839 printed += fprintf(fp, " ? ");
840 else if (duration >= 1.0)
60c907ab
ACM
841 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
842 else if (duration >= 0.01)
843 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
844 else
845 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 846 return printed + fprintf(fp, "): ");
60c907ab
ACM
847}
848
f994592d
ACM
849/**
850 * filename.ptr: The filename char pointer that will be vfs_getname'd
851 * filename.entry_str_pos: Where to insert the string translated from
852 * filename.ptr by the vfs_getname tracepoint/kprobe.
853 */
752fde44
ACM
854struct thread_trace {
855 u64 entry_time;
752fde44 856 bool entry_pending;
efd5745e 857 unsigned long nr_events;
a2ea67d7 858 unsigned long pfmaj, pfmin;
752fde44 859 char *entry_str;
1302d88e 860 double runtime_ms;
f994592d
ACM
861 struct {
862 unsigned long ptr;
7f4f8001
ACM
863 short int entry_str_pos;
864 bool pending_open;
865 unsigned int namelen;
866 char *name;
f994592d 867 } filename;
75b757ca
ACM
868 struct {
869 int max;
870 char **table;
871 } paths;
bf2575c1
DA
872
873 struct intlist *syscall_stats;
752fde44
ACM
874};
875
876static struct thread_trace *thread_trace__new(void)
877{
75b757ca
ACM
878 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
879
880 if (ttrace)
881 ttrace->paths.max = -1;
882
bf2575c1
DA
883 ttrace->syscall_stats = intlist__new(NULL);
884
75b757ca 885 return ttrace;
752fde44
ACM
886}
887
c24ff998 888static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 889{
efd5745e
ACM
890 struct thread_trace *ttrace;
891
752fde44
ACM
892 if (thread == NULL)
893 goto fail;
894
89dceb22
NK
895 if (thread__priv(thread) == NULL)
896 thread__set_priv(thread, thread_trace__new());
48000a1a 897
89dceb22 898 if (thread__priv(thread) == NULL)
752fde44
ACM
899 goto fail;
900
89dceb22 901 ttrace = thread__priv(thread);
efd5745e
ACM
902 ++ttrace->nr_events;
903
904 return ttrace;
752fde44 905fail:
c24ff998 906 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
907 "WARNING: not enough memory, dropping samples!\n");
908 return NULL;
909}
910
598d02c5
SF
/* Bit flags, one bit each. */
#define TRACE_PFMAJ		0x1	/* bit 0 */
#define TRACE_PFMIN		0x2	/* bit 1 */

/* Size in bytes of each thread_trace->entry_str buffer. */
static const size_t trace__entry_str_size = 2048;
915
97119f37 916static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 917{
89dceb22 918 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
919
920 if (fd > ttrace->paths.max) {
921 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
922
923 if (npath == NULL)
924 return -1;
925
926 if (ttrace->paths.max != -1) {
927 memset(npath + ttrace->paths.max + 1, 0,
928 (fd - ttrace->paths.max) * sizeof(char *));
929 } else {
930 memset(npath, 0, (fd + 1) * sizeof(char *));
931 }
932
933 ttrace->paths.table = npath;
934 ttrace->paths.max = fd;
935 }
936
937 ttrace->paths.table[fd] = strdup(pathname);
938
939 return ttrace->paths.table[fd] != NULL ? 0 : -1;
940}
941
97119f37
ACM
942static int thread__read_fd_path(struct thread *thread, int fd)
943{
944 char linkname[PATH_MAX], pathname[PATH_MAX];
945 struct stat st;
946 int ret;
947
948 if (thread->pid_ == thread->tid) {
949 scnprintf(linkname, sizeof(linkname),
950 "/proc/%d/fd/%d", thread->pid_, fd);
951 } else {
952 scnprintf(linkname, sizeof(linkname),
953 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
954 }
955
956 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
957 return -1;
958
959 ret = readlink(linkname, pathname, sizeof(pathname));
960
961 if (ret < 0 || ret > st.st_size)
962 return -1;
963
964 pathname[ret] = '\0';
965 return trace__set_fd_pathname(thread, fd, pathname);
966}
967
c522739d
ACM
968static const char *thread__fd_path(struct thread *thread, int fd,
969 struct trace *trace)
75b757ca 970{
89dceb22 971 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
972
973 if (ttrace == NULL)
974 return NULL;
975
976 if (fd < 0)
977 return NULL;
978
cdcd1e6b 979 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
980 if (!trace->live)
981 return NULL;
982 ++trace->stats.proc_getname;
cdcd1e6b 983 if (thread__read_fd_path(thread, fd))
c522739d
ACM
984 return NULL;
985 }
75b757ca
ACM
986
987 return ttrace->paths.table[fd];
988}
989
990static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
991 struct syscall_arg *arg)
992{
993 int fd = arg->val;
994 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 995 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
996
997 if (path)
998 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
999
1000 return printed;
1001}
1002
1003static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1004 struct syscall_arg *arg)
1005{
1006 int fd = arg->val;
1007 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1008 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1009
04662523
ACM
1010 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1011 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1012
1013 return printed;
1014}
1015
f994592d
ACM
1016static void thread__set_filename_pos(struct thread *thread, const char *bf,
1017 unsigned long ptr)
1018{
1019 struct thread_trace *ttrace = thread__priv(thread);
1020
1021 ttrace->filename.ptr = ptr;
1022 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1023}
1024
1025static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1026 struct syscall_arg *arg)
1027{
1028 unsigned long ptr = arg->val;
1029
1030 if (!arg->trace->vfs_getname)
1031 return scnprintf(bf, size, "%#x", ptr);
1032
1033 thread__set_filename_pos(arg->thread, bf, ptr);
1034 return 0;
1035}
1036
ae9ed035
ACM
1037static bool trace__filter_duration(struct trace *trace, double t)
1038{
1039 return t < (trace->duration_filter * NSEC_PER_MSEC);
1040}
1041
fd2b2975 1042static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1043{
1044 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1045
60c907ab 1046 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1047}
1048
fd2b2975
ACM
1049/*
1050 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1051 * using ttrace->entry_time for a thread that receives a sys_exit without
1052 * first having received a sys_enter ("poll" issued before tracing session
1053 * starts, lost sys_enter exit due to ring buffer overflow).
1054 */
1055static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1056{
1057 if (tstamp > 0)
1058 return __trace__fprintf_tstamp(trace, tstamp, fp);
1059
1060 return fprintf(fp, " ? ");
1061}
1062
/* Set by sig_handler(): a signal asked us to stop. */
static bool done = false;
/* Set by sig_handler(): the last signal handled was SIGINT. */
static bool interrupted = false;

/* Signal handler: flag termination and remember whether it was a SIGINT. */
static void sig_handler(int sig)
{
	done = true;
	interrupted = (sig == SIGINT);
}
1071
752fde44 1072static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
fd2b2975 1073 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
752fde44
ACM
1074{
1075 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
fd2b2975 1076 printed += fprintf_duration(duration, duration_calculated, fp);
752fde44 1077
50c95cbd
ACM
1078 if (trace->multiple_threads) {
1079 if (trace->show_comm)
1902efe7 1080 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1081 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1082 }
752fde44
ACM
1083
1084 return printed;
1085}
1086
c24ff998 1087static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1088 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1089{
1090 int ret = 0;
1091
1092 switch (event->header.type) {
1093 case PERF_RECORD_LOST:
c24ff998 1094 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1095 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1096 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1097 break;
752fde44 1098 default:
162f0bef 1099 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1100 break;
1101 }
1102
1103 return ret;
1104}
1105
c24ff998 1106static int trace__tool_process(struct perf_tool *tool,
752fde44 1107 union perf_event *event,
162f0bef 1108 struct perf_sample *sample,
752fde44
ACM
1109 struct machine *machine)
1110{
c24ff998 1111 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1112 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1113}
1114
caf8a0d0
ACM
1115static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1116{
1117 struct machine *machine = vmachine;
1118
1119 if (machine->kptr_restrict_warned)
1120 return NULL;
1121
1122 if (symbol_conf.kptr_restrict) {
1123 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1124 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1125 "Kernel samples will not be resolved.\n");
1126 machine->kptr_restrict_warned = true;
1127 return NULL;
1128 }
1129
1130 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1131}
1132
752fde44
ACM
1133static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1134{
0a7e6d1b 1135 int err = symbol__init(NULL);
752fde44
ACM
1136
1137 if (err)
1138 return err;
1139
8fb598e5
DA
1140 trace->host = machine__new_host();
1141 if (trace->host == NULL)
1142 return -ENOMEM;
752fde44 1143
caf8a0d0 1144 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
706c3da4
ACM
1145 return -errno;
1146
a33fbd56 1147 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76
KL
1148 evlist->threads, trace__tool_process, false,
1149 trace->opts.proc_map_timeout);
752fde44
ACM
1150 if (err)
1151 symbol__exit();
1152
1153 return err;
1154}
1155
13d4ff3e
ACM
1156static int syscall__set_arg_fmts(struct syscall *sc)
1157{
1158 struct format_field *field;
b6565c90 1159 int idx = 0, len;
13d4ff3e 1160
f208bd8d 1161 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
13d4ff3e
ACM
1162 if (sc->arg_scnprintf == NULL)
1163 return -1;
1164
1f115cb7
ACM
1165 if (sc->fmt)
1166 sc->arg_parm = sc->fmt->arg_parm;
1167
f208bd8d 1168 for (field = sc->args; field; field = field->next) {
beccb2b5
ACM
1169 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1170 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
12f3ca4f
ACM
1171 else if (strcmp(field->type, "const char *") == 0 &&
1172 (strcmp(field->name, "filename") == 0 ||
1173 strcmp(field->name, "path") == 0 ||
1174 strcmp(field->name, "pathname") == 0))
1175 sc->arg_scnprintf[idx] = SCA_FILENAME;
beccb2b5 1176 else if (field->flags & FIELD_IS_POINTER)
13d4ff3e 1177 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
d1d438a3
ACM
1178 else if (strcmp(field->type, "pid_t") == 0)
1179 sc->arg_scnprintf[idx] = SCA_PID;
ba2f22cf
ACM
1180 else if (strcmp(field->type, "umode_t") == 0)
1181 sc->arg_scnprintf[idx] = SCA_MODE_T;
b6565c90
ACM
1182 else if ((strcmp(field->type, "int") == 0 ||
1183 strcmp(field->type, "unsigned int") == 0 ||
1184 strcmp(field->type, "long") == 0) &&
1185 (len = strlen(field->name)) >= 2 &&
1186 strcmp(field->name + len - 2, "fd") == 0) {
1187 /*
1188 * /sys/kernel/tracing/events/syscalls/sys_enter*
1189 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1190 * 65 int
1191 * 23 unsigned int
1192 * 7 unsigned long
1193 */
1194 sc->arg_scnprintf[idx] = SCA_FD;
1195 }
13d4ff3e
ACM
1196 ++idx;
1197 }
1198
1199 return 0;
1200}
1201
514f1c67
ACM
1202static int trace__read_syscall_info(struct trace *trace, int id)
1203{
1204 char tp_name[128];
1205 struct syscall *sc;
fd0db102 1206 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1207
1208 if (name == NULL)
1209 return -1;
514f1c67
ACM
1210
1211 if (id > trace->syscalls.max) {
1212 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1213
1214 if (nsyscalls == NULL)
1215 return -1;
1216
1217 if (trace->syscalls.max != -1) {
1218 memset(nsyscalls + trace->syscalls.max + 1, 0,
1219 (id - trace->syscalls.max) * sizeof(*sc));
1220 } else {
1221 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1222 }
1223
1224 trace->syscalls.table = nsyscalls;
1225 trace->syscalls.max = id;
1226 }
1227
1228 sc = trace->syscalls.table + id;
3a531260 1229 sc->name = name;
2ae3a312 1230
3a531260 1231 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1232
aec1930b 1233 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1234 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1235
8dd2a131 1236 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1237 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1238 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1239 }
514f1c67 1240
8dd2a131 1241 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1242 return -1;
1243
f208bd8d
ACM
1244 sc->args = sc->tp_format->format.fields;
1245 sc->nr_args = sc->tp_format->format.nr_fields;
c42de706
TS
1246 /*
1247 * We need to check and discard the first variable '__syscall_nr'
1248 * or 'nr' that mean the syscall number. It is needless here.
1249 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1250 */
1251 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1252 sc->args = sc->args->next;
1253 --sc->nr_args;
1254 }
1255
5089f20e
ACM
1256 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1257
13d4ff3e 1258 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1259}
1260
d0cc439b
ACM
1261static int trace__validate_ev_qualifier(struct trace *trace)
1262{
8b3ce757 1263 int err = 0, i;
d0cc439b
ACM
1264 struct str_node *pos;
1265
8b3ce757
ACM
1266 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1267 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1268 sizeof(trace->ev_qualifier_ids.entries[0]));
1269
1270 if (trace->ev_qualifier_ids.entries == NULL) {
1271 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1272 trace->output);
1273 err = -EINVAL;
1274 goto out;
1275 }
1276
1277 i = 0;
1278
602a1f4d 1279 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1280 const char *sc = pos->s;
fd0db102 1281 int id = syscalltbl__id(trace->sctbl, sc);
d0cc439b 1282
8b3ce757 1283 if (id < 0) {
d0cc439b
ACM
1284 if (err == 0) {
1285 fputs("Error:\tInvalid syscall ", trace->output);
1286 err = -EINVAL;
1287 } else {
1288 fputs(", ", trace->output);
1289 }
1290
1291 fputs(sc, trace->output);
1292 }
8b3ce757
ACM
1293
1294 trace->ev_qualifier_ids.entries[i++] = id;
d0cc439b
ACM
1295 }
1296
1297 if (err < 0) {
1298 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1299 "\nHint:\tand: 'man syscalls'\n", trace->output);
8b3ce757
ACM
1300 zfree(&trace->ev_qualifier_ids.entries);
1301 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1302 }
8b3ce757 1303out:
d0cc439b
ACM
1304 return err;
1305}
1306
55d43bca
DA
1307/*
1308 * args is to be interpreted as a series of longs but we need to handle
1309 * 8-byte unaligned accesses. args points to raw_data within the event
1310 * and raw_data is guaranteed to be 8-byte unaligned because it is
1311 * preceded by raw_size which is a u32. So we need to copy args to a temp
1312 * variable to read it. Most notably this avoids extended load instructions
1313 * on unaligned addresses
1314 */
1315
752fde44 1316static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1317 unsigned char *args, struct trace *trace,
75b757ca 1318 struct thread *thread)
514f1c67 1319{
514f1c67 1320 size_t printed = 0;
55d43bca
DA
1321 unsigned char *p;
1322 unsigned long val;
514f1c67 1323
f208bd8d 1324 if (sc->args != NULL) {
514f1c67 1325 struct format_field *field;
01533e97
ACM
1326 u8 bit = 1;
1327 struct syscall_arg arg = {
75b757ca
ACM
1328 .idx = 0,
1329 .mask = 0,
1330 .trace = trace,
1331 .thread = thread,
01533e97 1332 };
6e7eeb51 1333
f208bd8d 1334 for (field = sc->args; field;
01533e97
ACM
1335 field = field->next, ++arg.idx, bit <<= 1) {
1336 if (arg.mask & bit)
6e7eeb51 1337 continue;
55d43bca
DA
1338
1339 /* special care for unaligned accesses */
1340 p = args + sizeof(unsigned long) * arg.idx;
1341 memcpy(&val, p, sizeof(val));
1342
4aa58232
ACM
1343 /*
1344 * Suppress this argument if its value is zero and
1345 * and we don't have a string associated in an
1346 * strarray for it.
1347 */
55d43bca 1348 if (val == 0 &&
4aa58232
ACM
1349 !(sc->arg_scnprintf &&
1350 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1351 sc->arg_parm[arg.idx]))
22ae5cf1
ACM
1352 continue;
1353
752fde44 1354 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1355 "%s%s: ", printed ? ", " : "", field->name);
01533e97 1356 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
55d43bca 1357 arg.val = val;
1f115cb7
ACM
1358 if (sc->arg_parm)
1359 arg.parm = sc->arg_parm[arg.idx];
01533e97
ACM
1360 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1361 size - printed, &arg);
6e7eeb51 1362 } else {
13d4ff3e 1363 printed += scnprintf(bf + printed, size - printed,
55d43bca 1364 "%ld", val);
6e7eeb51 1365 }
514f1c67 1366 }
4c4d6e51
ACM
1367 } else if (IS_ERR(sc->tp_format)) {
1368 /*
1369 * If we managed to read the tracepoint /format file, then we
1370 * may end up not having any args, like with gettid(), so only
1371 * print the raw args when we didn't manage to read it.
1372 */
01533e97
ACM
1373 int i = 0;
1374
514f1c67 1375 while (i < 6) {
55d43bca
DA
1376 /* special care for unaligned accesses */
1377 p = args + sizeof(unsigned long) * i;
1378 memcpy(&val, p, sizeof(val));
752fde44
ACM
1379 printed += scnprintf(bf + printed, size - printed,
1380 "%sarg%d: %ld",
55d43bca 1381 printed ? ", " : "", i, val);
514f1c67
ACM
1382 ++i;
1383 }
1384 }
1385
1386 return printed;
1387}
1388
/*
 * Signature shared by the per tracepoint handlers in this file, e.g.
 * trace__sys_enter(), trace__sys_exit() and trace__vfs_getname().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1392
1393static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1394 struct perf_evsel *evsel, int id)
ba3d7dee 1395{
ba3d7dee
ACM
1396
1397 if (id < 0) {
adaa18bf
ACM
1398
1399 /*
1400 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1401 * before that, leaving at a higher verbosity level till that is
1402 * explained. Reproduced with plain ftrace with:
1403 *
1404 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1405 * grep "NR -1 " /t/trace_pipe
1406 *
1407 * After generating some load on the machine.
1408 */
1409 if (verbose > 1) {
1410 static u64 n;
1411 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1412 id, perf_evsel__name(evsel), ++n);
1413 }
ba3d7dee
ACM
1414 return NULL;
1415 }
1416
1417 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1418 trace__read_syscall_info(trace, id))
1419 goto out_cant_read;
1420
1421 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1422 goto out_cant_read;
1423
1424 return &trace->syscalls.table[id];
1425
1426out_cant_read:
bb963e16 1427 if (verbose > 0) {
7c304ee0
ACM
1428 fprintf(trace->output, "Problems reading syscall %d", id);
1429 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1430 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1431 fputs(" information\n", trace->output);
1432 }
ba3d7dee
ACM
1433 return NULL;
1434}
1435
bf2575c1
DA
1436static void thread__update_stats(struct thread_trace *ttrace,
1437 int id, struct perf_sample *sample)
1438{
1439 struct int_node *inode;
1440 struct stats *stats;
1441 u64 duration = 0;
1442
1443 inode = intlist__findnew(ttrace->syscall_stats, id);
1444 if (inode == NULL)
1445 return;
1446
1447 stats = inode->priv;
1448 if (stats == NULL) {
1449 stats = malloc(sizeof(struct stats));
1450 if (stats == NULL)
1451 return;
1452 init_stats(stats);
1453 inode->priv = stats;
1454 }
1455
1456 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1457 duration = sample->time - ttrace->entry_time;
1458
1459 update_stats(stats, duration);
1460}
1461
e596663e
ACM
1462static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1463{
1464 struct thread_trace *ttrace;
1465 u64 duration;
1466 size_t printed;
1467
1468 if (trace->current == NULL)
1469 return 0;
1470
1471 ttrace = thread__priv(trace->current);
1472
1473 if (!ttrace->entry_pending)
1474 return 0;
1475
1476 duration = sample->time - ttrace->entry_time;
1477
fd2b2975 1478 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
e596663e
ACM
1479 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1480 ttrace->entry_pending = false;
1481
1482 return printed;
1483}
1484
ba3d7dee 1485static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1486 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1487 struct perf_sample *sample)
1488{
752fde44 1489 char *msg;
ba3d7dee 1490 void *args;
752fde44 1491 size_t printed = 0;
2ae3a312 1492 struct thread *thread;
b91fc39f 1493 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1494 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1495 struct thread_trace *ttrace;
1496
1497 if (sc == NULL)
1498 return -1;
ba3d7dee 1499
8fb598e5 1500 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1501 ttrace = thread__trace(thread, trace->output);
2ae3a312 1502 if (ttrace == NULL)
b91fc39f 1503 goto out_put;
ba3d7dee 1504
77170988 1505 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1506
1507 if (ttrace->entry_str == NULL) {
e4d44e83 1508 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1509 if (!ttrace->entry_str)
b91fc39f 1510 goto out_put;
752fde44
ACM
1511 }
1512
5cf9c84e 1513 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1514 trace__printf_interrupted_entry(trace, sample);
e596663e 1515
752fde44
ACM
1516 ttrace->entry_time = sample->time;
1517 msg = ttrace->entry_str;
e4d44e83 1518 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1519
e4d44e83 1520 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1521 args, trace, thread);
752fde44 1522
5089f20e 1523 if (sc->is_exit) {
5cf9c84e 1524 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
fd2b2975 1525 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1526 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1527 }
7f4f8001 1528 } else {
752fde44 1529 ttrace->entry_pending = true;
7f4f8001
ACM
1530 /* See trace__vfs_getname & trace__sys_exit */
1531 ttrace->filename.pending_open = false;
1532 }
ba3d7dee 1533
f3b623b8
ACM
1534 if (trace->current != thread) {
1535 thread__put(trace->current);
1536 trace->current = thread__get(thread);
1537 }
b91fc39f
ACM
1538 err = 0;
1539out_put:
1540 thread__put(thread);
1541 return err;
ba3d7dee
ACM
1542}
1543
5cf9c84e
ACM
1544static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1545 struct perf_sample *sample,
1546 struct callchain_cursor *cursor)
202ff968
ACM
1547{
1548 struct addr_location al;
5cf9c84e
ACM
1549
1550 if (machine__resolve(trace->host, &al, sample) < 0 ||
1551 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1552 return -1;
1553
1554 return 0;
1555}
1556
1557static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1558{
202ff968 1559 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1560 const unsigned int print_opts = EVSEL__PRINT_SYM |
1561 EVSEL__PRINT_DSO |
1562 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1563
d327e60c 1564 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1565}
1566
ba3d7dee 1567static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1568 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1569 struct perf_sample *sample)
1570{
2c82c3ad 1571 long ret;
60c907ab 1572 u64 duration = 0;
fd2b2975 1573 bool duration_calculated = false;
2ae3a312 1574 struct thread *thread;
5cf9c84e 1575 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1576 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1577 struct thread_trace *ttrace;
1578
1579 if (sc == NULL)
1580 return -1;
ba3d7dee 1581
8fb598e5 1582 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1583 ttrace = thread__trace(thread, trace->output);
2ae3a312 1584 if (ttrace == NULL)
b91fc39f 1585 goto out_put;
ba3d7dee 1586
bf2575c1
DA
1587 if (trace->summary)
1588 thread__update_stats(ttrace, id, sample);
1589
77170988 1590 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1591
fd0db102 1592 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1593 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1594 ttrace->filename.pending_open = false;
c522739d
ACM
1595 ++trace->stats.vfs_getname;
1596 }
1597
ae9ed035 1598 if (ttrace->entry_time) {
60c907ab 1599 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1600 if (trace__filter_duration(trace, duration))
1601 goto out;
fd2b2975 1602 duration_calculated = true;
ae9ed035
ACM
1603 } else if (trace->duration_filter)
1604 goto out;
60c907ab 1605
5cf9c84e
ACM
1606 if (sample->callchain) {
1607 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1608 if (callchain_ret == 0) {
1609 if (callchain_cursor.nr < trace->min_stack)
1610 goto out;
1611 callchain_ret = 1;
1612 }
1613 }
1614
fd2eabaf
DA
1615 if (trace->summary_only)
1616 goto out;
1617
fd2b2975 1618 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1619
1620 if (ttrace->entry_pending) {
c24ff998 1621 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1622 } else {
c24ff998
ACM
1623 fprintf(trace->output, " ... [");
1624 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1625 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1626 }
1627
da3c9a44
ACM
1628 if (sc->fmt == NULL) {
1629signed_print:
2c82c3ad 1630 fprintf(trace->output, ") = %ld", ret);
11c8e39f 1631 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
942a91ed 1632 char bf[STRERR_BUFSIZE];
c8b5f2c9 1633 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
ba3d7dee
ACM
1634 *e = audit_errno_to_name(-ret);
1635
c24ff998 1636 fprintf(trace->output, ") = -1 %s %s", e, emsg);
da3c9a44 1637 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1638 fprintf(trace->output, ") = 0 Timeout");
04b34729 1639 else if (sc->fmt->hexret)
2c82c3ad 1640 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1641 else if (sc->fmt->errpid) {
1642 struct thread *child = machine__find_thread(trace->host, ret, ret);
1643
1644 if (child != NULL) {
1645 fprintf(trace->output, ") = %ld", ret);
1646 if (child->comm_set)
1647 fprintf(trace->output, " (%s)", thread__comm_str(child));
1648 thread__put(child);
1649 }
1650 } else
da3c9a44 1651 goto signed_print;
ba3d7dee 1652
c24ff998 1653 fputc('\n', trace->output);
566a0885 1654
5cf9c84e
ACM
1655 if (callchain_ret > 0)
1656 trace__fprintf_callchain(trace, sample);
1657 else if (callchain_ret < 0)
1658 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1659out:
752fde44 1660 ttrace->entry_pending = false;
b91fc39f
ACM
1661 err = 0;
1662out_put:
1663 thread__put(thread);
1664 return err;
ba3d7dee
ACM
1665}
1666
c522739d 1667static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1668 union perf_event *event __maybe_unused,
c522739d
ACM
1669 struct perf_sample *sample)
1670{
f994592d
ACM
1671 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1672 struct thread_trace *ttrace;
1673 size_t filename_len, entry_str_len, to_move;
1674 ssize_t remaining_space;
1675 char *pos;
7f4f8001 1676 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1677
1678 if (!thread)
1679 goto out;
1680
1681 ttrace = thread__priv(thread);
1682 if (!ttrace)
ef65e96e 1683 goto out_put;
f994592d 1684
7f4f8001 1685 filename_len = strlen(filename);
39f0e7a8 1686 if (filename_len == 0)
ef65e96e 1687 goto out_put;
7f4f8001
ACM
1688
1689 if (ttrace->filename.namelen < filename_len) {
1690 char *f = realloc(ttrace->filename.name, filename_len + 1);
1691
1692 if (f == NULL)
ef65e96e 1693 goto out_put;
7f4f8001
ACM
1694
1695 ttrace->filename.namelen = filename_len;
1696 ttrace->filename.name = f;
1697 }
1698
1699 strcpy(ttrace->filename.name, filename);
1700 ttrace->filename.pending_open = true;
1701
f994592d 1702 if (!ttrace->filename.ptr)
ef65e96e 1703 goto out_put;
f994592d
ACM
1704
1705 entry_str_len = strlen(ttrace->entry_str);
1706 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1707 if (remaining_space <= 0)
ef65e96e 1708 goto out_put;
f994592d 1709
f994592d
ACM
1710 if (filename_len > (size_t)remaining_space) {
1711 filename += filename_len - remaining_space;
1712 filename_len = remaining_space;
1713 }
1714
1715 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1716 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1717 memmove(pos + filename_len, pos, to_move);
1718 memcpy(pos, filename, filename_len);
1719
1720 ttrace->filename.ptr = 0;
1721 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1722out_put:
1723 thread__put(thread);
f994592d 1724out:
c522739d
ACM
1725 return 0;
1726}
1727
1302d88e 1728static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1729 union perf_event *event __maybe_unused,
1302d88e
ACM
1730 struct perf_sample *sample)
1731{
1732 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1733 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1734 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1735 sample->pid,
1736 sample->tid);
c24ff998 1737 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1738
1739 if (ttrace == NULL)
1740 goto out_dump;
1741
1742 ttrace->runtime_ms += runtime_ms;
1743 trace->runtime_ms += runtime_ms;
ef65e96e 1744out_put:
b91fc39f 1745 thread__put(thread);
1302d88e
ACM
1746 return 0;
1747
1748out_dump:
c24ff998 1749 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1750 evsel->name,
1751 perf_evsel__strval(evsel, sample, "comm"),
1752 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1753 runtime,
1754 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 1755 goto out_put;
1302d88e
ACM
1756}
1757
1d6c9407
WN
1758static void bpf_output__printer(enum binary_printer_ops op,
1759 unsigned int val, void *extra)
1760{
1761 FILE *output = extra;
1762 unsigned char ch = (unsigned char)val;
1763
1764 switch (op) {
1765 case BINARY_PRINT_CHAR_DATA:
1766 fprintf(output, "%c", isprint(ch) ? ch : '.');
1767 break;
1768 case BINARY_PRINT_DATA_BEGIN:
1769 case BINARY_PRINT_LINE_BEGIN:
1770 case BINARY_PRINT_ADDR:
1771 case BINARY_PRINT_NUM_DATA:
1772 case BINARY_PRINT_NUM_PAD:
1773 case BINARY_PRINT_SEP:
1774 case BINARY_PRINT_CHAR_PAD:
1775 case BINARY_PRINT_LINE_END:
1776 case BINARY_PRINT_DATA_END:
1777 default:
1778 break;
1779 }
1780}
1781
1782static void bpf_output__fprintf(struct trace *trace,
1783 struct perf_sample *sample)
1784{
1785 print_binary(sample->raw_data, sample->raw_size, 8,
1786 bpf_output__printer, trace->output);
1787}
1788
14a052df
ACM
1789static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1790 union perf_event *event __maybe_unused,
1791 struct perf_sample *sample)
1792{
7ad35615
ACM
1793 int callchain_ret = 0;
1794
1795 if (sample->callchain) {
1796 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1797 if (callchain_ret == 0) {
1798 if (callchain_cursor.nr < trace->min_stack)
1799 goto out;
1800 callchain_ret = 1;
1801 }
1802 }
1803
14a052df
ACM
1804 trace__printf_interrupted_entry(trace, sample);
1805 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1806
1807 if (trace->trace_syscalls)
1808 fprintf(trace->output, "( ): ");
1809
1810 fprintf(trace->output, "%s:", evsel->name);
14a052df 1811
1d6c9407
WN
1812 if (perf_evsel__is_bpf_output(evsel)) {
1813 bpf_output__fprintf(trace, sample);
1814 } else if (evsel->tp_format) {
14a052df
ACM
1815 event_format__fprintf(evsel->tp_format, sample->cpu,
1816 sample->raw_data, sample->raw_size,
1817 trace->output);
1818 }
1819
1820 fprintf(trace->output, ")\n");
202ff968 1821
7ad35615
ACM
1822 if (callchain_ret > 0)
1823 trace__fprintf_callchain(trace, sample);
1824 else if (callchain_ret < 0)
1825 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1826out:
14a052df
ACM
1827 return 0;
1828}
1829
598d02c5
SF
1830static void print_location(FILE *f, struct perf_sample *sample,
1831 struct addr_location *al,
1832 bool print_dso, bool print_sym)
1833{
1834
bb963e16 1835 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
1836 fprintf(f, "%s@", al->map->dso->long_name);
1837
bb963e16 1838 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 1839 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1840 al->addr - al->sym->start);
1841 else if (al->map)
4414a3c5 1842 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1843 else
4414a3c5 1844 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1845}
1846
1847static int trace__pgfault(struct trace *trace,
1848 struct perf_evsel *evsel,
473398a2 1849 union perf_event *event __maybe_unused,
598d02c5
SF
1850 struct perf_sample *sample)
1851{
1852 struct thread *thread;
598d02c5
SF
1853 struct addr_location al;
1854 char map_type = 'd';
a2ea67d7 1855 struct thread_trace *ttrace;
b91fc39f 1856 int err = -1;
1df54290 1857 int callchain_ret = 0;
598d02c5
SF
1858
1859 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1860
1861 if (sample->callchain) {
1862 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1863 if (callchain_ret == 0) {
1864 if (callchain_cursor.nr < trace->min_stack)
1865 goto out_put;
1866 callchain_ret = 1;
1867 }
1868 }
1869
a2ea67d7
SF
1870 ttrace = thread__trace(thread, trace->output);
1871 if (ttrace == NULL)
b91fc39f 1872 goto out_put;
a2ea67d7
SF
1873
1874 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1875 ttrace->pfmaj++;
1876 else
1877 ttrace->pfmin++;
1878
1879 if (trace->summary_only)
b91fc39f 1880 goto out;
598d02c5 1881
473398a2 1882 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
1883 sample->ip, &al);
1884
fd2b2975 1885 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
1886
1887 fprintf(trace->output, "%sfault [",
1888 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1889 "maj" : "min");
1890
1891 print_location(trace->output, sample, &al, false, true);
1892
1893 fprintf(trace->output, "] => ");
1894
473398a2 1895 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
1896 sample->addr, &al);
1897
1898 if (!al.map) {
473398a2 1899 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
1900 MAP__FUNCTION, sample->addr, &al);
1901
1902 if (al.map)
1903 map_type = 'x';
1904 else
1905 map_type = '?';
1906 }
1907
1908 print_location(trace->output, sample, &al, true, false);
1909
1910 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 1911
1df54290
ACM
1912 if (callchain_ret > 0)
1913 trace__fprintf_callchain(trace, sample);
1914 else if (callchain_ret < 0)
1915 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
1916out:
1917 err = 0;
1918out_put:
1919 thread__put(thread);
1920 return err;
598d02c5
SF
1921}
1922
e6001980 1923static void trace__set_base_time(struct trace *trace,
8a07a809 1924 struct perf_evsel *evsel,
e6001980
ACM
1925 struct perf_sample *sample)
1926{
8a07a809
ACM
1927 /*
1928 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1929 * and don't use sample->time unconditionally, we may end up having
1930 * some other event in the future without PERF_SAMPLE_TIME for good
1931 * reason, i.e. we may not be interested in its timestamps, just in
1932 * it taking place, picking some piece of information when it
1933 * appears in our event stream (vfs_getname comes to mind).
1934 */
1935 if (trace->base_time == 0 && !trace->full_time &&
1936 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
1937 trace->base_time = sample->time;
1938}
1939
6810fc91 1940static int trace__process_sample(struct perf_tool *tool,
0c82adcf 1941 union perf_event *event,
6810fc91
DA
1942 struct perf_sample *sample,
1943 struct perf_evsel *evsel,
1944 struct machine *machine __maybe_unused)
1945{
1946 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 1947 struct thread *thread;
6810fc91
DA
1948 int err = 0;
1949
744a9719 1950 tracepoint_handler handler = evsel->handler;
6810fc91 1951
aa07df6e
DA
1952 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1953 if (thread && thread__is_filtered(thread))
ef65e96e 1954 goto out;
bdc89661 1955
e6001980 1956 trace__set_base_time(trace, evsel, sample);
6810fc91 1957
3160565f
DA
1958 if (handler) {
1959 ++trace->nr_events;
0c82adcf 1960 handler(trace, evsel, event, sample);
3160565f 1961 }
ef65e96e
ACM
1962out:
1963 thread__put(thread);
6810fc91
DA
1964 return err;
1965}
1966
1e28fe0a 1967static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
1968{
1969 unsigned int rec_argc, i, j;
1970 const char **rec_argv;
1971 const char * const record_args[] = {
1972 "record",
1973 "-R",
1974 "-m", "1024",
1975 "-c", "1",
5e2485b1
DA
1976 };
1977
1e28fe0a
SF
1978 const char * const sc_args[] = { "-e", };
1979 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1980 const char * const majpf_args[] = { "-e", "major-faults" };
1981 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1982 const char * const minpf_args[] = { "-e", "minor-faults" };
1983 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1984
9aca7f17 1985 /* +1 is for the event string below */
1e28fe0a
SF
1986 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1987 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
1988 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1989
1990 if (rec_argv == NULL)
1991 return -ENOMEM;
1992
1e28fe0a 1993 j = 0;
5e2485b1 1994 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
1995 rec_argv[j++] = record_args[i];
1996
e281a960
SF
1997 if (trace->trace_syscalls) {
1998 for (i = 0; i < sc_args_nr; i++)
1999 rec_argv[j++] = sc_args[i];
2000
2001 /* event string may be different for older kernels - e.g., RHEL6 */
2002 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2003 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2004 else if (is_valid_tracepoint("syscalls:sys_enter"))
2005 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2006 else {
2007 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2008 return -1;
2009 }
9aca7f17 2010 }
9aca7f17 2011
1e28fe0a
SF
2012 if (trace->trace_pgfaults & TRACE_PFMAJ)
2013 for (i = 0; i < majpf_args_nr; i++)
2014 rec_argv[j++] = majpf_args[i];
2015
2016 if (trace->trace_pgfaults & TRACE_PFMIN)
2017 for (i = 0; i < minpf_args_nr; i++)
2018 rec_argv[j++] = minpf_args[i];
2019
2020 for (i = 0; i < (unsigned int)argc; i++)
2021 rec_argv[j++] = argv[i];
5e2485b1 2022
b0ad8ea6 2023 return cmd_record(j, rec_argv);
5e2485b1
DA
2024}
2025
bf2575c1
DA
2026static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2027
08c98776 2028static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2029{
ef503831 2030 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2031
2032 if (IS_ERR(evsel))
08c98776 2033 return false;
c522739d
ACM
2034
2035 if (perf_evsel__field(evsel, "pathname") == NULL) {
2036 perf_evsel__delete(evsel);
08c98776 2037 return false;
c522739d
ACM
2038 }
2039
744a9719 2040 evsel->handler = trace__vfs_getname;
c522739d 2041 perf_evlist__add(evlist, evsel);
08c98776 2042 return true;
c522739d
ACM
2043}
2044
0ae537cb 2045static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2046{
2047 struct perf_evsel *evsel;
2048 struct perf_event_attr attr = {
2049 .type = PERF_TYPE_SOFTWARE,
2050 .mmap_data = 1,
598d02c5
SF
2051 };
2052
2053 attr.config = config;
0524798c 2054 attr.sample_period = 1;
598d02c5
SF
2055
2056 event_attr_init(&attr);
2057
2058 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2059 if (evsel)
2060 evsel->handler = trace__pgfault;
598d02c5 2061
0ae537cb 2062 return evsel;
598d02c5
SF
2063}
2064
ddbb1b13
ACM
2065static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2066{
2067 const u32 type = event->header.type;
2068 struct perf_evsel *evsel;
2069
ddbb1b13
ACM
2070 if (type != PERF_RECORD_SAMPLE) {
2071 trace__process_event(trace, trace->host, event, sample);
2072 return;
2073 }
2074
2075 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2076 if (evsel == NULL) {
2077 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2078 return;
2079 }
2080
e6001980
ACM
2081 trace__set_base_time(trace, evsel, sample);
2082
ddbb1b13
ACM
2083 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2084 sample->raw_data == NULL) {
2085 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2086 perf_evsel__name(evsel), sample->tid,
2087 sample->cpu, sample->raw_size);
2088 } else {
2089 tracepoint_handler handler = evsel->handler;
2090 handler(trace, evsel, event, sample);
2091 }
2092}
2093
c27366f0
ACM
2094static int trace__add_syscall_newtp(struct trace *trace)
2095{
2096 int ret = -1;
2097 struct perf_evlist *evlist = trace->evlist;
2098 struct perf_evsel *sys_enter, *sys_exit;
2099
2100 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2101 if (sys_enter == NULL)
2102 goto out;
2103
2104 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2105 goto out_delete_sys_enter;
2106
2107 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2108 if (sys_exit == NULL)
2109 goto out_delete_sys_enter;
2110
2111 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2112 goto out_delete_sys_exit;
2113
2114 perf_evlist__add(evlist, sys_enter);
2115 perf_evlist__add(evlist, sys_exit);
2116
2ddd5c04 2117 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2118 /*
2119 * We're interested only in the user space callchain
2120 * leading to the syscall, allow overriding that for
2121 * debugging reasons using --kernel_syscall_callchains
2122 */
2123 sys_exit->attr.exclude_callchain_kernel = 1;
2124 }
2125
8b3ce757
ACM
2126 trace->syscalls.events.sys_enter = sys_enter;
2127 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2128
2129 ret = 0;
2130out:
2131 return ret;
2132
2133out_delete_sys_exit:
2134 perf_evsel__delete_priv(sys_exit);
2135out_delete_sys_enter:
2136 perf_evsel__delete_priv(sys_enter);
2137 goto out;
2138}
2139
19867b61
ACM
2140static int trace__set_ev_qualifier_filter(struct trace *trace)
2141{
2142 int err = -1;
b15d0a4c 2143 struct perf_evsel *sys_exit;
19867b61
ACM
2144 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2145 trace->ev_qualifier_ids.nr,
2146 trace->ev_qualifier_ids.entries);
2147
2148 if (filter == NULL)
2149 goto out_enomem;
2150
3541c034
MP
2151 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2152 filter)) {
b15d0a4c 2153 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2154 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2155 }
19867b61
ACM
2156
2157 free(filter);
2158out:
2159 return err;
2160out_enomem:
2161 errno = ENOMEM;
2162 goto out;
2163}
c27366f0 2164
f15eb531 2165static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2166{
14a052df 2167 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2168 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2169 int err = -1, i;
2170 unsigned long before;
f15eb531 2171 const bool forks = argc > 0;
46fb3c21 2172 bool draining = false;
514f1c67 2173
75b757ca
ACM
2174 trace->live = true;
2175
c27366f0 2176 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2177 goto out_error_raw_syscalls;
514f1c67 2178
e281a960 2179 if (trace->trace_syscalls)
08c98776 2180 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2181
0ae537cb
ACM
2182 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2183 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2184 if (pgfault_maj == NULL)
2185 goto out_error_mem;
2186 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2187 }
598d02c5 2188
0ae537cb
ACM
2189 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2190 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2191 if (pgfault_min == NULL)
2192 goto out_error_mem;
2193 perf_evlist__add(evlist, pgfault_min);
2194 }
598d02c5 2195
1302d88e 2196 if (trace->sched &&
2cc990ba
ACM
2197 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2198 trace__sched_stat_runtime))
2199 goto out_error_sched_stat_runtime;
1302d88e 2200
514f1c67
ACM
2201 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2202 if (err < 0) {
c24ff998 2203 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2204 goto out_delete_evlist;
2205 }
2206
752fde44
ACM
2207 err = trace__symbols_init(trace, evlist);
2208 if (err < 0) {
c24ff998 2209 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2210 goto out_delete_evlist;
752fde44
ACM
2211 }
2212
fde54b78
ACM
2213 perf_evlist__config(evlist, &trace->opts, NULL);
2214
0c3a6ef4
ACM
2215 if (callchain_param.enabled) {
2216 bool use_identifier = false;
2217
2218 if (trace->syscalls.events.sys_exit) {
2219 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2220 &trace->opts, &callchain_param);
2221 use_identifier = true;
2222 }
2223
2224 if (pgfault_maj) {
2225 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2226 use_identifier = true;
2227 }
2228
2229 if (pgfault_min) {
2230 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2231 use_identifier = true;
2232 }
2233
2234 if (use_identifier) {
2235 /*
2236 * Now we have evsels with different sample_ids, use
2237 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2238 * from a fixed position in each ring buffer record.
2239 *
2240 * As of this the changeset introducing this comment, this
2241 * isn't strictly needed, as the fields that can come before
2242 * PERF_SAMPLE_ID are all used, but we'll probably disable
2243 * some of those for things like copying the payload of
2244 * pointer syscall arguments, and for vfs_getname we don't
2245 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2246 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2247 */
2248 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2249 perf_evlist__reset_sample_bit(evlist, ID);
2250 }
fde54b78 2251 }
514f1c67 2252
f15eb531
NK
2253 signal(SIGCHLD, sig_handler);
2254 signal(SIGINT, sig_handler);
2255
2256 if (forks) {
6ef73ec4 2257 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2258 argv, false, NULL);
f15eb531 2259 if (err < 0) {
c24ff998 2260 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2261 goto out_delete_evlist;
f15eb531
NK
2262 }
2263 }
2264
514f1c67 2265 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2266 if (err < 0)
2267 goto out_error_open;
514f1c67 2268
ba504235
WN
2269 err = bpf__apply_obj_config();
2270 if (err) {
2271 char errbuf[BUFSIZ];
2272
2273 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2274 pr_err("ERROR: Apply config to BPF failed: %s\n",
2275 errbuf);
2276 goto out_error_open;
2277 }
2278
241b057c
ACM
2279 /*
2280 * Better not use !target__has_task() here because we need to cover the
2281 * case where no threads were specified in the command line, but a
2282 * workload was, and in that case we will fill in the thread_map when
2283 * we fork the workload in perf_evlist__prepare_workload.
2284 */
f078c385
ACM
2285 if (trace->filter_pids.nr > 0)
2286 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2287 else if (thread_map__pid(evlist->threads, 0) == -1)
f078c385
ACM
2288 err = perf_evlist__set_filter_pid(evlist, getpid());
2289
94ad89bc
ACM
2290 if (err < 0)
2291 goto out_error_mem;
2292
19867b61
ACM
2293 if (trace->ev_qualifier_ids.nr > 0) {
2294 err = trace__set_ev_qualifier_filter(trace);
2295 if (err < 0)
2296 goto out_errno;
19867b61 2297
2e5e5f87
ACM
2298 pr_debug("event qualifier tracepoint filter: %s\n",
2299 trace->syscalls.events.sys_exit->filter);
2300 }
19867b61 2301
94ad89bc
ACM
2302 err = perf_evlist__apply_filters(evlist, &evsel);
2303 if (err < 0)
2304 goto out_error_apply_filters;
241b057c 2305
f885037e 2306 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
e09b18d4
ACM
2307 if (err < 0)
2308 goto out_error_mmap;
514f1c67 2309
e36b7821 2310 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2311 perf_evlist__enable(evlist);
2312
f15eb531
NK
2313 if (forks)
2314 perf_evlist__start_workload(evlist);
2315
e36b7821
AB
2316 if (trace->opts.initial_delay) {
2317 usleep(trace->opts.initial_delay * 1000);
2318 perf_evlist__enable(evlist);
2319 }
2320
e13798c7 2321 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2322 evlist->threads->nr > 1 ||
2323 perf_evlist__first(evlist)->attr.inherit;
514f1c67 2324again:
efd5745e 2325 before = trace->nr_events;
514f1c67
ACM
2326
2327 for (i = 0; i < evlist->nr_mmaps; i++) {
2328 union perf_event *event;
2329
2330 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2331 struct perf_sample sample;
514f1c67 2332
efd5745e 2333 ++trace->nr_events;
514f1c67 2334
514f1c67
ACM
2335 err = perf_evlist__parse_sample(evlist, event, &sample);
2336 if (err) {
c24ff998 2337 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2338 goto next_event;
514f1c67
ACM
2339 }
2340
ddbb1b13 2341 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2342next_event:
2343 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2344
ba209f85
ACM
2345 if (interrupted)
2346 goto out_disable;
02ac5421
ACM
2347
2348 if (done && !draining) {
2349 perf_evlist__disable(evlist);
2350 draining = true;
2351 }
514f1c67
ACM
2352 }
2353 }
2354
efd5745e 2355 if (trace->nr_events == before) {
ba209f85 2356 int timeout = done ? 100 : -1;
f15eb531 2357
46fb3c21
ACM
2358 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2359 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2360 draining = true;
2361
ba209f85 2362 goto again;
46fb3c21 2363 }
ba209f85
ACM
2364 } else {
2365 goto again;
f15eb531
NK
2366 }
2367
ba209f85 2368out_disable:
f3b623b8
ACM
2369 thread__zput(trace->current);
2370
ba209f85 2371 perf_evlist__disable(evlist);
514f1c67 2372
c522739d
ACM
2373 if (!err) {
2374 if (trace->summary)
2375 trace__fprintf_thread_summary(trace, trace->output);
2376
2377 if (trace->show_tool_stats) {
2378 fprintf(trace->output, "Stats:\n "
2379 " vfs_getname : %" PRIu64 "\n"
2380 " proc_getname: %" PRIu64 "\n",
2381 trace->stats.vfs_getname,
2382 trace->stats.proc_getname);
2383 }
2384 }
bf2575c1 2385
514f1c67
ACM
2386out_delete_evlist:
2387 perf_evlist__delete(evlist);
14a052df 2388 trace->evlist = NULL;
75b757ca 2389 trace->live = false;
514f1c67 2390 return err;
6ef068cb
ACM
2391{
2392 char errbuf[BUFSIZ];
a8f23d8f 2393
2cc990ba 2394out_error_sched_stat_runtime:
988bdb31 2395 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2396 goto out_error;
2397
801c67b0 2398out_error_raw_syscalls:
988bdb31 2399 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2400 goto out_error;
2401
e09b18d4
ACM
2402out_error_mmap:
2403 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2404 goto out_error;
2405
a8f23d8f
ACM
2406out_error_open:
2407 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2408
2409out_error:
6ef068cb 2410 fprintf(trace->output, "%s\n", errbuf);
87f91868 2411 goto out_delete_evlist;
94ad89bc
ACM
2412
2413out_error_apply_filters:
2414 fprintf(trace->output,
2415 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2416 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2417 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2418 goto out_delete_evlist;
514f1c67 2419}
5ed08dae
ACM
2420out_error_mem:
2421 fprintf(trace->output, "Not enough memory to run!\n");
2422 goto out_delete_evlist;
19867b61
ACM
2423
2424out_errno:
2425 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2426 goto out_delete_evlist;
a8f23d8f 2427}
514f1c67 2428
6810fc91
DA
2429static int trace__replay(struct trace *trace)
2430{
2431 const struct perf_evsel_str_handler handlers[] = {
c522739d 2432 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2433 };
f5fc1412
JO
2434 struct perf_data_file file = {
2435 .path = input_name,
2436 .mode = PERF_DATA_MODE_READ,
e366a6d8 2437 .force = trace->force,
f5fc1412 2438 };
6810fc91 2439 struct perf_session *session;
003824e8 2440 struct perf_evsel *evsel;
6810fc91
DA
2441 int err = -1;
2442
2443 trace->tool.sample = trace__process_sample;
2444 trace->tool.mmap = perf_event__process_mmap;
384c671e 2445 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2446 trace->tool.comm = perf_event__process_comm;
2447 trace->tool.exit = perf_event__process_exit;
2448 trace->tool.fork = perf_event__process_fork;
2449 trace->tool.attr = perf_event__process_attr;
f3b3614a 2450 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2451 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2452 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2453
0a8cb85c 2454 trace->tool.ordered_events = true;
6810fc91
DA
2455 trace->tool.ordering_requires_timestamps = true;
2456
2457 /* add tid to output */
2458 trace->multiple_threads = true;
2459
f5fc1412 2460 session = perf_session__new(&file, false, &trace->tool);
6810fc91 2461 if (session == NULL)
52e02834 2462 return -1;
6810fc91 2463
aa07df6e
DA
2464 if (trace->opts.target.pid)
2465 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2466
2467 if (trace->opts.target.tid)
2468 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2469
0a7e6d1b 2470 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2471 goto out;
2472
8fb598e5
DA
2473 trace->host = &session->machines.host;
2474
6810fc91
DA
2475 err = perf_session__set_tracepoints_handlers(session, handlers);
2476 if (err)
2477 goto out;
2478
003824e8
NK
2479 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2480 "raw_syscalls:sys_enter");
9aca7f17
DA
2481 /* older kernels have syscalls tp versus raw_syscalls */
2482 if (evsel == NULL)
2483 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2484 "syscalls:sys_enter");
003824e8 2485
e281a960
SF
2486 if (evsel &&
2487 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2488 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2489 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2490 goto out;
2491 }
2492
2493 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2494 "raw_syscalls:sys_exit");
9aca7f17
DA
2495 if (evsel == NULL)
2496 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2497 "syscalls:sys_exit");
e281a960
SF
2498 if (evsel &&
2499 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2500 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2501 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2502 goto out;
2503 }
2504
e5cadb93 2505 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2506 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2507 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2508 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2509 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2510 evsel->handler = trace__pgfault;
2511 }
2512
6810fc91
DA
2513 setup_pager();
2514
b7b61cbe 2515 err = perf_session__process_events(session);
6810fc91
DA
2516 if (err)
2517 pr_err("Failed to process events, error %d", err);
2518
bf2575c1
DA
2519 else if (trace->summary)
2520 trace__fprintf_thread_summary(trace, trace->output);
2521
6810fc91
DA
2522out:
2523 perf_session__delete(session);
2524
2525 return err;
2526}
2527
1302d88e
ACM
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns what fprintf() returned, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2536
b535d523
ACM
2537DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2538 struct stats *stats;
2539 double msecs;
2540 int syscall;
2541)
2542{
2543 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2544 struct stats *stats = source->priv;
2545
2546 entry->syscall = source->i;
2547 entry->stats = stats;
2548 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2549}
2550
bf2575c1
DA
2551static size_t thread__dump_stats(struct thread_trace *ttrace,
2552 struct trace *trace, FILE *fp)
2553{
bf2575c1
DA
2554 size_t printed = 0;
2555 struct syscall *sc;
b535d523
ACM
2556 struct rb_node *nd;
2557 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2558
b535d523 2559 if (syscall_stats == NULL)
bf2575c1
DA
2560 return 0;
2561
2562 printed += fprintf(fp, "\n");
2563
834fd46d
MW
2564 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2565 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2566 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2567
98a91837 2568 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2569 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2570 if (stats) {
2571 double min = (double)(stats->min) / NSEC_PER_MSEC;
2572 double max = (double)(stats->max) / NSEC_PER_MSEC;
2573 double avg = avg_stats(stats);
2574 double pct;
2575 u64 n = (u64) stats->n;
2576
2577 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2578 avg /= NSEC_PER_MSEC;
2579
b535d523 2580 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2581 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2582 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2583 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2584 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2585 }
bf2575c1
DA
2586 }
2587
b535d523 2588 resort_rb__delete(syscall_stats);
bf2575c1 2589 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2590
2591 return printed;
2592}
2593
96c14451 2594static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2595{
96c14451 2596 size_t printed = 0;
89dceb22 2597 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2598 double ratio;
2599
2600 if (ttrace == NULL)
2601 return 0;
2602
2603 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2604
15e65c69 2605 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2606 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2607 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2608 if (ttrace->pfmaj)
2609 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2610 if (ttrace->pfmin)
2611 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2612 if (trace->sched)
2613 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2614 else if (fputc('\n', fp) != EOF)
2615 ++printed;
2616
bf2575c1 2617 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2618
96c14451
ACM
2619 return printed;
2620}
896cbb56 2621
96c14451
ACM
2622static unsigned long thread__nr_events(struct thread_trace *ttrace)
2623{
2624 return ttrace ? ttrace->nr_events : 0;
2625}
2626
2627DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2628 struct thread *thread;
2629)
2630{
2631 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2632}
2633
1302d88e
ACM
2634static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2635{
96c14451
ACM
2636 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2637 size_t printed = trace__fprintf_threads_header(fp);
2638 struct rb_node *nd;
1302d88e 2639
96c14451
ACM
2640 if (threads == NULL) {
2641 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2642 return 0;
2643 }
2644
98a91837 2645 resort_rb__for_each_entry(nd, threads)
96c14451 2646 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
896cbb56 2647
96c14451
ACM
2648 resort_rb__delete(threads);
2649
2650 return printed;
1302d88e
ACM
2651}
2652
ae9ed035
ACM
2653static int trace__set_duration(const struct option *opt, const char *str,
2654 int unset __maybe_unused)
2655{
2656 struct trace *trace = opt->value;
2657
2658 trace->duration_filter = atof(str);
2659 return 0;
2660}
2661
f078c385
ACM
2662static int trace__set_filter_pids(const struct option *opt, const char *str,
2663 int unset __maybe_unused)
2664{
2665 int ret = -1;
2666 size_t i;
2667 struct trace *trace = opt->value;
2668 /*
2669 * FIXME: introduce a intarray class, plain parse csv and create a
2670 * { int nr, int entries[] } struct...
2671 */
2672 struct intlist *list = intlist__new(str);
2673
2674 if (list == NULL)
2675 return -1;
2676
2677 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2678 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2679
2680 if (trace->filter_pids.entries == NULL)
2681 goto out;
2682
2683 trace->filter_pids.entries[0] = getpid();
2684
2685 for (i = 1; i < trace->filter_pids.nr; ++i)
2686 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2687
2688 intlist__delete(list);
2689 ret = 0;
2690out:
2691 return ret;
2692}
2693
c24ff998
ACM
2694static int trace__open_output(struct trace *trace, const char *filename)
2695{
2696 struct stat st;
2697
2698 if (!stat(filename, &st) && st.st_size) {
2699 char oldname[PATH_MAX];
2700
2701 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2702 unlink(oldname);
2703 rename(filename, oldname);
2704 }
2705
2706 trace->output = fopen(filename, "w");
2707
2708 return trace->output == NULL ? -errno : 0;
2709}
2710
598d02c5
SF
2711static int parse_pagefaults(const struct option *opt, const char *str,
2712 int unset __maybe_unused)
2713{
2714 int *trace_pgfaults = opt->value;
2715
2716 if (strcmp(str, "all") == 0)
2717 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2718 else if (strcmp(str, "maj") == 0)
2719 *trace_pgfaults |= TRACE_PFMAJ;
2720 else if (strcmp(str, "min") == 0)
2721 *trace_pgfaults |= TRACE_PFMIN;
2722 else
2723 return -1;
2724
2725 return 0;
2726}
2727
14a052df
ACM
2728static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2729{
2730 struct perf_evsel *evsel;
2731
e5cadb93 2732 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2733 evsel->handler = handler;
2734}
2735
017037ff
ACM
2736/*
2737 * XXX: Hackish, just splitting the combined -e+--event (syscalls
2738 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2739 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2740 *
2741 * It'd be better to introduce a parse_options() variant that would return a
2742 * list with the terms it didn't match to an event...
2743 */
2744static int trace__parse_events_option(const struct option *opt, const char *str,
2745 int unset __maybe_unused)
2746{
2747 struct trace *trace = (struct trace *)opt->value;
2748 const char *s = str;
2749 char *sep = NULL, *lists[2] = { NULL, NULL, };
2750 int len = strlen(str), err = -1, list;
2751 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2752 char group_name[PATH_MAX];
2753
2754 if (strace_groups_dir == NULL)
2755 return -1;
2756
2757 if (*s == '!') {
2758 ++s;
2759 trace->not_ev_qualifier = true;
2760 }
2761
2762 while (1) {
2763 if ((sep = strchr(s, ',')) != NULL)
2764 *sep = '\0';
2765
2766 list = 0;
2767 if (syscalltbl__id(trace->sctbl, s) >= 0) {
2768 list = 1;
2769 } else {
2770 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2771 if (access(group_name, R_OK) == 0)
2772 list = 1;
2773 }
2774
2775 if (lists[list]) {
2776 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
2777 } else {
2778 lists[list] = malloc(len);
2779 if (lists[list] == NULL)
2780 goto out;
2781 strcpy(lists[list], s);
2782 }
2783
2784 if (!sep)
2785 break;
2786
2787 *sep = ',';
2788 s = sep + 1;
2789 }
2790
2791 if (lists[1] != NULL) {
2792 struct strlist_config slist_config = {
2793 .dirname = strace_groups_dir,
2794 };
2795
2796 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2797 if (trace->ev_qualifier == NULL) {
2798 fputs("Not enough memory to parse event qualifier", trace->output);
2799 goto out;
2800 }
2801
2802 if (trace__validate_ev_qualifier(trace))
2803 goto out;
2804 }
2805
2806 err = 0;
2807
2808 if (lists[0]) {
2809 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2810 "event selector. use 'perf list' to list available events",
2811 parse_events_option);
2812 err = parse_events_option(&o, lists[0], 0);
2813 }
2814out:
2815 if (sep)
2816 *sep = ',';
2817
2818 return err;
2819}
2820
b0ad8ea6 2821int cmd_trace(int argc, const char **argv)
514f1c67 2822{
6fdd9cb7 2823 const char *trace_usage[] = {
f15eb531
NK
2824 "perf trace [<options>] [<command>]",
2825 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2826 "perf trace record [<options>] [<command>]",
2827 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2828 NULL
2829 };
2830 struct trace trace = {
514f1c67
ACM
2831 .syscalls = {
2832 . max = -1,
2833 },
2834 .opts = {
2835 .target = {
2836 .uid = UINT_MAX,
2837 .uses_mmap = true,
2838 },
2839 .user_freq = UINT_MAX,
2840 .user_interval = ULLONG_MAX,
509051ea 2841 .no_buffering = true,
38d5447d 2842 .mmap_pages = UINT_MAX,
9d9cad76 2843 .proc_map_timeout = 500,
514f1c67 2844 },
007d66a0 2845 .output = stderr,
50c95cbd 2846 .show_comm = true,
e281a960 2847 .trace_syscalls = true,
44621819 2848 .kernel_syscallchains = false,
05614993 2849 .max_stack = UINT_MAX,
514f1c67 2850 };
c24ff998 2851 const char *output_name = NULL;
514f1c67 2852 const struct option trace_options[] = {
017037ff
ACM
2853 OPT_CALLBACK('e', "event", &trace, "event",
2854 "event/syscall selector. use 'perf list' to list available events",
2855 trace__parse_events_option),
50c95cbd
ACM
2856 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2857 "show the thread COMM next to its id"),
c522739d 2858 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
2859 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2860 trace__parse_events_option),
c24ff998 2861 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 2862 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
2863 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2864 "trace events on existing process id"),
ac9be8ee 2865 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 2866 "trace events on existing thread id"),
fa0e4ffe
ACM
2867 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2868 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 2869 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 2870 "system-wide collection from all CPUs"),
ac9be8ee 2871 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 2872 "list of cpus to monitor"),
6810fc91 2873 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 2874 "child tasks do not inherit counters"),
994a1f78
JO
2875 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2876 "number of mmap data pages",
2877 perf_evlist__parse_mmap_pages),
ac9be8ee 2878 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 2879 "user to profile"),
ae9ed035
ACM
2880 OPT_CALLBACK(0, "duration", &trace, "float",
2881 "show only events with duration > N.M ms",
2882 trace__set_duration),
1302d88e 2883 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 2884 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
2885 OPT_BOOLEAN('T', "time", &trace.full_time,
2886 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
2887 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2888 "Show only syscall summary with statistics"),
2889 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2890 "Show all syscalls and summary with statistics"),
598d02c5
SF
2891 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2892 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 2893 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 2894 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
2895 OPT_CALLBACK(0, "call-graph", &trace.opts,
2896 "record_mode[,record_size]", record_callchain_help,
2897 &record_parse_callchain_opt),
44621819
ACM
2898 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2899 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
2900 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2901 "Set the minimum stack depth when parsing the callchain, "
2902 "anything below the specified depth will be ignored."),
c6d4a494
ACM
2903 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2904 "Set the maximum stack depth when parsing the callchain, "
2905 "anything beyond the specified depth will be ignored. "
4cb93446 2906 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
9d9cad76
KL
2907 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2908 "per thread proc mmap processing timeout in ms"),
e36b7821
AB
2909 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2910 "ms to wait before starting measurement after program "
2911 "start"),
514f1c67
ACM
2912 OPT_END()
2913 };
ccd62a89 2914 bool __maybe_unused max_stack_user_set = true;
f3e459d1 2915 bool mmap_pages_user_set = true;
6fdd9cb7 2916 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 2917 int err;
32caf0d1 2918 char bf[BUFSIZ];
514f1c67 2919
4d08cb80
ACM
2920 signal(SIGSEGV, sighandler_dump_stack);
2921 signal(SIGFPE, sighandler_dump_stack);
2922
14a052df 2923 trace.evlist = perf_evlist__new();
fd0db102 2924 trace.sctbl = syscalltbl__new();
14a052df 2925
fd0db102 2926 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 2927 pr_err("Not enough memory to run!\n");
ff8f695c 2928 err = -ENOMEM;
14a052df
ACM
2929 goto out;
2930 }
2931
6fdd9cb7
YS
2932 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2933 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 2934
d7888573
WN
2935 err = bpf__setup_stdout(trace.evlist);
2936 if (err) {
2937 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2938 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2939 goto out;
2940 }
2941
59247e33
ACM
2942 err = -1;
2943
598d02c5
SF
2944 if (trace.trace_pgfaults) {
2945 trace.opts.sample_address = true;
2946 trace.opts.sample_time = true;
2947 }
2948
f3e459d1
ACM
2949 if (trace.opts.mmap_pages == UINT_MAX)
2950 mmap_pages_user_set = false;
2951
05614993 2952 if (trace.max_stack == UINT_MAX) {
fe176085 2953 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
2954 max_stack_user_set = false;
2955 }
2956
2957#ifdef HAVE_DWARF_UNWIND_SUPPORT
caa36ed7 2958 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
05614993
ACM
2959 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2960#endif
2961
2ddd5c04 2962 if (callchain_param.enabled) {
f3e459d1
ACM
2963 if (!mmap_pages_user_set && geteuid() == 0)
2964 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2965
566a0885 2966 symbol_conf.use_callchain = true;
f3e459d1 2967 }
566a0885 2968
14a052df
ACM
2969 if (trace.evlist->nr_entries > 0)
2970 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2971
1e28fe0a
SF
2972 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2973 return trace__record(&trace, argc-1, &argv[1]);
2974
2975 /* summary_only implies summary option, but don't overwrite summary if set */
2976 if (trace.summary_only)
2977 trace.summary = trace.summary_only;
2978
726f3234
ACM
2979 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2980 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
2981 pr_err("Please specify something to trace.\n");
2982 return -1;
2983 }
2984
017037ff 2985 if (!trace.trace_syscalls && trace.ev_qualifier) {
59247e33
ACM
2986 pr_err("The -e option can't be used with --no-syscalls.\n");
2987 goto out;
2988 }
2989
c24ff998
ACM
2990 if (output_name != NULL) {
2991 err = trace__open_output(&trace, output_name);
2992 if (err < 0) {
2993 perror("failed to create output file");
2994 goto out;
2995 }
2996 }
2997
fd0db102
ACM
2998 trace.open_id = syscalltbl__id(trace.sctbl, "open");
2999
602ad878 3000 err = target__validate(&trace.opts.target);
32caf0d1 3001 if (err) {
602ad878 3002 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3003 fprintf(trace.output, "%s", bf);
3004 goto out_close;
32caf0d1
NK
3005 }
3006
602ad878 3007 err = target__parse_uid(&trace.opts.target);
514f1c67 3008 if (err) {
602ad878 3009 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3010 fprintf(trace.output, "%s", bf);
3011 goto out_close;
514f1c67
ACM
3012 }
3013
602ad878 3014 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3015 trace.opts.target.system_wide = true;
3016
6810fc91
DA
3017 if (input_name)
3018 err = trace__replay(&trace);
3019 else
3020 err = trace__run(&trace, argc, argv);
1302d88e 3021
c24ff998
ACM
3022out_close:
3023 if (output_name != NULL)
3024 fclose(trace.output);
3025out:
1302d88e 3026 return err;
514f1c67 3027}