]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - tools/perf/builtin-trace.c
UBUNTU: Start new release
[mirror_ubuntu-zesty-kernel.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
514f1c67 24#include "util/evlist.h"
4b6ab94e 25#include <subcmd/exec-cmd.h>
752fde44 26#include "util/machine.h"
6810fc91 27#include "util/session.h"
752fde44 28#include "util/thread.h"
4b6ab94e 29#include <subcmd/parse-options.h>
2ae3a312 30#include "util/strlist.h"
bdc89661 31#include "util/intlist.h"
514f1c67 32#include "util/thread_map.h"
bf2575c1 33#include "util/stat.h"
97978b3e 34#include "trace-event.h"
9aca7f17 35#include "util/parse-events.h"
ba504235 36#include "util/bpf-loader.h"
566a0885 37#include "callchain.h"
fd0db102 38#include "syscalltbl.h"
96c14451 39#include "rb_resort.h"
514f1c67 40
fd0db102 41#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
514f1c67 42#include <stdlib.h>
8dd2a131 43#include <linux/err.h>
997bba8c
ACM
44#include <linux/filter.h>
45#include <linux/audit.h>
39878d49 46#include <linux/random.h>
c6d4a494 47#include <linux/stringify.h>
bd48c63e 48#include <linux/time64.h>
514f1c67 49
c188e7ac
ACM
50#ifndef O_CLOEXEC
51# define O_CLOEXEC 02000000
52#endif
53
d1d438a3
ACM
54struct trace {
55 struct perf_tool tool;
fd0db102 56 struct syscalltbl *sctbl;
d1d438a3
ACM
57 struct {
58 int max;
59 struct syscall *table;
60 struct {
61 struct perf_evsel *sys_enter,
62 *sys_exit;
63 } events;
64 } syscalls;
65 struct record_opts opts;
66 struct perf_evlist *evlist;
67 struct machine *host;
68 struct thread *current;
69 u64 base_time;
70 FILE *output;
71 unsigned long nr_events;
72 struct strlist *ev_qualifier;
73 struct {
74 size_t nr;
75 int *entries;
76 } ev_qualifier_ids;
d1d438a3
ACM
77 struct {
78 size_t nr;
79 pid_t *entries;
80 } filter_pids;
81 double duration_filter;
82 double runtime_ms;
83 struct {
84 u64 vfs_getname,
85 proc_getname;
86 } stats;
c6d4a494 87 unsigned int max_stack;
5cf9c84e 88 unsigned int min_stack;
d1d438a3
ACM
89 bool not_ev_qualifier;
90 bool live;
91 bool full_time;
92 bool sched;
93 bool multiple_threads;
94 bool summary;
95 bool summary_only;
96 bool show_comm;
97 bool show_tool_stats;
98 bool trace_syscalls;
44621819 99 bool kernel_syscallchains;
d1d438a3
ACM
100 bool force;
101 bool vfs_getname;
102 int trace_pgfaults;
fd0db102 103 int open_id;
d1d438a3 104};
a1c2552d 105
77170988
ACM
106struct tp_field {
107 int offset;
108 union {
109 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
110 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
111 };
112};
113
114#define TP_UINT_FIELD(bits) \
115static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
116{ \
55d43bca
DA
117 u##bits value; \
118 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
119 return value; \
77170988
ACM
120}
121
122TP_UINT_FIELD(8);
123TP_UINT_FIELD(16);
124TP_UINT_FIELD(32);
125TP_UINT_FIELD(64);
126
127#define TP_UINT_FIELD__SWAPPED(bits) \
128static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
129{ \
55d43bca
DA
130 u##bits value; \
131 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
132 return bswap_##bits(value);\
133}
134
135TP_UINT_FIELD__SWAPPED(16);
136TP_UINT_FIELD__SWAPPED(32);
137TP_UINT_FIELD__SWAPPED(64);
138
139static int tp_field__init_uint(struct tp_field *field,
140 struct format_field *format_field,
141 bool needs_swap)
142{
143 field->offset = format_field->offset;
144
145 switch (format_field->size) {
146 case 1:
147 field->integer = tp_field__u8;
148 break;
149 case 2:
150 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
151 break;
152 case 4:
153 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
154 break;
155 case 8:
156 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
157 break;
158 default:
159 return -1;
160 }
161
162 return 0;
163}
164
165static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
166{
167 return sample->raw_data + field->offset;
168}
169
170static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
171{
172 field->offset = format_field->offset;
173 field->pointer = tp_field__ptr;
174 return 0;
175}
176
177struct syscall_tp {
178 struct tp_field id;
179 union {
180 struct tp_field args, ret;
181 };
182};
183
184static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
185 struct tp_field *field,
186 const char *name)
187{
188 struct format_field *format_field = perf_evsel__field(evsel, name);
189
190 if (format_field == NULL)
191 return -1;
192
193 return tp_field__init_uint(field, format_field, evsel->needs_swap);
194}
195
/*
 * Initialize member @name of the struct syscall_tp stored in evsel->priv
 * from the tracepoint field of the same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc_tp_ = evsel->priv; \
	   perf_evsel__init_tp_uint_field(evsel, &sc_tp_->name, #name); })
199
/*
 * Look up the tracepoint field called @name in @evsel and initialize @field
 * as a pointer accessor for it.
 *
 * Returns -1 if the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	if (fmt != NULL)
		return tp_field__init_ptr(field, fmt);

	return -1;
}
211
/*
 * Pointer flavour of perf_evsel__init_sc_tp_uint_field(): initialize member
 * @name of the struct syscall_tp in evsel->priv as a pointer accessor.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc_tp_ = evsel->priv; \
	   perf_evsel__init_tp_ptr_field(evsel, &sc_tp_->name, #name); })
215
216static void perf_evsel__delete_priv(struct perf_evsel *evsel)
217{
04662523 218 zfree(&evsel->priv);
77170988
ACM
219 perf_evsel__delete(evsel);
220}
221
96695d44
NK
222static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
223{
224 evsel->priv = malloc(sizeof(struct syscall_tp));
225 if (evsel->priv != NULL) {
226 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
227 goto out_delete;
228
229 evsel->handler = handler;
230 return 0;
231 }
232
233 return -ENOMEM;
234
235out_delete:
04662523 236 zfree(&evsel->priv);
96695d44
NK
237 return -ENOENT;
238}
239
/*
 * Create the evsel for the "raw_syscalls:<direction>" tracepoint, falling
 * back to "syscalls:<direction>", and initialize it with @handler.
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the syscall_tp setup failed (the evsel is deleted in the latter case).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *tp = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(tp))
		tp = perf_evsel__newtp("syscalls", direction);

	if (IS_ERR(tp))
		return NULL;

	if (perf_evsel__init_syscall_tp(tp, handler) == 0)
		return tp;

	perf_evsel__delete_priv(tp);
	return NULL;
}
260
/* Read member @name of evsel->priv (a struct syscall_tp) from @sample as an integer. */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *sc_tp_ = evsel->priv; \
	   sc_tp_->name.integer(&sc_tp_->name, sample); })

/* Same lookup, but yields a pointer into @sample's raw data. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *sc_tp_ = evsel->priv; \
	   sc_tp_->name.pointer(&sc_tp_->name, sample); })
268
01533e97
ACM
269struct syscall_arg {
270 unsigned long val;
75b757ca
ACM
271 struct thread *thread;
272 struct trace *trace;
1f115cb7 273 void *parm;
01533e97
ACM
274 u8 idx;
275 u8 mask;
276};
277
/*
 * Table translating an integer argument into a name: entries[val - offset]
 * is used when that index falls inside [0, nr_entries).
 */
struct strarray {
	int		offset;
	int		nr_entries;
	const char	**entries;
};
283
/* Define strarray__<array> wrapping the string table @array (offset left at 0). */
#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same, for tables whose first entry corresponds to the value @off. */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}
294
975b7c2f
ACM
295static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
296 const char *intfmt,
297 struct syscall_arg *arg)
1f115cb7 298{
1f115cb7 299 struct strarray *sa = arg->parm;
03e3adc9 300 int idx = arg->val - sa->offset;
1f115cb7
ACM
301
302 if (idx < 0 || idx >= sa->nr_entries)
975b7c2f 303 return scnprintf(bf, size, intfmt, arg->val);
1f115cb7
ACM
304
305 return scnprintf(bf, size, "%s", sa->entries[idx]);
306}
307
975b7c2f
ACM
/* strarray beautifier that prints unnamed values in decimal ("%d"). */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
315
844ae5b4
ACM
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *	  gets rewritten to support all arches.
 *
 * strarray beautifier that prints unnamed values in hex ("%#x").
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	const char *fallback_fmt = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 329
75b757ca
ACM
330static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
331 struct syscall_arg *arg);
332
333#define SCA_FD syscall_arg__scnprintf_fd
334
48e1f91a
ACM
335#ifndef AT_FDCWD
336#define AT_FDCWD -100
337#endif
338
75b757ca
ACM
339static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
340 struct syscall_arg *arg)
341{
342 int fd = arg->val;
343
344 if (fd == AT_FDCWD)
345 return scnprintf(bf, size, "CWD");
346
347 return syscall_arg__scnprintf_fd(bf, size, arg);
348}
349
350#define SCA_FDAT syscall_arg__scnprintf_fd_at
351
352static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
353 struct syscall_arg *arg);
354
355#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
356
6e7eeb51 357static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
01533e97 358 struct syscall_arg *arg)
13d4ff3e 359{
01533e97 360 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
361}
362
beccb2b5
ACM
363#define SCA_HEX syscall_arg__scnprintf_hex
364
a1c2552d
ACM
365static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
366 struct syscall_arg *arg)
367{
368 return scnprintf(bf, size, "%d", arg->val);
369}
370
371#define SCA_INT syscall_arg__scnprintf_int
372
729a7841
ACM
373static const char *bpf_cmd[] = {
374 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
375 "MAP_GET_NEXT_KEY", "PROG_LOAD",
376};
377static DEFINE_STRARRAY(bpf_cmd);
378
03e3adc9
ACM
379static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
380static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 381
1f115cb7
ACM
382static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
383static DEFINE_STRARRAY(itimers);
384
b62bee1b
ACM
385static const char *keyctl_options[] = {
386 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
387 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
388 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
389 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
390 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
391};
392static DEFINE_STRARRAY(keyctl_options);
393
efe6b882
ACM
394static const char *whences[] = { "SET", "CUR", "END",
395#ifdef SEEK_DATA
396"DATA",
397#endif
398#ifdef SEEK_HOLE
399"HOLE",
400#endif
401};
402static DEFINE_STRARRAY(whences);
f9da0b0c 403
80f587d5
ACM
404static const char *fcntl_cmds[] = {
405 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
406 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
407 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
408 "F_GETOWNER_UIDS",
409};
410static DEFINE_STRARRAY(fcntl_cmds);
411
c045bf02
ACM
412static const char *rlimit_resources[] = {
413 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
414 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
415 "RTTIME",
416};
417static DEFINE_STRARRAY(rlimit_resources);
418
eb5b1b14
ACM
419static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
420static DEFINE_STRARRAY(sighow);
421
4f8c1b74
DA
422static const char *clockid[] = {
423 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
28ebb87c
ACM
424 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
425 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
4f8c1b74
DA
426};
427static DEFINE_STRARRAY(clockid);
428
e10bce81
ACM
429static const char *socket_families[] = {
430 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
431 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
432 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
433 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
434 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
435 "ALG", "NFC", "VSOCK",
436};
437static DEFINE_STRARRAY(socket_families);
438
51108999
ACM
439static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
440 struct syscall_arg *arg)
441{
442 size_t printed = 0;
443 int mode = arg->val;
444
445 if (mode == F_OK) /* 0 */
446 return scnprintf(bf, size, "F");
447#define P_MODE(n) \
448 if (mode & n##_OK) { \
449 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
450 mode &= ~n##_OK; \
451 }
452
453 P_MODE(R);
454 P_MODE(W);
455 P_MODE(X);
456#undef P_MODE
457
458 if (mode)
459 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
460
461 return printed;
462}
463
464#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
465
f994592d
ACM
466static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
467 struct syscall_arg *arg);
468
469#define SCA_FILENAME syscall_arg__scnprintf_filename
470
46cce19b
ACM
471static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
472 struct syscall_arg *arg)
473{
474 int printed = 0, flags = arg->val;
475
476#define P_FLAG(n) \
477 if (flags & O_##n) { \
478 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
479 flags &= ~O_##n; \
480 }
481
482 P_FLAG(CLOEXEC);
483 P_FLAG(NONBLOCK);
484#undef P_FLAG
485
486 if (flags)
487 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
488
489 return printed;
490}
491
492#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
493
844ae5b4
ACM
494#if defined(__i386__) || defined(__x86_64__)
495/*
496 * FIXME: Make this available to all arches.
497 */
78645cf3
ACM
498#define TCGETS 0x5401
499
500static const char *tioctls[] = {
501 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
502 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
503 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
504 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
505 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
506 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
507 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
508 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
509 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
510 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
511 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
512 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
513 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
514 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
515 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
516};
517
518static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
844ae5b4 519#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 520
a355a61e
ACM
521#ifndef GRND_NONBLOCK
522#define GRND_NONBLOCK 0x0001
523#endif
524#ifndef GRND_RANDOM
525#define GRND_RANDOM 0x0002
526#endif
527
39878d49
ACM
528static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
529 struct syscall_arg *arg)
530{
531 int printed = 0, flags = arg->val;
532
533#define P_FLAG(n) \
534 if (flags & GRND_##n) { \
535 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
536 flags &= ~GRND_##n; \
537 }
538
539 P_FLAG(RANDOM);
540 P_FLAG(NONBLOCK);
541#undef P_FLAG
542
543 if (flags)
544 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
545
546 return printed;
547}
548
549#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
550
453350dd
ACM
/*
 * Shorthand for a syscall_fmts[] entry: beautify argument number @arg with
 * SCA_STRARRAY using strarray__<array>.  @name is not expanded, it only
 * documents the argument at the point of use.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
554
ea8dc3ce 555#include "trace/beauty/eventfd.c"
8bf382ce 556#include "trace/beauty/flock.c"
d5d71e86 557#include "trace/beauty/futex_op.c"
df4cb167 558#include "trace/beauty/mmap.c"
ba2f22cf 559#include "trace/beauty/mode_t.c"
a30e6259 560#include "trace/beauty/msg_flags.c"
8f48df69 561#include "trace/beauty/open_flags.c"
62de344e 562#include "trace/beauty/perf_event_open.c"
d5d71e86 563#include "trace/beauty/pid.c"
a3bca91f 564#include "trace/beauty/sched_policy.c"
f5cd95ea 565#include "trace/beauty/seccomp.c"
12199d8e 566#include "trace/beauty/signum.c"
bbf86c43 567#include "trace/beauty/socket_type.c"
7206b900 568#include "trace/beauty/waitid_options.c"
a3bca91f 569
514f1c67
ACM
570static struct syscall_fmt {
571 const char *name;
aec1930b 572 const char *alias;
01533e97 573 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1f115cb7 574 void *arg_parm[6];
514f1c67 575 bool errmsg;
11c8e39f 576 bool errpid;
514f1c67 577 bool timeout;
04b34729 578 bool hexret;
514f1c67 579} syscall_fmts[] = {
51108999 580 { .name = "access", .errmsg = true,
12f3ca4f 581 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
aec1930b 582 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
729a7841 583 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
beccb2b5
ACM
584 { .name = "brk", .hexret = true,
585 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
12f3ca4f
ACM
586 { .name = "chdir", .errmsg = true, },
587 { .name = "chmod", .errmsg = true, },
588 { .name = "chroot", .errmsg = true, },
4f8c1b74 589 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
11c8e39f 590 { .name = "clone", .errpid = true, },
75b757ca 591 { .name = "close", .errmsg = true,
48000a1a 592 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
a14bb860 593 { .name = "connect", .errmsg = true, },
12f3ca4f 594 { .name = "creat", .errmsg = true, },
b6565c90
ACM
595 { .name = "dup", .errmsg = true, },
596 { .name = "dup2", .errmsg = true, },
597 { .name = "dup3", .errmsg = true, },
453350dd 598 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
49af9e93
ACM
599 { .name = "eventfd2", .errmsg = true,
600 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
12f3ca4f 601 { .name = "faccessat", .errmsg = true, },
b6565c90
ACM
602 { .name = "fadvise64", .errmsg = true, },
603 { .name = "fallocate", .errmsg = true, },
604 { .name = "fchdir", .errmsg = true, },
605 { .name = "fchmod", .errmsg = true, },
75b757ca 606 { .name = "fchmodat", .errmsg = true,
12f3ca4f 607 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90 608 { .name = "fchown", .errmsg = true, },
75b757ca 609 { .name = "fchownat", .errmsg = true,
12f3ca4f 610 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
75b757ca 611 { .name = "fcntl", .errmsg = true,
b6565c90 612 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
75b757ca 613 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
b6565c90 614 { .name = "fdatasync", .errmsg = true, },
5cea6ff2 615 { .name = "flock", .errmsg = true,
b6565c90
ACM
616 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
617 { .name = "fsetxattr", .errmsg = true, },
618 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
12f3ca4f 619 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
b6565c90
ACM
620 { .name = "fstatfs", .errmsg = true, },
621 { .name = "fsync", .errmsg = true, },
622 { .name = "ftruncate", .errmsg = true, },
f9da0b0c
ACM
623 { .name = "futex", .errmsg = true,
624 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
75b757ca 625 { .name = "futimesat", .errmsg = true,
12f3ca4f 626 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90
ACM
627 { .name = "getdents", .errmsg = true, },
628 { .name = "getdents64", .errmsg = true, },
453350dd 629 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
c65f1070 630 { .name = "getpid", .errpid = true, },
d1d438a3 631 { .name = "getpgid", .errpid = true, },
c65f1070 632 { .name = "getppid", .errpid = true, },
39878d49
ACM
633 { .name = "getrandom", .errmsg = true,
634 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
453350dd 635 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f
ACM
636 { .name = "getxattr", .errmsg = true, },
637 { .name = "inotify_add_watch", .errmsg = true, },
beccb2b5 638 { .name = "ioctl", .errmsg = true,
b6565c90 639 .arg_scnprintf = {
844ae5b4
ACM
640#if defined(__i386__) || defined(__x86_64__)
641/*
642 * FIXME: Make this available to all arches.
643 */
78645cf3
ACM
644 [1] = SCA_STRHEXARRAY, /* cmd */
645 [2] = SCA_HEX, /* arg */ },
646 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
844ae5b4
ACM
647#else
648 [2] = SCA_HEX, /* arg */ }, },
649#endif
b62bee1b 650 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
8bad5b0a
ACM
651 { .name = "kill", .errmsg = true,
652 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f
ACM
653 { .name = "lchown", .errmsg = true, },
654 { .name = "lgetxattr", .errmsg = true, },
75b757ca 655 { .name = "linkat", .errmsg = true,
48000a1a 656 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
12f3ca4f
ACM
657 { .name = "listxattr", .errmsg = true, },
658 { .name = "llistxattr", .errmsg = true, },
659 { .name = "lremovexattr", .errmsg = true, },
75b757ca 660 { .name = "lseek", .errmsg = true,
b6565c90 661 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
75b757ca 662 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
12f3ca4f
ACM
663 { .name = "lsetxattr", .errmsg = true, },
664 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
665 { .name = "lsxattr", .errmsg = true, },
9e9716d1
ACM
666 { .name = "madvise", .errmsg = true,
667 .arg_scnprintf = { [0] = SCA_HEX, /* start */
668 [2] = SCA_MADV_BHV, /* behavior */ }, },
12f3ca4f 669 { .name = "mkdir", .errmsg = true, },
75b757ca 670 { .name = "mkdirat", .errmsg = true,
12f3ca4f
ACM
671 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
672 { .name = "mknod", .errmsg = true, },
75b757ca 673 { .name = "mknodat", .errmsg = true,
12f3ca4f 674 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
3d903aa7
ACM
675 { .name = "mlock", .errmsg = true,
676 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
677 { .name = "mlockall", .errmsg = true,
678 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5 679 { .name = "mmap", .hexret = true,
ae685380 680 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
941557e0 681 [2] = SCA_MMAP_PROT, /* prot */
b6565c90 682 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
beccb2b5 683 { .name = "mprotect", .errmsg = true,
ae685380
ACM
684 .arg_scnprintf = { [0] = SCA_HEX, /* start */
685 [2] = SCA_MMAP_PROT, /* prot */ }, },
090389b6
ACM
686 { .name = "mq_unlink", .errmsg = true,
687 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
ae685380
ACM
688 { .name = "mremap", .hexret = true,
689 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
86998dda 690 [3] = SCA_MREMAP_FLAGS, /* flags */
ae685380 691 [4] = SCA_HEX, /* new_addr */ }, },
3d903aa7
ACM
692 { .name = "munlock", .errmsg = true,
693 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5
ACM
694 { .name = "munmap", .errmsg = true,
695 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
75b757ca 696 { .name = "name_to_handle_at", .errmsg = true,
48000a1a 697 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
75b757ca 698 { .name = "newfstatat", .errmsg = true,
12f3ca4f 699 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
be65a89a 700 { .name = "open", .errmsg = true,
12f3ca4f 701 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 702 { .name = "open_by_handle_at", .errmsg = true,
75b757ca
ACM
703 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
704 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 705 { .name = "openat", .errmsg = true,
75b757ca
ACM
706 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
707 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
a1c2552d 708 { .name = "perf_event_open", .errmsg = true,
ccd9b2a7 709 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
a1c2552d
ACM
710 [3] = SCA_FD, /* group_fd */
711 [4] = SCA_PERF_FLAGS, /* flags */ }, },
46cce19b
ACM
712 { .name = "pipe2", .errmsg = true,
713 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
aec1930b
ACM
714 { .name = "poll", .errmsg = true, .timeout = true, },
715 { .name = "ppoll", .errmsg = true, .timeout = true, },
b6565c90
ACM
716 { .name = "pread", .errmsg = true, .alias = "pread64", },
717 { .name = "preadv", .errmsg = true, .alias = "pread", },
453350dd 718 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
b6565c90
ACM
719 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
720 { .name = "pwritev", .errmsg = true, },
721 { .name = "read", .errmsg = true, },
12f3ca4f 722 { .name = "readlink", .errmsg = true, },
75b757ca 723 { .name = "readlinkat", .errmsg = true,
12f3ca4f 724 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
b6565c90 725 { .name = "readv", .errmsg = true, },
b2cc99fd 726 { .name = "recvfrom", .errmsg = true,
b6565c90 727 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 728 { .name = "recvmmsg", .errmsg = true,
b6565c90 729 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 730 { .name = "recvmsg", .errmsg = true,
b6565c90 731 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
12f3ca4f 732 { .name = "removexattr", .errmsg = true, },
75b757ca 733 { .name = "renameat", .errmsg = true,
48000a1a 734 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
12f3ca4f 735 { .name = "rmdir", .errmsg = true, },
8bad5b0a
ACM
736 { .name = "rt_sigaction", .errmsg = true,
737 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
453350dd 738 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
8bad5b0a
ACM
739 { .name = "rt_sigqueueinfo", .errmsg = true,
740 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
741 { .name = "rt_tgsigqueueinfo", .errmsg = true,
742 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
f0bbd602
ACM
743 { .name = "sched_getattr", .errmsg = true, },
744 { .name = "sched_setattr", .errmsg = true, },
a3bca91f
ACM
745 { .name = "sched_setscheduler", .errmsg = true,
746 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
997bba8c
ACM
747 { .name = "seccomp", .errmsg = true,
748 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
749 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
aec1930b 750 { .name = "select", .errmsg = true, .timeout = true, },
b2cc99fd 751 { .name = "sendmmsg", .errmsg = true,
b6565c90 752 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 753 { .name = "sendmsg", .errmsg = true,
b6565c90 754 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 755 { .name = "sendto", .errmsg = true,
b6565c90 756 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
c65f1070 757 { .name = "set_tid_address", .errpid = true, },
453350dd 758 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
d1d438a3 759 { .name = "setpgid", .errmsg = true, },
453350dd 760 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f 761 { .name = "setxattr", .errmsg = true, },
b6565c90 762 { .name = "shutdown", .errmsg = true, },
e10bce81 763 { .name = "socket", .errmsg = true,
a28b24b2
ACM
764 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
765 [1] = SCA_SK_TYPE, /* type */ },
07120aa5
ACM
766 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
767 { .name = "socketpair", .errmsg = true,
768 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
769 [1] = SCA_SK_TYPE, /* type */ },
e10bce81 770 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
12f3ca4f
ACM
771 { .name = "stat", .errmsg = true, .alias = "newstat", },
772 { .name = "statfs", .errmsg = true, },
34221118
ACM
773 { .name = "swapoff", .errmsg = true,
774 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
775 { .name = "swapon", .errmsg = true,
776 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
75b757ca 777 { .name = "symlinkat", .errmsg = true,
48000a1a 778 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
8bad5b0a
ACM
779 { .name = "tgkill", .errmsg = true,
780 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
781 { .name = "tkill", .errmsg = true,
782 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f 783 { .name = "truncate", .errmsg = true, },
e5959683 784 { .name = "uname", .errmsg = true, .alias = "newuname", },
75b757ca 785 { .name = "unlinkat", .errmsg = true,
12f3ca4f
ACM
786 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
787 { .name = "utime", .errmsg = true, },
75b757ca 788 { .name = "utimensat", .errmsg = true,
12f3ca4f
ACM
789 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
790 { .name = "utimes", .errmsg = true, },
b6565c90 791 { .name = "vmsplice", .errmsg = true, },
11c8e39f 792 { .name = "wait4", .errpid = true,
7206b900 793 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
11c8e39f 794 { .name = "waitid", .errpid = true,
7206b900 795 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
b6565c90
ACM
796 { .name = "write", .errmsg = true, },
797 { .name = "writev", .errmsg = true, },
514f1c67
ACM
798};
799
800static int syscall_fmt__cmp(const void *name, const void *fmtp)
801{
802 const struct syscall_fmt *fmt = fmtp;
803 return strcmp(name, fmt->name);
804}
805
806static struct syscall_fmt *syscall_fmt__find(const char *name)
807{
808 const int nmemb = ARRAY_SIZE(syscall_fmts);
809 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
810}
811
/*
 * Per-syscall description: the tracepoint format and its argument fields,
 * the syscall name, the optional syscall_fmts[] entry and the per-argument
 * beautifiers/parameters.
 */
struct syscall {
	struct event_format	*tp_format;
	int			nr_args;
	struct format_field	*args;
	const char		*name;
	bool			is_exit;
	struct syscall_fmt	*fmt;
	size_t			(**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void			**arg_parm;
};
822
60c907ab
ACM
823static size_t fprintf_duration(unsigned long t, FILE *fp)
824{
825 double duration = (double)t / NSEC_PER_MSEC;
826 size_t printed = fprintf(fp, "(");
827
828 if (duration >= 1.0)
829 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
830 else if (duration >= 0.01)
831 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
832 else
833 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 834 return printed + fprintf(fp, "): ");
60c907ab
ACM
835}
836
f994592d
ACM
837/**
838 * filename.ptr: The filename char pointer that will be vfs_getname'd
839 * filename.entry_str_pos: Where to insert the string translated from
840 * filename.ptr by the vfs_getname tracepoint/kprobe.
841 */
752fde44
ACM
842struct thread_trace {
843 u64 entry_time;
752fde44 844 bool entry_pending;
efd5745e 845 unsigned long nr_events;
a2ea67d7 846 unsigned long pfmaj, pfmin;
752fde44 847 char *entry_str;
1302d88e 848 double runtime_ms;
f994592d
ACM
849 struct {
850 unsigned long ptr;
7f4f8001
ACM
851 short int entry_str_pos;
852 bool pending_open;
853 unsigned int namelen;
854 char *name;
f994592d 855 } filename;
75b757ca
ACM
856 struct {
857 int max;
858 char **table;
859 } paths;
bf2575c1
DA
860
861 struct intlist *syscall_stats;
752fde44
ACM
862};
863
864static struct thread_trace *thread_trace__new(void)
865{
75b757ca
ACM
866 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
867
868 if (ttrace)
869 ttrace->paths.max = -1;
870
bf2575c1
DA
871 ttrace->syscall_stats = intlist__new(NULL);
872
75b757ca 873 return ttrace;
752fde44
ACM
874}
875
c24ff998 876static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 877{
efd5745e
ACM
878 struct thread_trace *ttrace;
879
752fde44
ACM
880 if (thread == NULL)
881 goto fail;
882
89dceb22
NK
883 if (thread__priv(thread) == NULL)
884 thread__set_priv(thread, thread_trace__new());
48000a1a 885
89dceb22 886 if (thread__priv(thread) == NULL)
752fde44
ACM
887 goto fail;
888
89dceb22 889 ttrace = thread__priv(thread);
efd5745e
ACM
890 ++ttrace->nr_events;
891
892 return ttrace;
752fde44 893fail:
c24ff998 894 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
895 "WARNING: not enough memory, dropping samples!\n");
896 return NULL;
897}
898
598d02c5
SF
/*
 * Bit flags. NOTE(review): presumably these select major/minor page fault
 * tracing, the users are not in this part of the file - confirm.
 */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Bytes allocated for each thread_trace->entry_str buffer. */
static const size_t trace__entry_str_size = 2048;
903
97119f37 904static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 905{
89dceb22 906 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
907
908 if (fd > ttrace->paths.max) {
909 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
910
911 if (npath == NULL)
912 return -1;
913
914 if (ttrace->paths.max != -1) {
915 memset(npath + ttrace->paths.max + 1, 0,
916 (fd - ttrace->paths.max) * sizeof(char *));
917 } else {
918 memset(npath, 0, (fd + 1) * sizeof(char *));
919 }
920
921 ttrace->paths.table = npath;
922 ttrace->paths.max = fd;
923 }
924
925 ttrace->paths.table[fd] = strdup(pathname);
926
927 return ttrace->paths.table[fd] != NULL ? 0 : -1;
928}
929
97119f37
ACM
930static int thread__read_fd_path(struct thread *thread, int fd)
931{
932 char linkname[PATH_MAX], pathname[PATH_MAX];
933 struct stat st;
934 int ret;
935
936 if (thread->pid_ == thread->tid) {
937 scnprintf(linkname, sizeof(linkname),
938 "/proc/%d/fd/%d", thread->pid_, fd);
939 } else {
940 scnprintf(linkname, sizeof(linkname),
941 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
942 }
943
944 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
945 return -1;
946
947 ret = readlink(linkname, pathname, sizeof(pathname));
948
949 if (ret < 0 || ret > st.st_size)
950 return -1;
951
952 pathname[ret] = '\0';
953 return trace__set_fd_pathname(thread, fd, pathname);
954}
955
c522739d
ACM
956static const char *thread__fd_path(struct thread *thread, int fd,
957 struct trace *trace)
75b757ca 958{
89dceb22 959 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
960
961 if (ttrace == NULL)
962 return NULL;
963
964 if (fd < 0)
965 return NULL;
966
cdcd1e6b 967 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
968 if (!trace->live)
969 return NULL;
970 ++trace->stats.proc_getname;
cdcd1e6b 971 if (thread__read_fd_path(thread, fd))
c522739d
ACM
972 return NULL;
973 }
75b757ca
ACM
974
975 return ttrace->paths.table[fd];
976}
977
978static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
979 struct syscall_arg *arg)
980{
981 int fd = arg->val;
982 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 983 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
984
985 if (path)
986 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
987
988 return printed;
989}
990
991static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
992 struct syscall_arg *arg)
993{
994 int fd = arg->val;
995 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 996 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 997
04662523
ACM
998 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
999 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1000
1001 return printed;
1002}
1003
f994592d
ACM
1004static void thread__set_filename_pos(struct thread *thread, const char *bf,
1005 unsigned long ptr)
1006{
1007 struct thread_trace *ttrace = thread__priv(thread);
1008
1009 ttrace->filename.ptr = ptr;
1010 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1011}
1012
1013static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1014 struct syscall_arg *arg)
1015{
1016 unsigned long ptr = arg->val;
1017
1018 if (!arg->trace->vfs_getname)
1019 return scnprintf(bf, size, "%#x", ptr);
1020
1021 thread__set_filename_pos(arg->thread, bf, ptr);
1022 return 0;
1023}
1024
ae9ed035
ACM
1025static bool trace__filter_duration(struct trace *trace, double t)
1026{
1027 return t < (trace->duration_filter * NSEC_PER_MSEC);
1028}
1029
752fde44
ACM
1030static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1031{
1032 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1033
60c907ab 1034 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1035}
1036
/*
 * Flags written by sig_handler(). Objects that a signal handler stores to
 * have to be volatile sig_atomic_t for the access to be well defined.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Signal handler: sets 'done' and records in 'interrupted' whether the
 * signal was SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1045
752fde44 1046static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
60c907ab 1047 u64 duration, u64 tstamp, FILE *fp)
752fde44
ACM
1048{
1049 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
60c907ab 1050 printed += fprintf_duration(duration, fp);
752fde44 1051
50c95cbd
ACM
1052 if (trace->multiple_threads) {
1053 if (trace->show_comm)
1902efe7 1054 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1055 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1056 }
752fde44
ACM
1057
1058 return printed;
1059}
1060
c24ff998 1061static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1062 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1063{
1064 int ret = 0;
1065
1066 switch (event->header.type) {
1067 case PERF_RECORD_LOST:
c24ff998 1068 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1069 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1070 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1071 break;
752fde44 1072 default:
162f0bef 1073 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1074 break;
1075 }
1076
1077 return ret;
1078}
1079
c24ff998 1080static int trace__tool_process(struct perf_tool *tool,
752fde44 1081 union perf_event *event,
162f0bef 1082 struct perf_sample *sample,
752fde44
ACM
1083 struct machine *machine)
1084{
c24ff998 1085 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1086 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1087}
1088
caf8a0d0
ACM
1089static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1090{
1091 struct machine *machine = vmachine;
1092
1093 if (machine->kptr_restrict_warned)
1094 return NULL;
1095
1096 if (symbol_conf.kptr_restrict) {
1097 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1098 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1099 "Kernel samples will not be resolved.\n");
1100 machine->kptr_restrict_warned = true;
1101 return NULL;
1102 }
1103
1104 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1105}
1106
752fde44
ACM
1107static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1108{
0a7e6d1b 1109 int err = symbol__init(NULL);
752fde44
ACM
1110
1111 if (err)
1112 return err;
1113
8fb598e5
DA
1114 trace->host = machine__new_host();
1115 if (trace->host == NULL)
1116 return -ENOMEM;
752fde44 1117
caf8a0d0 1118 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
706c3da4
ACM
1119 return -errno;
1120
a33fbd56 1121 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76
KL
1122 evlist->threads, trace__tool_process, false,
1123 trace->opts.proc_map_timeout);
752fde44
ACM
1124 if (err)
1125 symbol__exit();
1126
1127 return err;
1128}
1129
13d4ff3e
ACM
1130static int syscall__set_arg_fmts(struct syscall *sc)
1131{
1132 struct format_field *field;
b6565c90 1133 int idx = 0, len;
13d4ff3e 1134
f208bd8d 1135 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
13d4ff3e
ACM
1136 if (sc->arg_scnprintf == NULL)
1137 return -1;
1138
1f115cb7
ACM
1139 if (sc->fmt)
1140 sc->arg_parm = sc->fmt->arg_parm;
1141
f208bd8d 1142 for (field = sc->args; field; field = field->next) {
beccb2b5
ACM
1143 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1144 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
12f3ca4f
ACM
1145 else if (strcmp(field->type, "const char *") == 0 &&
1146 (strcmp(field->name, "filename") == 0 ||
1147 strcmp(field->name, "path") == 0 ||
1148 strcmp(field->name, "pathname") == 0))
1149 sc->arg_scnprintf[idx] = SCA_FILENAME;
beccb2b5 1150 else if (field->flags & FIELD_IS_POINTER)
13d4ff3e 1151 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
d1d438a3
ACM
1152 else if (strcmp(field->type, "pid_t") == 0)
1153 sc->arg_scnprintf[idx] = SCA_PID;
ba2f22cf
ACM
1154 else if (strcmp(field->type, "umode_t") == 0)
1155 sc->arg_scnprintf[idx] = SCA_MODE_T;
b6565c90
ACM
1156 else if ((strcmp(field->type, "int") == 0 ||
1157 strcmp(field->type, "unsigned int") == 0 ||
1158 strcmp(field->type, "long") == 0) &&
1159 (len = strlen(field->name)) >= 2 &&
1160 strcmp(field->name + len - 2, "fd") == 0) {
1161 /*
1162 * /sys/kernel/tracing/events/syscalls/sys_enter*
1163 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1164 * 65 int
1165 * 23 unsigned int
1166 * 7 unsigned long
1167 */
1168 sc->arg_scnprintf[idx] = SCA_FD;
1169 }
13d4ff3e
ACM
1170 ++idx;
1171 }
1172
1173 return 0;
1174}
1175
514f1c67
ACM
1176static int trace__read_syscall_info(struct trace *trace, int id)
1177{
1178 char tp_name[128];
1179 struct syscall *sc;
fd0db102 1180 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1181
1182 if (name == NULL)
1183 return -1;
514f1c67
ACM
1184
1185 if (id > trace->syscalls.max) {
1186 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1187
1188 if (nsyscalls == NULL)
1189 return -1;
1190
1191 if (trace->syscalls.max != -1) {
1192 memset(nsyscalls + trace->syscalls.max + 1, 0,
1193 (id - trace->syscalls.max) * sizeof(*sc));
1194 } else {
1195 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1196 }
1197
1198 trace->syscalls.table = nsyscalls;
1199 trace->syscalls.max = id;
1200 }
1201
1202 sc = trace->syscalls.table + id;
3a531260 1203 sc->name = name;
2ae3a312 1204
3a531260 1205 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1206
aec1930b 1207 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1208 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1209
8dd2a131 1210 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1211 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1212 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1213 }
514f1c67 1214
8dd2a131 1215 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1216 return -1;
1217
f208bd8d
ACM
1218 sc->args = sc->tp_format->format.fields;
1219 sc->nr_args = sc->tp_format->format.nr_fields;
c42de706
TS
1220 /*
1221 * We need to check and discard the first variable '__syscall_nr'
1222 * or 'nr' that mean the syscall number. It is needless here.
1223 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1224 */
1225 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1226 sc->args = sc->args->next;
1227 --sc->nr_args;
1228 }
1229
5089f20e
ACM
1230 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1231
13d4ff3e 1232 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1233}
1234
d0cc439b
ACM
1235static int trace__validate_ev_qualifier(struct trace *trace)
1236{
8b3ce757 1237 int err = 0, i;
d0cc439b
ACM
1238 struct str_node *pos;
1239
8b3ce757
ACM
1240 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1241 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1242 sizeof(trace->ev_qualifier_ids.entries[0]));
1243
1244 if (trace->ev_qualifier_ids.entries == NULL) {
1245 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1246 trace->output);
1247 err = -EINVAL;
1248 goto out;
1249 }
1250
1251 i = 0;
1252
602a1f4d 1253 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1254 const char *sc = pos->s;
fd0db102 1255 int id = syscalltbl__id(trace->sctbl, sc);
d0cc439b 1256
8b3ce757 1257 if (id < 0) {
d0cc439b
ACM
1258 if (err == 0) {
1259 fputs("Error:\tInvalid syscall ", trace->output);
1260 err = -EINVAL;
1261 } else {
1262 fputs(", ", trace->output);
1263 }
1264
1265 fputs(sc, trace->output);
1266 }
8b3ce757
ACM
1267
1268 trace->ev_qualifier_ids.entries[i++] = id;
d0cc439b
ACM
1269 }
1270
1271 if (err < 0) {
1272 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1273 "\nHint:\tand: 'man syscalls'\n", trace->output);
8b3ce757
ACM
1274 zfree(&trace->ev_qualifier_ids.entries);
1275 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1276 }
8b3ce757 1277out:
d0cc439b
ACM
1278 return err;
1279}
1280
55d43bca
DA
1281/*
1282 * args is to be interpreted as a series of longs but we need to handle
1283 * 8-byte unaligned accesses. args points to raw_data within the event
1284 * and raw_data is guaranteed to be 8-byte unaligned because it is
1285 * preceded by raw_size which is a u32. So we need to copy args to a temp
1286 * variable to read it. Most notably this avoids extended load instructions
1287 * on unaligned addresses
1288 */
1289
752fde44 1290static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1291 unsigned char *args, struct trace *trace,
75b757ca 1292 struct thread *thread)
514f1c67 1293{
514f1c67 1294 size_t printed = 0;
55d43bca
DA
1295 unsigned char *p;
1296 unsigned long val;
514f1c67 1297
f208bd8d 1298 if (sc->args != NULL) {
514f1c67 1299 struct format_field *field;
01533e97
ACM
1300 u8 bit = 1;
1301 struct syscall_arg arg = {
75b757ca
ACM
1302 .idx = 0,
1303 .mask = 0,
1304 .trace = trace,
1305 .thread = thread,
01533e97 1306 };
6e7eeb51 1307
f208bd8d 1308 for (field = sc->args; field;
01533e97
ACM
1309 field = field->next, ++arg.idx, bit <<= 1) {
1310 if (arg.mask & bit)
6e7eeb51 1311 continue;
55d43bca
DA
1312
1313 /* special care for unaligned accesses */
1314 p = args + sizeof(unsigned long) * arg.idx;
1315 memcpy(&val, p, sizeof(val));
1316
4aa58232
ACM
1317 /*
1318 * Suppress this argument if its value is zero and
1319 * and we don't have a string associated in an
1320 * strarray for it.
1321 */
55d43bca 1322 if (val == 0 &&
4aa58232
ACM
1323 !(sc->arg_scnprintf &&
1324 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1325 sc->arg_parm[arg.idx]))
22ae5cf1
ACM
1326 continue;
1327
752fde44 1328 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1329 "%s%s: ", printed ? ", " : "", field->name);
01533e97 1330 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
55d43bca 1331 arg.val = val;
1f115cb7
ACM
1332 if (sc->arg_parm)
1333 arg.parm = sc->arg_parm[arg.idx];
01533e97
ACM
1334 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1335 size - printed, &arg);
6e7eeb51 1336 } else {
13d4ff3e 1337 printed += scnprintf(bf + printed, size - printed,
55d43bca 1338 "%ld", val);
6e7eeb51 1339 }
514f1c67 1340 }
4c4d6e51
ACM
1341 } else if (IS_ERR(sc->tp_format)) {
1342 /*
1343 * If we managed to read the tracepoint /format file, then we
1344 * may end up not having any args, like with gettid(), so only
1345 * print the raw args when we didn't manage to read it.
1346 */
01533e97
ACM
1347 int i = 0;
1348
514f1c67 1349 while (i < 6) {
55d43bca
DA
1350 /* special care for unaligned accesses */
1351 p = args + sizeof(unsigned long) * i;
1352 memcpy(&val, p, sizeof(val));
752fde44
ACM
1353 printed += scnprintf(bf + printed, size - printed,
1354 "%sarg%d: %ld",
55d43bca 1355 printed ? ", " : "", i, val);
514f1c67
ACM
1356 ++i;
1357 }
1358 }
1359
1360 return printed;
1361}
1362
/* Signature shared by the per tracepoint sample handlers in this file. */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1366
1367static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1368 struct perf_evsel *evsel, int id)
ba3d7dee 1369{
ba3d7dee
ACM
1370
1371 if (id < 0) {
adaa18bf
ACM
1372
1373 /*
1374 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1375 * before that, leaving at a higher verbosity level till that is
1376 * explained. Reproduced with plain ftrace with:
1377 *
1378 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1379 * grep "NR -1 " /t/trace_pipe
1380 *
1381 * After generating some load on the machine.
1382 */
1383 if (verbose > 1) {
1384 static u64 n;
1385 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1386 id, perf_evsel__name(evsel), ++n);
1387 }
ba3d7dee
ACM
1388 return NULL;
1389 }
1390
1391 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1392 trace__read_syscall_info(trace, id))
1393 goto out_cant_read;
1394
1395 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1396 goto out_cant_read;
1397
1398 return &trace->syscalls.table[id];
1399
1400out_cant_read:
7c304ee0
ACM
1401 if (verbose) {
1402 fprintf(trace->output, "Problems reading syscall %d", id);
1403 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1404 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1405 fputs(" information\n", trace->output);
1406 }
ba3d7dee
ACM
1407 return NULL;
1408}
1409
bf2575c1
DA
1410static void thread__update_stats(struct thread_trace *ttrace,
1411 int id, struct perf_sample *sample)
1412{
1413 struct int_node *inode;
1414 struct stats *stats;
1415 u64 duration = 0;
1416
1417 inode = intlist__findnew(ttrace->syscall_stats, id);
1418 if (inode == NULL)
1419 return;
1420
1421 stats = inode->priv;
1422 if (stats == NULL) {
1423 stats = malloc(sizeof(struct stats));
1424 if (stats == NULL)
1425 return;
1426 init_stats(stats);
1427 inode->priv = stats;
1428 }
1429
1430 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1431 duration = sample->time - ttrace->entry_time;
1432
1433 update_stats(stats, duration);
1434}
1435
e596663e
ACM
1436static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1437{
1438 struct thread_trace *ttrace;
1439 u64 duration;
1440 size_t printed;
1441
1442 if (trace->current == NULL)
1443 return 0;
1444
1445 ttrace = thread__priv(trace->current);
1446
1447 if (!ttrace->entry_pending)
1448 return 0;
1449
1450 duration = sample->time - ttrace->entry_time;
1451
ecf1e225 1452 printed = trace__fprintf_entry_head(trace, trace->current, duration, ttrace->entry_time, trace->output);
e596663e
ACM
1453 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1454 ttrace->entry_pending = false;
1455
1456 return printed;
1457}
1458
ba3d7dee 1459static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1460 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1461 struct perf_sample *sample)
1462{
752fde44 1463 char *msg;
ba3d7dee 1464 void *args;
752fde44 1465 size_t printed = 0;
2ae3a312 1466 struct thread *thread;
b91fc39f 1467 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1468 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1469 struct thread_trace *ttrace;
1470
1471 if (sc == NULL)
1472 return -1;
ba3d7dee 1473
8fb598e5 1474 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1475 ttrace = thread__trace(thread, trace->output);
2ae3a312 1476 if (ttrace == NULL)
b91fc39f 1477 goto out_put;
ba3d7dee 1478
77170988 1479 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1480
1481 if (ttrace->entry_str == NULL) {
e4d44e83 1482 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1483 if (!ttrace->entry_str)
b91fc39f 1484 goto out_put;
752fde44
ACM
1485 }
1486
5cf9c84e 1487 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1488 trace__printf_interrupted_entry(trace, sample);
e596663e 1489
752fde44
ACM
1490 ttrace->entry_time = sample->time;
1491 msg = ttrace->entry_str;
e4d44e83 1492 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1493
e4d44e83 1494 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1495 args, trace, thread);
752fde44 1496
5089f20e 1497 if (sc->is_exit) {
5cf9c84e 1498 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
ecf1e225 1499 trace__fprintf_entry_head(trace, thread, 1, ttrace->entry_time, trace->output);
c008f78f 1500 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1501 }
7f4f8001 1502 } else {
752fde44 1503 ttrace->entry_pending = true;
7f4f8001
ACM
1504 /* See trace__vfs_getname & trace__sys_exit */
1505 ttrace->filename.pending_open = false;
1506 }
ba3d7dee 1507
f3b623b8
ACM
1508 if (trace->current != thread) {
1509 thread__put(trace->current);
1510 trace->current = thread__get(thread);
1511 }
b91fc39f
ACM
1512 err = 0;
1513out_put:
1514 thread__put(thread);
1515 return err;
ba3d7dee
ACM
1516}
1517
5cf9c84e
ACM
1518static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1519 struct perf_sample *sample,
1520 struct callchain_cursor *cursor)
202ff968
ACM
1521{
1522 struct addr_location al;
5cf9c84e
ACM
1523
1524 if (machine__resolve(trace->host, &al, sample) < 0 ||
1525 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1526 return -1;
1527
1528 return 0;
1529}
1530
1531static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1532{
202ff968 1533 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1534 const unsigned int print_opts = EVSEL__PRINT_SYM |
1535 EVSEL__PRINT_DSO |
1536 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1537
d327e60c 1538 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1539}
1540
ba3d7dee 1541static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1542 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1543 struct perf_sample *sample)
1544{
2c82c3ad 1545 long ret;
60c907ab 1546 u64 duration = 0;
2ae3a312 1547 struct thread *thread;
5cf9c84e 1548 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1549 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1550 struct thread_trace *ttrace;
1551
1552 if (sc == NULL)
1553 return -1;
ba3d7dee 1554
8fb598e5 1555 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1556 ttrace = thread__trace(thread, trace->output);
2ae3a312 1557 if (ttrace == NULL)
b91fc39f 1558 goto out_put;
ba3d7dee 1559
bf2575c1
DA
1560 if (trace->summary)
1561 thread__update_stats(ttrace, id, sample);
1562
77170988 1563 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1564
fd0db102 1565 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1566 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1567 ttrace->filename.pending_open = false;
c522739d
ACM
1568 ++trace->stats.vfs_getname;
1569 }
1570
ae9ed035 1571 if (ttrace->entry_time) {
60c907ab 1572 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1573 if (trace__filter_duration(trace, duration))
1574 goto out;
1575 } else if (trace->duration_filter)
1576 goto out;
60c907ab 1577
5cf9c84e
ACM
1578 if (sample->callchain) {
1579 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1580 if (callchain_ret == 0) {
1581 if (callchain_cursor.nr < trace->min_stack)
1582 goto out;
1583 callchain_ret = 1;
1584 }
1585 }
1586
fd2eabaf
DA
1587 if (trace->summary_only)
1588 goto out;
1589
ecf1e225 1590 trace__fprintf_entry_head(trace, thread, duration, ttrace->entry_time, trace->output);
752fde44
ACM
1591
1592 if (ttrace->entry_pending) {
c24ff998 1593 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1594 } else {
c24ff998
ACM
1595 fprintf(trace->output, " ... [");
1596 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1597 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1598 }
1599
da3c9a44
ACM
1600 if (sc->fmt == NULL) {
1601signed_print:
2c82c3ad 1602 fprintf(trace->output, ") = %ld", ret);
11c8e39f 1603 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
942a91ed 1604 char bf[STRERR_BUFSIZE];
c8b5f2c9 1605 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
ba3d7dee
ACM
1606 *e = audit_errno_to_name(-ret);
1607
c24ff998 1608 fprintf(trace->output, ") = -1 %s %s", e, emsg);
da3c9a44 1609 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1610 fprintf(trace->output, ") = 0 Timeout");
04b34729 1611 else if (sc->fmt->hexret)
2c82c3ad 1612 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1613 else if (sc->fmt->errpid) {
1614 struct thread *child = machine__find_thread(trace->host, ret, ret);
1615
1616 if (child != NULL) {
1617 fprintf(trace->output, ") = %ld", ret);
1618 if (child->comm_set)
1619 fprintf(trace->output, " (%s)", thread__comm_str(child));
1620 thread__put(child);
1621 }
1622 } else
da3c9a44 1623 goto signed_print;
ba3d7dee 1624
c24ff998 1625 fputc('\n', trace->output);
566a0885 1626
5cf9c84e
ACM
1627 if (callchain_ret > 0)
1628 trace__fprintf_callchain(trace, sample);
1629 else if (callchain_ret < 0)
1630 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1631out:
752fde44 1632 ttrace->entry_pending = false;
b91fc39f
ACM
1633 err = 0;
1634out_put:
1635 thread__put(thread);
1636 return err;
ba3d7dee
ACM
1637}
1638
c522739d 1639static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1640 union perf_event *event __maybe_unused,
c522739d
ACM
1641 struct perf_sample *sample)
1642{
f994592d
ACM
1643 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1644 struct thread_trace *ttrace;
1645 size_t filename_len, entry_str_len, to_move;
1646 ssize_t remaining_space;
1647 char *pos;
7f4f8001 1648 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1649
1650 if (!thread)
1651 goto out;
1652
1653 ttrace = thread__priv(thread);
1654 if (!ttrace)
1655 goto out;
1656
7f4f8001
ACM
1657 filename_len = strlen(filename);
1658
1659 if (ttrace->filename.namelen < filename_len) {
1660 char *f = realloc(ttrace->filename.name, filename_len + 1);
1661
1662 if (f == NULL)
1663 goto out;
1664
1665 ttrace->filename.namelen = filename_len;
1666 ttrace->filename.name = f;
1667 }
1668
1669 strcpy(ttrace->filename.name, filename);
1670 ttrace->filename.pending_open = true;
1671
f994592d
ACM
1672 if (!ttrace->filename.ptr)
1673 goto out;
1674
1675 entry_str_len = strlen(ttrace->entry_str);
1676 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1677 if (remaining_space <= 0)
1678 goto out;
1679
f994592d
ACM
1680 if (filename_len > (size_t)remaining_space) {
1681 filename += filename_len - remaining_space;
1682 filename_len = remaining_space;
1683 }
1684
1685 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1686 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1687 memmove(pos + filename_len, pos, to_move);
1688 memcpy(pos, filename, filename_len);
1689
1690 ttrace->filename.ptr = 0;
1691 ttrace->filename.entry_str_pos = 0;
1692out:
c522739d
ACM
1693 return 0;
1694}
1695
1302d88e 1696static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1697 union perf_event *event __maybe_unused,
1302d88e
ACM
1698 struct perf_sample *sample)
1699{
1700 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1701 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1702 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1703 sample->pid,
1704 sample->tid);
c24ff998 1705 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1706
1707 if (ttrace == NULL)
1708 goto out_dump;
1709
1710 ttrace->runtime_ms += runtime_ms;
1711 trace->runtime_ms += runtime_ms;
b91fc39f 1712 thread__put(thread);
1302d88e
ACM
1713 return 0;
1714
1715out_dump:
c24ff998 1716 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1717 evsel->name,
1718 perf_evsel__strval(evsel, sample, "comm"),
1719 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1720 runtime,
1721 perf_evsel__intval(evsel, sample, "vruntime"));
b91fc39f 1722 thread__put(thread);
1302d88e
ACM
1723 return 0;
1724}
1725
1d6c9407
WN
1726static void bpf_output__printer(enum binary_printer_ops op,
1727 unsigned int val, void *extra)
1728{
1729 FILE *output = extra;
1730 unsigned char ch = (unsigned char)val;
1731
1732 switch (op) {
1733 case BINARY_PRINT_CHAR_DATA:
1734 fprintf(output, "%c", isprint(ch) ? ch : '.');
1735 break;
1736 case BINARY_PRINT_DATA_BEGIN:
1737 case BINARY_PRINT_LINE_BEGIN:
1738 case BINARY_PRINT_ADDR:
1739 case BINARY_PRINT_NUM_DATA:
1740 case BINARY_PRINT_NUM_PAD:
1741 case BINARY_PRINT_SEP:
1742 case BINARY_PRINT_CHAR_PAD:
1743 case BINARY_PRINT_LINE_END:
1744 case BINARY_PRINT_DATA_END:
1745 default:
1746 break;
1747 }
1748}
1749
1750static void bpf_output__fprintf(struct trace *trace,
1751 struct perf_sample *sample)
1752{
1753 print_binary(sample->raw_data, sample->raw_size, 8,
1754 bpf_output__printer, trace->output);
1755}
1756
14a052df
ACM
1757static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1758 union perf_event *event __maybe_unused,
1759 struct perf_sample *sample)
1760{
7ad35615
ACM
1761 int callchain_ret = 0;
1762
1763 if (sample->callchain) {
1764 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1765 if (callchain_ret == 0) {
1766 if (callchain_cursor.nr < trace->min_stack)
1767 goto out;
1768 callchain_ret = 1;
1769 }
1770 }
1771
14a052df
ACM
1772 trace__printf_interrupted_entry(trace, sample);
1773 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1774
1775 if (trace->trace_syscalls)
1776 fprintf(trace->output, "( ): ");
1777
1778 fprintf(trace->output, "%s:", evsel->name);
14a052df 1779
1d6c9407
WN
1780 if (perf_evsel__is_bpf_output(evsel)) {
1781 bpf_output__fprintf(trace, sample);
1782 } else if (evsel->tp_format) {
14a052df
ACM
1783 event_format__fprintf(evsel->tp_format, sample->cpu,
1784 sample->raw_data, sample->raw_size,
1785 trace->output);
1786 }
1787
1788 fprintf(trace->output, ")\n");
202ff968 1789
7ad35615
ACM
1790 if (callchain_ret > 0)
1791 trace__fprintf_callchain(trace, sample);
1792 else if (callchain_ret < 0)
1793 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1794out:
14a052df
ACM
1795 return 0;
1796}
1797
598d02c5
SF
1798static void print_location(FILE *f, struct perf_sample *sample,
1799 struct addr_location *al,
1800 bool print_dso, bool print_sym)
1801{
1802
1803 if ((verbose || print_dso) && al->map)
1804 fprintf(f, "%s@", al->map->dso->long_name);
1805
1806 if ((verbose || print_sym) && al->sym)
4414a3c5 1807 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1808 al->addr - al->sym->start);
1809 else if (al->map)
4414a3c5 1810 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1811 else
4414a3c5 1812 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1813}
1814
1815static int trace__pgfault(struct trace *trace,
1816 struct perf_evsel *evsel,
473398a2 1817 union perf_event *event __maybe_unused,
598d02c5
SF
1818 struct perf_sample *sample)
1819{
1820 struct thread *thread;
598d02c5
SF
1821 struct addr_location al;
1822 char map_type = 'd';
a2ea67d7 1823 struct thread_trace *ttrace;
b91fc39f 1824 int err = -1;
1df54290 1825 int callchain_ret = 0;
598d02c5
SF
1826
1827 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1828
1829 if (sample->callchain) {
1830 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1831 if (callchain_ret == 0) {
1832 if (callchain_cursor.nr < trace->min_stack)
1833 goto out_put;
1834 callchain_ret = 1;
1835 }
1836 }
1837
a2ea67d7
SF
1838 ttrace = thread__trace(thread, trace->output);
1839 if (ttrace == NULL)
b91fc39f 1840 goto out_put;
a2ea67d7
SF
1841
1842 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1843 ttrace->pfmaj++;
1844 else
1845 ttrace->pfmin++;
1846
1847 if (trace->summary_only)
b91fc39f 1848 goto out;
598d02c5 1849
473398a2 1850 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
1851 sample->ip, &al);
1852
1853 trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
1854
1855 fprintf(trace->output, "%sfault [",
1856 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1857 "maj" : "min");
1858
1859 print_location(trace->output, sample, &al, false, true);
1860
1861 fprintf(trace->output, "] => ");
1862
473398a2 1863 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
1864 sample->addr, &al);
1865
1866 if (!al.map) {
473398a2 1867 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
1868 MAP__FUNCTION, sample->addr, &al);
1869
1870 if (al.map)
1871 map_type = 'x';
1872 else
1873 map_type = '?';
1874 }
1875
1876 print_location(trace->output, sample, &al, true, false);
1877
1878 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 1879
1df54290
ACM
1880 if (callchain_ret > 0)
1881 trace__fprintf_callchain(trace, sample);
1882 else if (callchain_ret < 0)
1883 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
1884out:
1885 err = 0;
1886out_put:
1887 thread__put(thread);
1888 return err;
598d02c5
SF
1889}
1890
e6001980 1891static void trace__set_base_time(struct trace *trace,
8a07a809 1892 struct perf_evsel *evsel,
e6001980
ACM
1893 struct perf_sample *sample)
1894{
8a07a809
ACM
1895 /*
1896 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1897 * and don't use sample->time unconditionally, we may end up having
1898 * some other event in the future without PERF_SAMPLE_TIME for good
1899 * reason, i.e. we may not be interested in its timestamps, just in
1900 * it taking place, picking some piece of information when it
1901 * appears in our event stream (vfs_getname comes to mind).
1902 */
1903 if (trace->base_time == 0 && !trace->full_time &&
1904 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
1905 trace->base_time = sample->time;
1906}
1907
6810fc91 1908static int trace__process_sample(struct perf_tool *tool,
0c82adcf 1909 union perf_event *event,
6810fc91
DA
1910 struct perf_sample *sample,
1911 struct perf_evsel *evsel,
1912 struct machine *machine __maybe_unused)
1913{
1914 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 1915 struct thread *thread;
6810fc91
DA
1916 int err = 0;
1917
744a9719 1918 tracepoint_handler handler = evsel->handler;
6810fc91 1919
aa07df6e
DA
1920 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1921 if (thread && thread__is_filtered(thread))
bdc89661
DA
1922 return 0;
1923
e6001980 1924 trace__set_base_time(trace, evsel, sample);
6810fc91 1925
3160565f
DA
1926 if (handler) {
1927 ++trace->nr_events;
0c82adcf 1928 handler(trace, evsel, event, sample);
3160565f 1929 }
6810fc91
DA
1930
1931 return err;
1932}
1933
1e28fe0a 1934static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
1935{
1936 unsigned int rec_argc, i, j;
1937 const char **rec_argv;
1938 const char * const record_args[] = {
1939 "record",
1940 "-R",
1941 "-m", "1024",
1942 "-c", "1",
5e2485b1
DA
1943 };
1944
1e28fe0a
SF
1945 const char * const sc_args[] = { "-e", };
1946 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1947 const char * const majpf_args[] = { "-e", "major-faults" };
1948 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1949 const char * const minpf_args[] = { "-e", "minor-faults" };
1950 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1951
9aca7f17 1952 /* +1 is for the event string below */
1e28fe0a
SF
1953 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1954 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
1955 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1956
1957 if (rec_argv == NULL)
1958 return -ENOMEM;
1959
1e28fe0a 1960 j = 0;
5e2485b1 1961 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
1962 rec_argv[j++] = record_args[i];
1963
e281a960
SF
1964 if (trace->trace_syscalls) {
1965 for (i = 0; i < sc_args_nr; i++)
1966 rec_argv[j++] = sc_args[i];
1967
1968 /* event string may be different for older kernels - e.g., RHEL6 */
1969 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
1970 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
1971 else if (is_valid_tracepoint("syscalls:sys_enter"))
1972 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
1973 else {
1974 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
1975 return -1;
1976 }
9aca7f17 1977 }
9aca7f17 1978
1e28fe0a
SF
1979 if (trace->trace_pgfaults & TRACE_PFMAJ)
1980 for (i = 0; i < majpf_args_nr; i++)
1981 rec_argv[j++] = majpf_args[i];
1982
1983 if (trace->trace_pgfaults & TRACE_PFMIN)
1984 for (i = 0; i < minpf_args_nr; i++)
1985 rec_argv[j++] = minpf_args[i];
1986
1987 for (i = 0; i < (unsigned int)argc; i++)
1988 rec_argv[j++] = argv[i];
5e2485b1 1989
1e28fe0a 1990 return cmd_record(j, rec_argv, NULL);
5e2485b1
DA
1991}
1992
bf2575c1
DA
1993static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
1994
08c98776 1995static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 1996{
ef503831 1997 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
1998
1999 if (IS_ERR(evsel))
08c98776 2000 return false;
c522739d
ACM
2001
2002 if (perf_evsel__field(evsel, "pathname") == NULL) {
2003 perf_evsel__delete(evsel);
08c98776 2004 return false;
c522739d
ACM
2005 }
2006
744a9719 2007 evsel->handler = trace__vfs_getname;
c522739d 2008 perf_evlist__add(evlist, evsel);
08c98776 2009 return true;
c522739d
ACM
2010}
2011
0ae537cb 2012static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2013{
2014 struct perf_evsel *evsel;
2015 struct perf_event_attr attr = {
2016 .type = PERF_TYPE_SOFTWARE,
2017 .mmap_data = 1,
598d02c5
SF
2018 };
2019
2020 attr.config = config;
0524798c 2021 attr.sample_period = 1;
598d02c5
SF
2022
2023 event_attr_init(&attr);
2024
2025 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2026 if (evsel)
2027 evsel->handler = trace__pgfault;
598d02c5 2028
0ae537cb 2029 return evsel;
598d02c5
SF
2030}
2031
ddbb1b13
ACM
2032static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2033{
2034 const u32 type = event->header.type;
2035 struct perf_evsel *evsel;
2036
ddbb1b13
ACM
2037 if (type != PERF_RECORD_SAMPLE) {
2038 trace__process_event(trace, trace->host, event, sample);
2039 return;
2040 }
2041
2042 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2043 if (evsel == NULL) {
2044 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2045 return;
2046 }
2047
e6001980
ACM
2048 trace__set_base_time(trace, evsel, sample);
2049
ddbb1b13
ACM
2050 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2051 sample->raw_data == NULL) {
2052 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2053 perf_evsel__name(evsel), sample->tid,
2054 sample->cpu, sample->raw_size);
2055 } else {
2056 tracepoint_handler handler = evsel->handler;
2057 handler(trace, evsel, event, sample);
2058 }
2059}
2060
c27366f0
ACM
2061static int trace__add_syscall_newtp(struct trace *trace)
2062{
2063 int ret = -1;
2064 struct perf_evlist *evlist = trace->evlist;
2065 struct perf_evsel *sys_enter, *sys_exit;
2066
2067 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2068 if (sys_enter == NULL)
2069 goto out;
2070
2071 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2072 goto out_delete_sys_enter;
2073
2074 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2075 if (sys_exit == NULL)
2076 goto out_delete_sys_enter;
2077
2078 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2079 goto out_delete_sys_exit;
2080
2081 perf_evlist__add(evlist, sys_enter);
2082 perf_evlist__add(evlist, sys_exit);
2083
2ddd5c04 2084 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2085 /*
2086 * We're interested only in the user space callchain
2087 * leading to the syscall, allow overriding that for
2088 * debugging reasons using --kernel_syscall_callchains
2089 */
2090 sys_exit->attr.exclude_callchain_kernel = 1;
2091 }
2092
8b3ce757
ACM
2093 trace->syscalls.events.sys_enter = sys_enter;
2094 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2095
2096 ret = 0;
2097out:
2098 return ret;
2099
2100out_delete_sys_exit:
2101 perf_evsel__delete_priv(sys_exit);
2102out_delete_sys_enter:
2103 perf_evsel__delete_priv(sys_enter);
2104 goto out;
2105}
2106
19867b61
ACM
2107static int trace__set_ev_qualifier_filter(struct trace *trace)
2108{
2109 int err = -1;
b15d0a4c 2110 struct perf_evsel *sys_exit;
19867b61
ACM
2111 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2112 trace->ev_qualifier_ids.nr,
2113 trace->ev_qualifier_ids.entries);
2114
2115 if (filter == NULL)
2116 goto out_enomem;
2117
3541c034
MP
2118 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2119 filter)) {
b15d0a4c 2120 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2121 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2122 }
19867b61
ACM
2123
2124 free(filter);
2125out:
2126 return err;
2127out_enomem:
2128 errno = ENOMEM;
2129 goto out;
2130}
c27366f0 2131
f15eb531 2132static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2133{
14a052df 2134 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2135 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2136 int err = -1, i;
2137 unsigned long before;
f15eb531 2138 const bool forks = argc > 0;
46fb3c21 2139 bool draining = false;
514f1c67 2140
75b757ca
ACM
2141 trace->live = true;
2142
c27366f0 2143 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2144 goto out_error_raw_syscalls;
514f1c67 2145
e281a960 2146 if (trace->trace_syscalls)
08c98776 2147 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2148
0ae537cb
ACM
2149 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2150 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2151 if (pgfault_maj == NULL)
2152 goto out_error_mem;
2153 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2154 }
598d02c5 2155
0ae537cb
ACM
2156 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2157 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2158 if (pgfault_min == NULL)
2159 goto out_error_mem;
2160 perf_evlist__add(evlist, pgfault_min);
2161 }
598d02c5 2162
1302d88e 2163 if (trace->sched &&
2cc990ba
ACM
2164 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2165 trace__sched_stat_runtime))
2166 goto out_error_sched_stat_runtime;
1302d88e 2167
514f1c67
ACM
2168 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2169 if (err < 0) {
c24ff998 2170 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2171 goto out_delete_evlist;
2172 }
2173
752fde44
ACM
2174 err = trace__symbols_init(trace, evlist);
2175 if (err < 0) {
c24ff998 2176 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2177 goto out_delete_evlist;
752fde44
ACM
2178 }
2179
fde54b78
ACM
2180 perf_evlist__config(evlist, &trace->opts, NULL);
2181
0c3a6ef4
ACM
2182 if (callchain_param.enabled) {
2183 bool use_identifier = false;
2184
2185 if (trace->syscalls.events.sys_exit) {
2186 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2187 &trace->opts, &callchain_param);
2188 use_identifier = true;
2189 }
2190
2191 if (pgfault_maj) {
2192 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2193 use_identifier = true;
2194 }
2195
2196 if (pgfault_min) {
2197 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2198 use_identifier = true;
2199 }
2200
2201 if (use_identifier) {
2202 /*
2203 * Now we have evsels with different sample_ids, use
2204 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2205 * from a fixed position in each ring buffer record.
2206 *
2207 * As of this the changeset introducing this comment, this
2208 * isn't strictly needed, as the fields that can come before
2209 * PERF_SAMPLE_ID are all used, but we'll probably disable
2210 * some of those for things like copying the payload of
2211 * pointer syscall arguments, and for vfs_getname we don't
2212 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2213 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2214 */
2215 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2216 perf_evlist__reset_sample_bit(evlist, ID);
2217 }
fde54b78 2218 }
514f1c67 2219
f15eb531
NK
2220 signal(SIGCHLD, sig_handler);
2221 signal(SIGINT, sig_handler);
2222
2223 if (forks) {
6ef73ec4 2224 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2225 argv, false, NULL);
f15eb531 2226 if (err < 0) {
c24ff998 2227 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2228 goto out_delete_evlist;
f15eb531
NK
2229 }
2230 }
2231
514f1c67 2232 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2233 if (err < 0)
2234 goto out_error_open;
514f1c67 2235
ba504235
WN
2236 err = bpf__apply_obj_config();
2237 if (err) {
2238 char errbuf[BUFSIZ];
2239
2240 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2241 pr_err("ERROR: Apply config to BPF failed: %s\n",
2242 errbuf);
2243 goto out_error_open;
2244 }
2245
241b057c
ACM
2246 /*
2247 * Better not use !target__has_task() here because we need to cover the
2248 * case where no threads were specified in the command line, but a
2249 * workload was, and in that case we will fill in the thread_map when
2250 * we fork the workload in perf_evlist__prepare_workload.
2251 */
f078c385
ACM
2252 if (trace->filter_pids.nr > 0)
2253 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2254 else if (thread_map__pid(evlist->threads, 0) == -1)
f078c385
ACM
2255 err = perf_evlist__set_filter_pid(evlist, getpid());
2256
94ad89bc
ACM
2257 if (err < 0)
2258 goto out_error_mem;
2259
19867b61
ACM
2260 if (trace->ev_qualifier_ids.nr > 0) {
2261 err = trace__set_ev_qualifier_filter(trace);
2262 if (err < 0)
2263 goto out_errno;
19867b61 2264
2e5e5f87
ACM
2265 pr_debug("event qualifier tracepoint filter: %s\n",
2266 trace->syscalls.events.sys_exit->filter);
2267 }
19867b61 2268
94ad89bc
ACM
2269 err = perf_evlist__apply_filters(evlist, &evsel);
2270 if (err < 0)
2271 goto out_error_apply_filters;
241b057c 2272
f885037e 2273 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
e09b18d4
ACM
2274 if (err < 0)
2275 goto out_error_mmap;
514f1c67 2276
e36b7821 2277 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2278 perf_evlist__enable(evlist);
2279
f15eb531
NK
2280 if (forks)
2281 perf_evlist__start_workload(evlist);
2282
e36b7821
AB
2283 if (trace->opts.initial_delay) {
2284 usleep(trace->opts.initial_delay * 1000);
2285 perf_evlist__enable(evlist);
2286 }
2287
e13798c7 2288 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2289 evlist->threads->nr > 1 ||
2290 perf_evlist__first(evlist)->attr.inherit;
514f1c67 2291again:
efd5745e 2292 before = trace->nr_events;
514f1c67
ACM
2293
2294 for (i = 0; i < evlist->nr_mmaps; i++) {
2295 union perf_event *event;
2296
2297 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2298 struct perf_sample sample;
514f1c67 2299
efd5745e 2300 ++trace->nr_events;
514f1c67 2301
514f1c67
ACM
2302 err = perf_evlist__parse_sample(evlist, event, &sample);
2303 if (err) {
c24ff998 2304 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2305 goto next_event;
514f1c67
ACM
2306 }
2307
ddbb1b13 2308 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2309next_event:
2310 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2311
ba209f85
ACM
2312 if (interrupted)
2313 goto out_disable;
02ac5421
ACM
2314
2315 if (done && !draining) {
2316 perf_evlist__disable(evlist);
2317 draining = true;
2318 }
514f1c67
ACM
2319 }
2320 }
2321
efd5745e 2322 if (trace->nr_events == before) {
ba209f85 2323 int timeout = done ? 100 : -1;
f15eb531 2324
46fb3c21
ACM
2325 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2326 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2327 draining = true;
2328
ba209f85 2329 goto again;
46fb3c21 2330 }
ba209f85
ACM
2331 } else {
2332 goto again;
f15eb531
NK
2333 }
2334
ba209f85 2335out_disable:
f3b623b8
ACM
2336 thread__zput(trace->current);
2337
ba209f85 2338 perf_evlist__disable(evlist);
514f1c67 2339
c522739d
ACM
2340 if (!err) {
2341 if (trace->summary)
2342 trace__fprintf_thread_summary(trace, trace->output);
2343
2344 if (trace->show_tool_stats) {
2345 fprintf(trace->output, "Stats:\n "
2346 " vfs_getname : %" PRIu64 "\n"
2347 " proc_getname: %" PRIu64 "\n",
2348 trace->stats.vfs_getname,
2349 trace->stats.proc_getname);
2350 }
2351 }
bf2575c1 2352
514f1c67
ACM
2353out_delete_evlist:
2354 perf_evlist__delete(evlist);
14a052df 2355 trace->evlist = NULL;
75b757ca 2356 trace->live = false;
514f1c67 2357 return err;
6ef068cb
ACM
2358{
2359 char errbuf[BUFSIZ];
a8f23d8f 2360
2cc990ba 2361out_error_sched_stat_runtime:
988bdb31 2362 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2363 goto out_error;
2364
801c67b0 2365out_error_raw_syscalls:
988bdb31 2366 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2367 goto out_error;
2368
e09b18d4
ACM
2369out_error_mmap:
2370 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2371 goto out_error;
2372
a8f23d8f
ACM
2373out_error_open:
2374 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2375
2376out_error:
6ef068cb 2377 fprintf(trace->output, "%s\n", errbuf);
87f91868 2378 goto out_delete_evlist;
94ad89bc
ACM
2379
2380out_error_apply_filters:
2381 fprintf(trace->output,
2382 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2383 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2384 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2385 goto out_delete_evlist;
514f1c67 2386}
5ed08dae
ACM
2387out_error_mem:
2388 fprintf(trace->output, "Not enough memory to run!\n");
2389 goto out_delete_evlist;
19867b61
ACM
2390
2391out_errno:
2392 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2393 goto out_delete_evlist;
a8f23d8f 2394}
514f1c67 2395
6810fc91
DA
2396static int trace__replay(struct trace *trace)
2397{
2398 const struct perf_evsel_str_handler handlers[] = {
c522739d 2399 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2400 };
f5fc1412
JO
2401 struct perf_data_file file = {
2402 .path = input_name,
2403 .mode = PERF_DATA_MODE_READ,
e366a6d8 2404 .force = trace->force,
f5fc1412 2405 };
6810fc91 2406 struct perf_session *session;
003824e8 2407 struct perf_evsel *evsel;
6810fc91
DA
2408 int err = -1;
2409
2410 trace->tool.sample = trace__process_sample;
2411 trace->tool.mmap = perf_event__process_mmap;
384c671e 2412 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2413 trace->tool.comm = perf_event__process_comm;
2414 trace->tool.exit = perf_event__process_exit;
2415 trace->tool.fork = perf_event__process_fork;
2416 trace->tool.attr = perf_event__process_attr;
2417 trace->tool.tracing_data = perf_event__process_tracing_data;
2418 trace->tool.build_id = perf_event__process_build_id;
2419
0a8cb85c 2420 trace->tool.ordered_events = true;
6810fc91
DA
2421 trace->tool.ordering_requires_timestamps = true;
2422
2423 /* add tid to output */
2424 trace->multiple_threads = true;
2425
f5fc1412 2426 session = perf_session__new(&file, false, &trace->tool);
6810fc91 2427 if (session == NULL)
52e02834 2428 return -1;
6810fc91 2429
aa07df6e
DA
2430 if (trace->opts.target.pid)
2431 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2432
2433 if (trace->opts.target.tid)
2434 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2435
0a7e6d1b 2436 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2437 goto out;
2438
8fb598e5
DA
2439 trace->host = &session->machines.host;
2440
6810fc91
DA
2441 err = perf_session__set_tracepoints_handlers(session, handlers);
2442 if (err)
2443 goto out;
2444
003824e8
NK
2445 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2446 "raw_syscalls:sys_enter");
9aca7f17
DA
2447 /* older kernels have syscalls tp versus raw_syscalls */
2448 if (evsel == NULL)
2449 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2450 "syscalls:sys_enter");
003824e8 2451
e281a960
SF
2452 if (evsel &&
2453 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2454 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2455 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2456 goto out;
2457 }
2458
2459 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2460 "raw_syscalls:sys_exit");
9aca7f17
DA
2461 if (evsel == NULL)
2462 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2463 "syscalls:sys_exit");
e281a960
SF
2464 if (evsel &&
2465 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2466 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2467 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2468 goto out;
2469 }
2470
e5cadb93 2471 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2472 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2473 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2474 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2475 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2476 evsel->handler = trace__pgfault;
2477 }
2478
6810fc91
DA
2479 setup_pager();
2480
b7b61cbe 2481 err = perf_session__process_events(session);
6810fc91
DA
2482 if (err)
2483 pr_err("Failed to process events, error %d", err);
2484
bf2575c1
DA
2485 else if (trace->summary)
2486 trace__fprintf_thread_summary(trace, trace->output);
2487
6810fc91
DA
2488out:
2489 perf_session__delete(session);
2490
2491 return err;
2492}
2493
1302d88e
ACM
/*
 * Print the heading that precedes the per-thread summary.
 * Returns fprintf()'s result converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2502
b535d523
ACM
2503DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2504 struct stats *stats;
2505 double msecs;
2506 int syscall;
2507)
2508{
2509 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2510 struct stats *stats = source->priv;
2511
2512 entry->syscall = source->i;
2513 entry->stats = stats;
2514 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2515}
2516
bf2575c1
DA
2517static size_t thread__dump_stats(struct thread_trace *ttrace,
2518 struct trace *trace, FILE *fp)
2519{
bf2575c1
DA
2520 size_t printed = 0;
2521 struct syscall *sc;
b535d523
ACM
2522 struct rb_node *nd;
2523 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2524
b535d523 2525 if (syscall_stats == NULL)
bf2575c1
DA
2526 return 0;
2527
2528 printed += fprintf(fp, "\n");
2529
834fd46d
MW
2530 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2531 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2532 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2533
98a91837 2534 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2535 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2536 if (stats) {
2537 double min = (double)(stats->min) / NSEC_PER_MSEC;
2538 double max = (double)(stats->max) / NSEC_PER_MSEC;
2539 double avg = avg_stats(stats);
2540 double pct;
2541 u64 n = (u64) stats->n;
2542
2543 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2544 avg /= NSEC_PER_MSEC;
2545
b535d523 2546 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2547 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2548 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2549 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2550 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2551 }
bf2575c1
DA
2552 }
2553
b535d523 2554 resort_rb__delete(syscall_stats);
bf2575c1 2555 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2556
2557 return printed;
2558}
2559
96c14451 2560static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2561{
96c14451 2562 size_t printed = 0;
89dceb22 2563 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2564 double ratio;
2565
2566 if (ttrace == NULL)
2567 return 0;
2568
2569 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2570
15e65c69 2571 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2572 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2573 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2574 if (ttrace->pfmaj)
2575 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2576 if (ttrace->pfmin)
2577 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2578 if (trace->sched)
2579 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2580 else if (fputc('\n', fp) != EOF)
2581 ++printed;
2582
bf2575c1 2583 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2584
96c14451
ACM
2585 return printed;
2586}
896cbb56 2587
96c14451
ACM
2588static unsigned long thread__nr_events(struct thread_trace *ttrace)
2589{
2590 return ttrace ? ttrace->nr_events : 0;
2591}
2592
2593DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2594 struct thread *thread;
2595)
2596{
2597 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2598}
2599
1302d88e
ACM
2600static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2601{
96c14451
ACM
2602 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2603 size_t printed = trace__fprintf_threads_header(fp);
2604 struct rb_node *nd;
1302d88e 2605
96c14451
ACM
2606 if (threads == NULL) {
2607 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2608 return 0;
2609 }
2610
98a91837 2611 resort_rb__for_each_entry(nd, threads)
96c14451 2612 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
896cbb56 2613
96c14451
ACM
2614 resort_rb__delete(threads);
2615
2616 return printed;
1302d88e
ACM
2617}
2618
ae9ed035
ACM
2619static int trace__set_duration(const struct option *opt, const char *str,
2620 int unset __maybe_unused)
2621{
2622 struct trace *trace = opt->value;
2623
2624 trace->duration_filter = atof(str);
2625 return 0;
2626}
2627
f078c385
ACM
2628static int trace__set_filter_pids(const struct option *opt, const char *str,
2629 int unset __maybe_unused)
2630{
2631 int ret = -1;
2632 size_t i;
2633 struct trace *trace = opt->value;
2634 /*
2635 * FIXME: introduce a intarray class, plain parse csv and create a
2636 * { int nr, int entries[] } struct...
2637 */
2638 struct intlist *list = intlist__new(str);
2639
2640 if (list == NULL)
2641 return -1;
2642
2643 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2644 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2645
2646 if (trace->filter_pids.entries == NULL)
2647 goto out;
2648
2649 trace->filter_pids.entries[0] = getpid();
2650
2651 for (i = 1; i < trace->filter_pids.nr; ++i)
2652 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2653
2654 intlist__delete(list);
2655 ret = 0;
2656out:
2657 return ret;
2658}
2659
c24ff998
ACM
2660static int trace__open_output(struct trace *trace, const char *filename)
2661{
2662 struct stat st;
2663
2664 if (!stat(filename, &st) && st.st_size) {
2665 char oldname[PATH_MAX];
2666
2667 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2668 unlink(oldname);
2669 rename(filename, oldname);
2670 }
2671
2672 trace->output = fopen(filename, "w");
2673
2674 return trace->output == NULL ? -errno : 0;
2675}
2676
598d02c5
SF
2677static int parse_pagefaults(const struct option *opt, const char *str,
2678 int unset __maybe_unused)
2679{
2680 int *trace_pgfaults = opt->value;
2681
2682 if (strcmp(str, "all") == 0)
2683 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2684 else if (strcmp(str, "maj") == 0)
2685 *trace_pgfaults |= TRACE_PFMAJ;
2686 else if (strcmp(str, "min") == 0)
2687 *trace_pgfaults |= TRACE_PFMIN;
2688 else
2689 return -1;
2690
2691 return 0;
2692}
2693
14a052df
ACM
2694static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2695{
2696 struct perf_evsel *evsel;
2697
e5cadb93 2698 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2699 evsel->handler = handler;
2700}
2701
514f1c67
ACM
2702int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
2703{
6fdd9cb7 2704 const char *trace_usage[] = {
f15eb531
NK
2705 "perf trace [<options>] [<command>]",
2706 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2707 "perf trace record [<options>] [<command>]",
2708 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2709 NULL
2710 };
2711 struct trace trace = {
514f1c67
ACM
2712 .syscalls = {
2713 . max = -1,
2714 },
2715 .opts = {
2716 .target = {
2717 .uid = UINT_MAX,
2718 .uses_mmap = true,
2719 },
2720 .user_freq = UINT_MAX,
2721 .user_interval = ULLONG_MAX,
509051ea 2722 .no_buffering = true,
38d5447d 2723 .mmap_pages = UINT_MAX,
9d9cad76 2724 .proc_map_timeout = 500,
514f1c67 2725 },
007d66a0 2726 .output = stderr,
50c95cbd 2727 .show_comm = true,
e281a960 2728 .trace_syscalls = true,
44621819 2729 .kernel_syscallchains = false,
05614993 2730 .max_stack = UINT_MAX,
514f1c67 2731 };
c24ff998 2732 const char *output_name = NULL;
2ae3a312 2733 const char *ev_qualifier_str = NULL;
514f1c67 2734 const struct option trace_options[] = {
14a052df
ACM
2735 OPT_CALLBACK(0, "event", &trace.evlist, "event",
2736 "event selector. use 'perf list' to list available events",
2737 parse_events_option),
50c95cbd
ACM
2738 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2739 "show the thread COMM next to its id"),
c522739d 2740 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
d303e85a 2741 OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
c24ff998 2742 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 2743 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
2744 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2745 "trace events on existing process id"),
ac9be8ee 2746 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 2747 "trace events on existing thread id"),
fa0e4ffe
ACM
2748 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2749 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 2750 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 2751 "system-wide collection from all CPUs"),
ac9be8ee 2752 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 2753 "list of cpus to monitor"),
6810fc91 2754 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 2755 "child tasks do not inherit counters"),
994a1f78
JO
2756 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2757 "number of mmap data pages",
2758 perf_evlist__parse_mmap_pages),
ac9be8ee 2759 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 2760 "user to profile"),
ae9ed035
ACM
2761 OPT_CALLBACK(0, "duration", &trace, "float",
2762 "show only events with duration > N.M ms",
2763 trace__set_duration),
1302d88e 2764 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 2765 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
2766 OPT_BOOLEAN('T', "time", &trace.full_time,
2767 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
2768 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2769 "Show only syscall summary with statistics"),
2770 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2771 "Show all syscalls and summary with statistics"),
598d02c5
SF
2772 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2773 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 2774 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 2775 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
2776 OPT_CALLBACK(0, "call-graph", &trace.opts,
2777 "record_mode[,record_size]", record_callchain_help,
2778 &record_parse_callchain_opt),
44621819
ACM
2779 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2780 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
2781 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2782 "Set the minimum stack depth when parsing the callchain, "
2783 "anything below the specified depth will be ignored."),
c6d4a494
ACM
2784 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2785 "Set the maximum stack depth when parsing the callchain, "
2786 "anything beyond the specified depth will be ignored. "
4cb93446 2787 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
9d9cad76
KL
2788 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2789 "per thread proc mmap processing timeout in ms"),
e36b7821
AB
2790 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2791 "ms to wait before starting measurement after program "
2792 "start"),
514f1c67
ACM
2793 OPT_END()
2794 };
ccd62a89 2795 bool __maybe_unused max_stack_user_set = true;
f3e459d1 2796 bool mmap_pages_user_set = true;
6fdd9cb7 2797 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 2798 int err;
32caf0d1 2799 char bf[BUFSIZ];
514f1c67 2800
4d08cb80
ACM
2801 signal(SIGSEGV, sighandler_dump_stack);
2802 signal(SIGFPE, sighandler_dump_stack);
2803
14a052df 2804 trace.evlist = perf_evlist__new();
fd0db102 2805 trace.sctbl = syscalltbl__new();
14a052df 2806
fd0db102 2807 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 2808 pr_err("Not enough memory to run!\n");
ff8f695c 2809 err = -ENOMEM;
14a052df
ACM
2810 goto out;
2811 }
2812
6fdd9cb7
YS
2813 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2814 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 2815
d7888573
WN
2816 err = bpf__setup_stdout(trace.evlist);
2817 if (err) {
2818 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2819 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2820 goto out;
2821 }
2822
59247e33
ACM
2823 err = -1;
2824
598d02c5
SF
2825 if (trace.trace_pgfaults) {
2826 trace.opts.sample_address = true;
2827 trace.opts.sample_time = true;
2828 }
2829
f3e459d1
ACM
2830 if (trace.opts.mmap_pages == UINT_MAX)
2831 mmap_pages_user_set = false;
2832
05614993 2833 if (trace.max_stack == UINT_MAX) {
fe176085 2834 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
2835 max_stack_user_set = false;
2836 }
2837
2838#ifdef HAVE_DWARF_UNWIND_SUPPORT
caa36ed7 2839 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
05614993
ACM
2840 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2841#endif
2842
2ddd5c04 2843 if (callchain_param.enabled) {
f3e459d1
ACM
2844 if (!mmap_pages_user_set && geteuid() == 0)
2845 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2846
566a0885 2847 symbol_conf.use_callchain = true;
f3e459d1 2848 }
566a0885 2849
14a052df
ACM
2850 if (trace.evlist->nr_entries > 0)
2851 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2852
1e28fe0a
SF
2853 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2854 return trace__record(&trace, argc-1, &argv[1]);
2855
2856 /* summary_only implies summary option, but don't overwrite summary if set */
2857 if (trace.summary_only)
2858 trace.summary = trace.summary_only;
2859
726f3234
ACM
2860 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2861 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
2862 pr_err("Please specify something to trace.\n");
2863 return -1;
2864 }
2865
59247e33
ACM
2866 if (!trace.trace_syscalls && ev_qualifier_str) {
2867 pr_err("The -e option can't be used with --no-syscalls.\n");
2868 goto out;
2869 }
2870
c24ff998
ACM
2871 if (output_name != NULL) {
2872 err = trace__open_output(&trace, output_name);
2873 if (err < 0) {
2874 perror("failed to create output file");
2875 goto out;
2876 }
2877 }
2878
fd0db102
ACM
2879 trace.open_id = syscalltbl__id(trace.sctbl, "open");
2880
2ae3a312 2881 if (ev_qualifier_str != NULL) {
b059efdf 2882 const char *s = ev_qualifier_str;
005438a8
ACM
2883 struct strlist_config slist_config = {
2884 .dirname = system_path(STRACE_GROUPS_DIR),
2885 };
b059efdf
ACM
2886
2887 trace.not_ev_qualifier = *s == '!';
2888 if (trace.not_ev_qualifier)
2889 ++s;
005438a8 2890 trace.ev_qualifier = strlist__new(s, &slist_config);
2ae3a312 2891 if (trace.ev_qualifier == NULL) {
c24ff998
ACM
2892 fputs("Not enough memory to parse event qualifier",
2893 trace.output);
2894 err = -ENOMEM;
2895 goto out_close;
2ae3a312 2896 }
d0cc439b
ACM
2897
2898 err = trace__validate_ev_qualifier(&trace);
2899 if (err)
2900 goto out_close;
2ae3a312
ACM
2901 }
2902
602ad878 2903 err = target__validate(&trace.opts.target);
32caf0d1 2904 if (err) {
602ad878 2905 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
2906 fprintf(trace.output, "%s", bf);
2907 goto out_close;
32caf0d1
NK
2908 }
2909
602ad878 2910 err = target__parse_uid(&trace.opts.target);
514f1c67 2911 if (err) {
602ad878 2912 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
2913 fprintf(trace.output, "%s", bf);
2914 goto out_close;
514f1c67
ACM
2915 }
2916
602ad878 2917 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
2918 trace.opts.target.system_wide = true;
2919
6810fc91
DA
2920 if (input_name)
2921 err = trace__replay(&trace);
2922 else
2923 err = trace__run(&trace, argc, argv);
1302d88e 2924
c24ff998
ACM
2925out_close:
2926 if (output_name != NULL)
2927 fclose(trace.output);
2928out:
1302d88e 2929 return err;
514f1c67 2930}