]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blame - tools/perf/builtin-trace.c
tools include: Include missing headers for fls() and types in linux/log2.h
[mirror_ubuntu-focal-kernel.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
514f1c67 24#include "util/evlist.h"
4b6ab94e 25#include <subcmd/exec-cmd.h>
752fde44 26#include "util/machine.h"
6810fc91 27#include "util/session.h"
752fde44 28#include "util/thread.h"
4b6ab94e 29#include <subcmd/parse-options.h>
2ae3a312 30#include "util/strlist.h"
bdc89661 31#include "util/intlist.h"
514f1c67 32#include "util/thread_map.h"
bf2575c1 33#include "util/stat.h"
fd5cead2 34#include "trace/beauty/beauty.h"
97978b3e 35#include "trace-event.h"
9aca7f17 36#include "util/parse-events.h"
ba504235 37#include "util/bpf-loader.h"
566a0885 38#include "callchain.h"
fd0db102 39#include "syscalltbl.h"
96c14451 40#include "rb_resort.h"
514f1c67 41
fd20e811 42#include <inttypes.h>
fd0db102 43#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
514f1c67 44#include <stdlib.h>
017037ff 45#include <string.h>
8dd2a131 46#include <linux/err.h>
997bba8c
ACM
47#include <linux/filter.h>
48#include <linux/audit.h>
877a7a11 49#include <linux/kernel.h>
39878d49 50#include <linux/random.h>
c6d4a494 51#include <linux/stringify.h>
bd48c63e 52#include <linux/time64.h>
514f1c67 53
3d689ed6
ACM
54#include "sane_ctype.h"
55
c188e7ac
ACM
56#ifndef O_CLOEXEC
57# define O_CLOEXEC 02000000
58#endif
59
d1d438a3
ACM
60struct trace {
61 struct perf_tool tool;
fd0db102 62 struct syscalltbl *sctbl;
d1d438a3
ACM
63 struct {
64 int max;
65 struct syscall *table;
66 struct {
67 struct perf_evsel *sys_enter,
68 *sys_exit;
69 } events;
70 } syscalls;
71 struct record_opts opts;
72 struct perf_evlist *evlist;
73 struct machine *host;
74 struct thread *current;
75 u64 base_time;
76 FILE *output;
77 unsigned long nr_events;
78 struct strlist *ev_qualifier;
79 struct {
80 size_t nr;
81 int *entries;
82 } ev_qualifier_ids;
d1d438a3
ACM
83 struct {
84 size_t nr;
85 pid_t *entries;
86 } filter_pids;
87 double duration_filter;
88 double runtime_ms;
89 struct {
90 u64 vfs_getname,
91 proc_getname;
92 } stats;
c6d4a494 93 unsigned int max_stack;
5cf9c84e 94 unsigned int min_stack;
d1d438a3
ACM
95 bool not_ev_qualifier;
96 bool live;
97 bool full_time;
98 bool sched;
99 bool multiple_threads;
100 bool summary;
101 bool summary_only;
102 bool show_comm;
103 bool show_tool_stats;
104 bool trace_syscalls;
44621819 105 bool kernel_syscallchains;
d1d438a3
ACM
106 bool force;
107 bool vfs_getname;
108 int trace_pgfaults;
fd0db102 109 int open_id;
d1d438a3 110};
a1c2552d 111
77170988
ACM
112struct tp_field {
113 int offset;
114 union {
115 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
116 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
117 };
118};
119
120#define TP_UINT_FIELD(bits) \
121static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
122{ \
55d43bca
DA
123 u##bits value; \
124 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
125 return value; \
77170988
ACM
126}
127
128TP_UINT_FIELD(8);
129TP_UINT_FIELD(16);
130TP_UINT_FIELD(32);
131TP_UINT_FIELD(64);
132
133#define TP_UINT_FIELD__SWAPPED(bits) \
134static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
135{ \
55d43bca
DA
136 u##bits value; \
137 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
138 return bswap_##bits(value);\
139}
140
141TP_UINT_FIELD__SWAPPED(16);
142TP_UINT_FIELD__SWAPPED(32);
143TP_UINT_FIELD__SWAPPED(64);
144
145static int tp_field__init_uint(struct tp_field *field,
146 struct format_field *format_field,
147 bool needs_swap)
148{
149 field->offset = format_field->offset;
150
151 switch (format_field->size) {
152 case 1:
153 field->integer = tp_field__u8;
154 break;
155 case 2:
156 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
157 break;
158 case 4:
159 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
160 break;
161 case 8:
162 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
163 break;
164 default:
165 return -1;
166 }
167
168 return 0;
169}
170
171static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
172{
173 return sample->raw_data + field->offset;
174}
175
176static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
177{
178 field->offset = format_field->offset;
179 field->pointer = tp_field__ptr;
180 return 0;
181}
182
183struct syscall_tp {
184 struct tp_field id;
185 union {
186 struct tp_field args, ret;
187 };
188};
189
190static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
191 struct tp_field *field,
192 const char *name)
193{
194 struct format_field *format_field = perf_evsel__field(evsel, name);
195
196 if (format_field == NULL)
197 return -1;
198
199 return tp_field__init_uint(field, format_field, evsel->needs_swap);
200}
201
/*
 * Initialize the integer accessor for member 'name' of the struct
 * syscall_tp hanging off evsel->priv; 'name' doubles as the tracepoint
 * field name.  The evsel argument is parenthesized so that any pointer
 * expression can be passed safely (note it is evaluated twice).
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
205
/*
 * Look up the tracepoint field called @name on @evsel and set @field up
 * to return a pointer into the raw payload.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	if (ff != NULL)
		return tp_field__init_ptr(field, ff);

	return -1;
}
217
/*
 * Initialize the pointer accessor for member 'name' of the struct
 * syscall_tp hanging off evsel->priv; 'name' doubles as the tracepoint
 * field name.  The evsel argument is parenthesized so that any pointer
 * expression can be passed safely (note it is evaluated twice).
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
221
222static void perf_evsel__delete_priv(struct perf_evsel *evsel)
223{
04662523 224 zfree(&evsel->priv);
77170988
ACM
225 perf_evsel__delete(evsel);
226}
227
96695d44
NK
228static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
229{
230 evsel->priv = malloc(sizeof(struct syscall_tp));
231 if (evsel->priv != NULL) {
232 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
233 goto out_delete;
234
235 evsel->handler = handler;
236 return 0;
237 }
238
239 return -ENOMEM;
240
241out_delete:
04662523 242 zfree(&evsel->priv);
96695d44
NK
243 return -ENOENT;
244}
245
/*
 * Create the evsel for the raw_syscalls:@direction tracepoint, set up its
 * syscall_tp private data and install @handler.
 *
 * Returns the new evsel, or NULL if the tracepoint cannot be created or
 * initialized.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel))
		evsel = perf_evsel__newtp("syscalls", direction);

	if (IS_ERR(evsel))
		return NULL;

	if (perf_evsel__init_syscall_tp(evsel, handler)) {
		perf_evsel__delete_priv(evsel);
		return NULL;
	}

	return evsel;
}
266
/*
 * Read member 'name' of the struct syscall_tp stored in evsel->priv from
 * 'sample', as an integer.  The evsel argument is parenthesized so any
 * pointer expression may be passed.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })

/* Same as perf_evsel__sc_tp_uint(), but yields a pointer into the payload. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
274
/*
 * Table mapping small integers to names.
 *
 * @offset:     value that corresponds to entries[0]; it is subtracted from
 *              the argument value before indexing.
 * @nr_entries: number of slots in @entries.
 * @entries:    the names themselves.
 */
struct strarray {
	int	    offset;
	int	    nr_entries;
	const char  **entries;
};
280
/*
 * Define 'strarray__<array>' describing the string table 'array', whose
 * first entry stands for the value 'off'.
 */
#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
	.offset	    = off, \
	.nr_entries = ARRAY_SIZE(array), \
	.entries    = array, \
}

/* Same, for tables whose first entry stands for the value 0. */
#define DEFINE_STRARRAY(array) DEFINE_STRARRAY_OFFSET(array, 0)
291
975b7c2f
ACM
292static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
293 const char *intfmt,
294 struct syscall_arg *arg)
1f115cb7 295{
1f115cb7 296 struct strarray *sa = arg->parm;
03e3adc9 297 int idx = arg->val - sa->offset;
1f115cb7
ACM
298
299 if (idx < 0 || idx >= sa->nr_entries)
975b7c2f 300 return scnprintf(bf, size, intfmt, arg->val);
1f115cb7
ACM
301
302 return scnprintf(bf, size, "%s", sa->entries[idx]);
303}
304
975b7c2f
ACM
/*
 * strarray formatter whose fallback for values without a name is plain
 * decimal.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRARRAY syscall_arg__scnprintf_strarray
312
844ae5b4
ACM
#if defined(__i386__) || defined(__x86_64__)
/*
 * FIXME: Make this available to all arches as soon as the ioctl beautifier
 *	  gets rewritten to support all arches.
 *
 * strarray formatter whose fallback for values without a name is
 * hexadecimal.
 */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%#x";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}

#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 326
75b757ca
ACM
327static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
328 struct syscall_arg *arg);
329
330#define SCA_FD syscall_arg__scnprintf_fd
331
48e1f91a
ACM
332#ifndef AT_FDCWD
333#define AT_FDCWD -100
334#endif
335
75b757ca
ACM
336static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
337 struct syscall_arg *arg)
338{
339 int fd = arg->val;
340
341 if (fd == AT_FDCWD)
342 return scnprintf(bf, size, "CWD");
343
344 return syscall_arg__scnprintf_fd(bf, size, arg);
345}
346
347#define SCA_FDAT syscall_arg__scnprintf_fd_at
348
349static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
350 struct syscall_arg *arg);
351
352#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
353
6e7eeb51 354static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
01533e97 355 struct syscall_arg *arg)
13d4ff3e 356{
01533e97 357 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
358}
359
beccb2b5
ACM
360#define SCA_HEX syscall_arg__scnprintf_hex
361
a1c2552d
ACM
362static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
363 struct syscall_arg *arg)
364{
365 return scnprintf(bf, size, "%d", arg->val);
366}
367
368#define SCA_INT syscall_arg__scnprintf_int
369
729a7841
ACM
370static const char *bpf_cmd[] = {
371 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
372 "MAP_GET_NEXT_KEY", "PROG_LOAD",
373};
374static DEFINE_STRARRAY(bpf_cmd);
375
03e3adc9
ACM
376static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
377static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 378
1f115cb7
ACM
379static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
380static DEFINE_STRARRAY(itimers);
381
b62bee1b
ACM
382static const char *keyctl_options[] = {
383 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
384 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
385 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
386 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
387 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
388};
389static DEFINE_STRARRAY(keyctl_options);
390
efe6b882
ACM
391static const char *whences[] = { "SET", "CUR", "END",
392#ifdef SEEK_DATA
393"DATA",
394#endif
395#ifdef SEEK_HOLE
396"HOLE",
397#endif
398};
399static DEFINE_STRARRAY(whences);
f9da0b0c 400
80f587d5
ACM
401static const char *fcntl_cmds[] = {
402 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
403 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "F_GETLK64",
404 "F_SETLK64", "F_SETLKW64", "F_SETOWN_EX", "F_GETOWN_EX",
405 "F_GETOWNER_UIDS",
406};
407static DEFINE_STRARRAY(fcntl_cmds);
408
c045bf02
ACM
409static const char *rlimit_resources[] = {
410 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
411 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
412 "RTTIME",
413};
414static DEFINE_STRARRAY(rlimit_resources);
415
eb5b1b14
ACM
416static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
417static DEFINE_STRARRAY(sighow);
418
4f8c1b74
DA
419static const char *clockid[] = {
420 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
28ebb87c
ACM
421 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
422 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
4f8c1b74
DA
423};
424static DEFINE_STRARRAY(clockid);
425
e10bce81
ACM
/*
 * Names of the socket address families, indexed by AF_* value.
 *
 * The index of each string must equal the numeric AF_* constant, so holes
 * are not allowed: "MPLS" (28) sits between "IB" (27) and "CAN" (29).
 * Leaving it out shifts every later family down by one.
 */
static const char *socket_families[] = {
	"UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
	"BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
	"SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
	"RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "MPLS", "CAN",
	"TIPC", "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154",
	"CAIF", "ALG", "NFC", "VSOCK",
};
434static DEFINE_STRARRAY(socket_families);
435
51108999
ACM
436static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
437 struct syscall_arg *arg)
438{
439 size_t printed = 0;
440 int mode = arg->val;
441
442 if (mode == F_OK) /* 0 */
443 return scnprintf(bf, size, "F");
444#define P_MODE(n) \
445 if (mode & n##_OK) { \
446 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
447 mode &= ~n##_OK; \
448 }
449
450 P_MODE(R);
451 P_MODE(W);
452 P_MODE(X);
453#undef P_MODE
454
455 if (mode)
456 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
457
458 return printed;
459}
460
461#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
462
f994592d
ACM
463static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
464 struct syscall_arg *arg);
465
466#define SCA_FILENAME syscall_arg__scnprintf_filename
467
46cce19b
ACM
468static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
469 struct syscall_arg *arg)
470{
471 int printed = 0, flags = arg->val;
472
473#define P_FLAG(n) \
474 if (flags & O_##n) { \
475 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
476 flags &= ~O_##n; \
477 }
478
479 P_FLAG(CLOEXEC);
480 P_FLAG(NONBLOCK);
481#undef P_FLAG
482
483 if (flags)
484 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
485
486 return printed;
487}
488
489#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
490
844ae5b4
ACM
491#if defined(__i386__) || defined(__x86_64__)
492/*
493 * FIXME: Make this available to all arches.
494 */
78645cf3
ACM
495#define TCGETS 0x5401
496
497static const char *tioctls[] = {
498 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
499 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
500 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
501 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
502 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
503 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
504 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
505 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
506 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
507 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
508 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
509 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
510 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
511 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
512 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
513};
514
515static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
844ae5b4 516#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 517
a355a61e
ACM
518#ifndef GRND_NONBLOCK
519#define GRND_NONBLOCK 0x0001
520#endif
521#ifndef GRND_RANDOM
522#define GRND_RANDOM 0x0002
523#endif
524
39878d49
ACM
525static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
526 struct syscall_arg *arg)
527{
528 int printed = 0, flags = arg->val;
529
530#define P_FLAG(n) \
531 if (flags & GRND_##n) { \
532 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
533 flags &= ~GRND_##n; \
534 }
535
536 P_FLAG(RANDOM);
537 P_FLAG(NONBLOCK);
538#undef P_FLAG
539
540 if (flags)
541 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
542
543 return printed;
544}
545
546#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
547
453350dd
ACM
/*
 * Shorthand for syscall_fmts[] entries: format argument number 'arg' via
 * the table strarray__<array>.  'name' is not expanded, it only documents
 * the argument at the point of use.
 */
#define STRARRAY(arg, name, array) \
	  .arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
	  .arg_parm	 = { [arg] = &strarray__##array, }
551
ea8dc3ce 552#include "trace/beauty/eventfd.c"
8bf382ce 553#include "trace/beauty/flock.c"
d5d71e86 554#include "trace/beauty/futex_op.c"
df4cb167 555#include "trace/beauty/mmap.c"
ba2f22cf 556#include "trace/beauty/mode_t.c"
a30e6259 557#include "trace/beauty/msg_flags.c"
8f48df69 558#include "trace/beauty/open_flags.c"
62de344e 559#include "trace/beauty/perf_event_open.c"
d5d71e86 560#include "trace/beauty/pid.c"
a3bca91f 561#include "trace/beauty/sched_policy.c"
f5cd95ea 562#include "trace/beauty/seccomp.c"
12199d8e 563#include "trace/beauty/signum.c"
bbf86c43 564#include "trace/beauty/socket_type.c"
7206b900 565#include "trace/beauty/waitid_options.c"
a3bca91f 566
514f1c67
ACM
567static struct syscall_fmt {
568 const char *name;
aec1930b 569 const char *alias;
01533e97 570 size_t (*arg_scnprintf[6])(char *bf, size_t size, struct syscall_arg *arg);
1f115cb7 571 void *arg_parm[6];
514f1c67 572 bool errmsg;
11c8e39f 573 bool errpid;
514f1c67 574 bool timeout;
04b34729 575 bool hexret;
514f1c67 576} syscall_fmts[] = {
51108999 577 { .name = "access", .errmsg = true,
12f3ca4f 578 .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, },
aec1930b 579 { .name = "arch_prctl", .errmsg = true, .alias = "prctl", },
729a7841 580 { .name = "bpf", .errmsg = true, STRARRAY(0, cmd, bpf_cmd), },
beccb2b5
ACM
581 { .name = "brk", .hexret = true,
582 .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, },
12f3ca4f
ACM
583 { .name = "chdir", .errmsg = true, },
584 { .name = "chmod", .errmsg = true, },
585 { .name = "chroot", .errmsg = true, },
4f8c1b74 586 { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), },
11c8e39f 587 { .name = "clone", .errpid = true, },
75b757ca 588 { .name = "close", .errmsg = true,
48000a1a 589 .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, },
a14bb860 590 { .name = "connect", .errmsg = true, },
12f3ca4f 591 { .name = "creat", .errmsg = true, },
b6565c90
ACM
592 { .name = "dup", .errmsg = true, },
593 { .name = "dup2", .errmsg = true, },
594 { .name = "dup3", .errmsg = true, },
453350dd 595 { .name = "epoll_ctl", .errmsg = true, STRARRAY(1, op, epoll_ctl_ops), },
49af9e93
ACM
596 { .name = "eventfd2", .errmsg = true,
597 .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, },
12f3ca4f 598 { .name = "faccessat", .errmsg = true, },
b6565c90
ACM
599 { .name = "fadvise64", .errmsg = true, },
600 { .name = "fallocate", .errmsg = true, },
601 { .name = "fchdir", .errmsg = true, },
602 { .name = "fchmod", .errmsg = true, },
75b757ca 603 { .name = "fchmodat", .errmsg = true,
12f3ca4f 604 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90 605 { .name = "fchown", .errmsg = true, },
75b757ca 606 { .name = "fchownat", .errmsg = true,
12f3ca4f 607 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
75b757ca 608 { .name = "fcntl", .errmsg = true,
b6565c90 609 .arg_scnprintf = { [1] = SCA_STRARRAY, /* cmd */ },
75b757ca 610 .arg_parm = { [1] = &strarray__fcntl_cmds, /* cmd */ }, },
b6565c90 611 { .name = "fdatasync", .errmsg = true, },
5cea6ff2 612 { .name = "flock", .errmsg = true,
b6565c90
ACM
613 .arg_scnprintf = { [1] = SCA_FLOCK, /* cmd */ }, },
614 { .name = "fsetxattr", .errmsg = true, },
615 { .name = "fstat", .errmsg = true, .alias = "newfstat", },
12f3ca4f 616 { .name = "fstatat", .errmsg = true, .alias = "newfstatat", },
b6565c90
ACM
617 { .name = "fstatfs", .errmsg = true, },
618 { .name = "fsync", .errmsg = true, },
619 { .name = "ftruncate", .errmsg = true, },
f9da0b0c
ACM
620 { .name = "futex", .errmsg = true,
621 .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, },
75b757ca 622 { .name = "futimesat", .errmsg = true,
12f3ca4f 623 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
b6565c90
ACM
624 { .name = "getdents", .errmsg = true, },
625 { .name = "getdents64", .errmsg = true, },
453350dd 626 { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), },
c65f1070 627 { .name = "getpid", .errpid = true, },
d1d438a3 628 { .name = "getpgid", .errpid = true, },
c65f1070 629 { .name = "getppid", .errpid = true, },
39878d49
ACM
630 { .name = "getrandom", .errmsg = true,
631 .arg_scnprintf = { [2] = SCA_GETRANDOM_FLAGS, /* flags */ }, },
453350dd 632 { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f
ACM
633 { .name = "getxattr", .errmsg = true, },
634 { .name = "inotify_add_watch", .errmsg = true, },
beccb2b5 635 { .name = "ioctl", .errmsg = true,
b6565c90 636 .arg_scnprintf = {
844ae5b4
ACM
637#if defined(__i386__) || defined(__x86_64__)
638/*
639 * FIXME: Make this available to all arches.
640 */
78645cf3
ACM
641 [1] = SCA_STRHEXARRAY, /* cmd */
642 [2] = SCA_HEX, /* arg */ },
643 .arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
844ae5b4
ACM
644#else
645 [2] = SCA_HEX, /* arg */ }, },
646#endif
b62bee1b 647 { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), },
8bad5b0a
ACM
648 { .name = "kill", .errmsg = true,
649 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f
ACM
650 { .name = "lchown", .errmsg = true, },
651 { .name = "lgetxattr", .errmsg = true, },
75b757ca 652 { .name = "linkat", .errmsg = true,
48000a1a 653 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
12f3ca4f
ACM
654 { .name = "listxattr", .errmsg = true, },
655 { .name = "llistxattr", .errmsg = true, },
656 { .name = "lremovexattr", .errmsg = true, },
75b757ca 657 { .name = "lseek", .errmsg = true,
b6565c90 658 .arg_scnprintf = { [2] = SCA_STRARRAY, /* whence */ },
75b757ca 659 .arg_parm = { [2] = &strarray__whences, /* whence */ }, },
12f3ca4f
ACM
660 { .name = "lsetxattr", .errmsg = true, },
661 { .name = "lstat", .errmsg = true, .alias = "newlstat", },
662 { .name = "lsxattr", .errmsg = true, },
9e9716d1
ACM
663 { .name = "madvise", .errmsg = true,
664 .arg_scnprintf = { [0] = SCA_HEX, /* start */
665 [2] = SCA_MADV_BHV, /* behavior */ }, },
12f3ca4f 666 { .name = "mkdir", .errmsg = true, },
75b757ca 667 { .name = "mkdirat", .errmsg = true,
12f3ca4f
ACM
668 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
669 { .name = "mknod", .errmsg = true, },
75b757ca 670 { .name = "mknodat", .errmsg = true,
12f3ca4f 671 .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, },
3d903aa7
ACM
672 { .name = "mlock", .errmsg = true,
673 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
674 { .name = "mlockall", .errmsg = true,
675 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5 676 { .name = "mmap", .hexret = true,
ae685380 677 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
941557e0 678 [2] = SCA_MMAP_PROT, /* prot */
b6565c90 679 [3] = SCA_MMAP_FLAGS, /* flags */ }, },
beccb2b5 680 { .name = "mprotect", .errmsg = true,
ae685380
ACM
681 .arg_scnprintf = { [0] = SCA_HEX, /* start */
682 [2] = SCA_MMAP_PROT, /* prot */ }, },
090389b6
ACM
683 { .name = "mq_unlink", .errmsg = true,
684 .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, },
ae685380
ACM
685 { .name = "mremap", .hexret = true,
686 .arg_scnprintf = { [0] = SCA_HEX, /* addr */
86998dda 687 [3] = SCA_MREMAP_FLAGS, /* flags */
ae685380 688 [4] = SCA_HEX, /* new_addr */ }, },
3d903aa7
ACM
689 { .name = "munlock", .errmsg = true,
690 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
beccb2b5
ACM
691 { .name = "munmap", .errmsg = true,
692 .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, },
75b757ca 693 { .name = "name_to_handle_at", .errmsg = true,
48000a1a 694 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
75b757ca 695 { .name = "newfstatat", .errmsg = true,
12f3ca4f 696 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
be65a89a 697 { .name = "open", .errmsg = true,
12f3ca4f 698 .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 699 { .name = "open_by_handle_at", .errmsg = true,
75b757ca
ACM
700 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
701 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
31cd3855 702 { .name = "openat", .errmsg = true,
75b757ca
ACM
703 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
704 [2] = SCA_OPEN_FLAGS, /* flags */ }, },
a1c2552d 705 { .name = "perf_event_open", .errmsg = true,
ccd9b2a7 706 .arg_scnprintf = { [2] = SCA_INT, /* cpu */
a1c2552d
ACM
707 [3] = SCA_FD, /* group_fd */
708 [4] = SCA_PERF_FLAGS, /* flags */ }, },
46cce19b
ACM
709 { .name = "pipe2", .errmsg = true,
710 .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
aec1930b
ACM
711 { .name = "poll", .errmsg = true, .timeout = true, },
712 { .name = "ppoll", .errmsg = true, .timeout = true, },
b6565c90
ACM
713 { .name = "pread", .errmsg = true, .alias = "pread64", },
714 { .name = "preadv", .errmsg = true, .alias = "pread", },
453350dd 715 { .name = "prlimit64", .errmsg = true, STRARRAY(1, resource, rlimit_resources), },
b6565c90
ACM
716 { .name = "pwrite", .errmsg = true, .alias = "pwrite64", },
717 { .name = "pwritev", .errmsg = true, },
718 { .name = "read", .errmsg = true, },
12f3ca4f 719 { .name = "readlink", .errmsg = true, },
75b757ca 720 { .name = "readlinkat", .errmsg = true,
12f3ca4f 721 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
b6565c90 722 { .name = "readv", .errmsg = true, },
b2cc99fd 723 { .name = "recvfrom", .errmsg = true,
b6565c90 724 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 725 { .name = "recvmmsg", .errmsg = true,
b6565c90 726 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 727 { .name = "recvmsg", .errmsg = true,
b6565c90 728 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
12f3ca4f 729 { .name = "removexattr", .errmsg = true, },
75b757ca 730 { .name = "renameat", .errmsg = true,
48000a1a 731 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
12f3ca4f 732 { .name = "rmdir", .errmsg = true, },
8bad5b0a
ACM
733 { .name = "rt_sigaction", .errmsg = true,
734 .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, },
453350dd 735 { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), },
8bad5b0a
ACM
736 { .name = "rt_sigqueueinfo", .errmsg = true,
737 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
738 { .name = "rt_tgsigqueueinfo", .errmsg = true,
739 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
f0bbd602
ACM
740 { .name = "sched_getattr", .errmsg = true, },
741 { .name = "sched_setattr", .errmsg = true, },
a3bca91f
ACM
742 { .name = "sched_setscheduler", .errmsg = true,
743 .arg_scnprintf = { [1] = SCA_SCHED_POLICY, /* policy */ }, },
997bba8c
ACM
744 { .name = "seccomp", .errmsg = true,
745 .arg_scnprintf = { [0] = SCA_SECCOMP_OP, /* op */
746 [1] = SCA_SECCOMP_FLAGS, /* flags */ }, },
aec1930b 747 { .name = "select", .errmsg = true, .timeout = true, },
b2cc99fd 748 { .name = "sendmmsg", .errmsg = true,
b6565c90 749 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 750 { .name = "sendmsg", .errmsg = true,
b6565c90 751 .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, },
b2cc99fd 752 { .name = "sendto", .errmsg = true,
b6565c90 753 .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, },
c65f1070 754 { .name = "set_tid_address", .errpid = true, },
453350dd 755 { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), },
d1d438a3 756 { .name = "setpgid", .errmsg = true, },
453350dd 757 { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
12f3ca4f 758 { .name = "setxattr", .errmsg = true, },
b6565c90 759 { .name = "shutdown", .errmsg = true, },
e10bce81 760 { .name = "socket", .errmsg = true,
a28b24b2
ACM
761 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
762 [1] = SCA_SK_TYPE, /* type */ },
07120aa5
ACM
763 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
764 { .name = "socketpair", .errmsg = true,
765 .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */
766 [1] = SCA_SK_TYPE, /* type */ },
e10bce81 767 .arg_parm = { [0] = &strarray__socket_families, /* family */ }, },
12f3ca4f
ACM
768 { .name = "stat", .errmsg = true, .alias = "newstat", },
769 { .name = "statfs", .errmsg = true, },
fd5cead2
ACM
770 { .name = "statx", .errmsg = true,
771 .arg_scnprintf = { [0] = SCA_FDAT, /* flags */
772 [2] = SCA_STATX_FLAGS, /* flags */
773 [3] = SCA_STATX_MASK, /* mask */ }, },
34221118
ACM
774 { .name = "swapoff", .errmsg = true,
775 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
776 { .name = "swapon", .errmsg = true,
777 .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, },
75b757ca 778 { .name = "symlinkat", .errmsg = true,
48000a1a 779 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
8bad5b0a
ACM
780 { .name = "tgkill", .errmsg = true,
781 .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, },
782 { .name = "tkill", .errmsg = true,
783 .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
12f3ca4f 784 { .name = "truncate", .errmsg = true, },
e5959683 785 { .name = "uname", .errmsg = true, .alias = "newuname", },
75b757ca 786 { .name = "unlinkat", .errmsg = true,
12f3ca4f
ACM
787 .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, },
788 { .name = "utime", .errmsg = true, },
75b757ca 789 { .name = "utimensat", .errmsg = true,
12f3ca4f
ACM
790 .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, },
791 { .name = "utimes", .errmsg = true, },
b6565c90 792 { .name = "vmsplice", .errmsg = true, },
11c8e39f 793 { .name = "wait4", .errpid = true,
7206b900 794 .arg_scnprintf = { [2] = SCA_WAITID_OPTIONS, /* options */ }, },
11c8e39f 795 { .name = "waitid", .errpid = true,
7206b900 796 .arg_scnprintf = { [3] = SCA_WAITID_OPTIONS, /* options */ }, },
b6565c90
ACM
797 { .name = "write", .errmsg = true, },
798 { .name = "writev", .errmsg = true, },
514f1c67
ACM
799};
800
801static int syscall_fmt__cmp(const void *name, const void *fmtp)
802{
803 const struct syscall_fmt *fmt = fmtp;
804 return strcmp(name, fmt->name);
805}
806
807static struct syscall_fmt *syscall_fmt__find(const char *name)
808{
809 const int nmemb = ARRAY_SIZE(syscall_fmts);
810 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
811}
812
/*
 * Description of one syscall.
 *
 * NOTE(review): the code filling this in is outside this part of the file.
 * 'arg_scnprintf' and 'arg_parm' have the same element types as the
 * members of the same name in struct syscall_fmt; confirm how they relate
 * where they are assigned.
 */
struct syscall {
	struct event_format *tp_format;
	int		    nr_args;
	struct format_field *args;
	const char	    *name;
	bool		    is_exit;
	struct syscall_fmt  *fmt;
	size_t		    (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void		    **arg_parm;
};
823
fd2b2975
ACM
824/*
825 * We need to have this 'calculated' boolean because in some cases we really
826 * don't know what is the duration of a syscall, for instance, when we start
827 * a session and some threads are waiting for a syscall to finish, say 'poll',
828 * in which case all we can do is to print "( ? ) for duration and for the
829 * start timestamp.
830 */
831static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
832{
833 double duration = (double)t / NSEC_PER_MSEC;
834 size_t printed = fprintf(fp, "(");
835
fd2b2975
ACM
836 if (!calculated)
837 printed += fprintf(fp, " ? ");
838 else if (duration >= 1.0)
60c907ab
ACM
839 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
840 else if (duration >= 0.01)
841 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
842 else
843 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 844 return printed + fprintf(fp, "): ");
60c907ab
ACM
845}
846
f994592d
ACM
847/**
848 * filename.ptr: The filename char pointer that will be vfs_getname'd
849 * filename.entry_str_pos: Where to insert the string translated from
850 * filename.ptr by the vfs_getname tracepoint/kprobe.
851 */
752fde44
ACM
852struct thread_trace {
853 u64 entry_time;
752fde44 854 bool entry_pending;
efd5745e 855 unsigned long nr_events;
a2ea67d7 856 unsigned long pfmaj, pfmin;
752fde44 857 char *entry_str;
1302d88e 858 double runtime_ms;
f994592d
ACM
859 struct {
860 unsigned long ptr;
7f4f8001
ACM
861 short int entry_str_pos;
862 bool pending_open;
863 unsigned int namelen;
864 char *name;
f994592d 865 } filename;
75b757ca
ACM
866 struct {
867 int max;
868 char **table;
869 } paths;
bf2575c1
DA
870
871 struct intlist *syscall_stats;
752fde44
ACM
872};
873
874static struct thread_trace *thread_trace__new(void)
875{
75b757ca
ACM
876 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
877
878 if (ttrace)
879 ttrace->paths.max = -1;
880
bf2575c1
DA
881 ttrace->syscall_stats = intlist__new(NULL);
882
75b757ca 883 return ttrace;
752fde44
ACM
884}
885
c24ff998 886static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 887{
efd5745e
ACM
888 struct thread_trace *ttrace;
889
752fde44
ACM
890 if (thread == NULL)
891 goto fail;
892
89dceb22
NK
893 if (thread__priv(thread) == NULL)
894 thread__set_priv(thread, thread_trace__new());
48000a1a 895
89dceb22 896 if (thread__priv(thread) == NULL)
752fde44
ACM
897 goto fail;
898
89dceb22 899 ttrace = thread__priv(thread);
efd5745e
ACM
900 ++ttrace->nr_events;
901
902 return ttrace;
752fde44 903fail:
c24ff998 904 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
905 "WARNING: not enough memory, dropping samples!\n");
906 return NULL;
907}
908
598d02c5
SF
/* Bit flags; NOTE(review): presumably select major/minor page fault tracing. */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size, in bytes, of the per-thread thread_trace::entry_str buffer. */
static const size_t trace__entry_str_size = 2048;
913
97119f37 914static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 915{
89dceb22 916 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
917
918 if (fd > ttrace->paths.max) {
919 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
920
921 if (npath == NULL)
922 return -1;
923
924 if (ttrace->paths.max != -1) {
925 memset(npath + ttrace->paths.max + 1, 0,
926 (fd - ttrace->paths.max) * sizeof(char *));
927 } else {
928 memset(npath, 0, (fd + 1) * sizeof(char *));
929 }
930
931 ttrace->paths.table = npath;
932 ttrace->paths.max = fd;
933 }
934
935 ttrace->paths.table[fd] = strdup(pathname);
936
937 return ttrace->paths.table[fd] != NULL ? 0 : -1;
938}
939
97119f37
ACM
940static int thread__read_fd_path(struct thread *thread, int fd)
941{
942 char linkname[PATH_MAX], pathname[PATH_MAX];
943 struct stat st;
944 int ret;
945
946 if (thread->pid_ == thread->tid) {
947 scnprintf(linkname, sizeof(linkname),
948 "/proc/%d/fd/%d", thread->pid_, fd);
949 } else {
950 scnprintf(linkname, sizeof(linkname),
951 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
952 }
953
954 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
955 return -1;
956
957 ret = readlink(linkname, pathname, sizeof(pathname));
958
959 if (ret < 0 || ret > st.st_size)
960 return -1;
961
962 pathname[ret] = '\0';
963 return trace__set_fd_pathname(thread, fd, pathname);
964}
965
c522739d
ACM
966static const char *thread__fd_path(struct thread *thread, int fd,
967 struct trace *trace)
75b757ca 968{
89dceb22 969 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
970
971 if (ttrace == NULL)
972 return NULL;
973
974 if (fd < 0)
975 return NULL;
976
cdcd1e6b 977 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
978 if (!trace->live)
979 return NULL;
980 ++trace->stats.proc_getname;
cdcd1e6b 981 if (thread__read_fd_path(thread, fd))
c522739d
ACM
982 return NULL;
983 }
75b757ca
ACM
984
985 return ttrace->paths.table[fd];
986}
987
988static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
989 struct syscall_arg *arg)
990{
991 int fd = arg->val;
992 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 993 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
994
995 if (path)
996 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
997
998 return printed;
999}
1000
1001static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1002 struct syscall_arg *arg)
1003{
1004 int fd = arg->val;
1005 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1006 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1007
04662523
ACM
1008 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1009 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1010
1011 return printed;
1012}
1013
f994592d
ACM
1014static void thread__set_filename_pos(struct thread *thread, const char *bf,
1015 unsigned long ptr)
1016{
1017 struct thread_trace *ttrace = thread__priv(thread);
1018
1019 ttrace->filename.ptr = ptr;
1020 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1021}
1022
1023static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1024 struct syscall_arg *arg)
1025{
1026 unsigned long ptr = arg->val;
1027
1028 if (!arg->trace->vfs_getname)
1029 return scnprintf(bf, size, "%#x", ptr);
1030
1031 thread__set_filename_pos(arg->thread, bf, ptr);
1032 return 0;
1033}
1034
ae9ed035
ACM
1035static bool trace__filter_duration(struct trace *trace, double t)
1036{
1037 return t < (trace->duration_filter * NSEC_PER_MSEC);
1038}
1039
fd2b2975 1040static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1041{
1042 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1043
60c907ab 1044 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1045}
1046
fd2b2975
ACM
1047/*
1048 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1049 * using ttrace->entry_time for a thread that receives a sys_exit without
1050 * first having received a sys_enter ("poll" issued before tracing session
1051 * starts, lost sys_enter exit due to ring buffer overflow).
1052 */
1053static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1054{
1055 if (tstamp > 0)
1056 return __trace__fprintf_tstamp(trace, tstamp, fp);
1057
1058 return fprintf(fp, " ? ");
1059}
1060
/* Set by sig_handler(): a signal asked us to stop. */
static bool done = false;
/* Set by sig_handler(): the last signal handled was SIGINT. */
static bool interrupted = false;

/* Signal handler: request termination and note whether it was a SIGINT. */
static void sig_handler(int sig)
{
	interrupted = (sig == SIGINT);
	done = true;
}
1069
752fde44 1070static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
fd2b2975 1071 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
752fde44
ACM
1072{
1073 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
fd2b2975 1074 printed += fprintf_duration(duration, duration_calculated, fp);
752fde44 1075
50c95cbd
ACM
1076 if (trace->multiple_threads) {
1077 if (trace->show_comm)
1902efe7 1078 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1079 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1080 }
752fde44
ACM
1081
1082 return printed;
1083}
1084
c24ff998 1085static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1086 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1087{
1088 int ret = 0;
1089
1090 switch (event->header.type) {
1091 case PERF_RECORD_LOST:
c24ff998 1092 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1093 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1094 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1095 break;
752fde44 1096 default:
162f0bef 1097 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1098 break;
1099 }
1100
1101 return ret;
1102}
1103
c24ff998 1104static int trace__tool_process(struct perf_tool *tool,
752fde44 1105 union perf_event *event,
162f0bef 1106 struct perf_sample *sample,
752fde44
ACM
1107 struct machine *machine)
1108{
c24ff998 1109 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1110 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1111}
1112
caf8a0d0
ACM
1113static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1114{
1115 struct machine *machine = vmachine;
1116
1117 if (machine->kptr_restrict_warned)
1118 return NULL;
1119
1120 if (symbol_conf.kptr_restrict) {
1121 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1122 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1123 "Kernel samples will not be resolved.\n");
1124 machine->kptr_restrict_warned = true;
1125 return NULL;
1126 }
1127
1128 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1129}
1130
752fde44
ACM
1131static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1132{
0a7e6d1b 1133 int err = symbol__init(NULL);
752fde44
ACM
1134
1135 if (err)
1136 return err;
1137
8fb598e5
DA
1138 trace->host = machine__new_host();
1139 if (trace->host == NULL)
1140 return -ENOMEM;
752fde44 1141
caf8a0d0 1142 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
706c3da4
ACM
1143 return -errno;
1144
a33fbd56 1145 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76
KL
1146 evlist->threads, trace__tool_process, false,
1147 trace->opts.proc_map_timeout);
752fde44
ACM
1148 if (err)
1149 symbol__exit();
1150
1151 return err;
1152}
1153
13d4ff3e
ACM
1154static int syscall__set_arg_fmts(struct syscall *sc)
1155{
1156 struct format_field *field;
b6565c90 1157 int idx = 0, len;
13d4ff3e 1158
f208bd8d 1159 sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *));
13d4ff3e
ACM
1160 if (sc->arg_scnprintf == NULL)
1161 return -1;
1162
1f115cb7
ACM
1163 if (sc->fmt)
1164 sc->arg_parm = sc->fmt->arg_parm;
1165
f208bd8d 1166 for (field = sc->args; field; field = field->next) {
beccb2b5
ACM
1167 if (sc->fmt && sc->fmt->arg_scnprintf[idx])
1168 sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx];
12f3ca4f
ACM
1169 else if (strcmp(field->type, "const char *") == 0 &&
1170 (strcmp(field->name, "filename") == 0 ||
1171 strcmp(field->name, "path") == 0 ||
1172 strcmp(field->name, "pathname") == 0))
1173 sc->arg_scnprintf[idx] = SCA_FILENAME;
beccb2b5 1174 else if (field->flags & FIELD_IS_POINTER)
13d4ff3e 1175 sc->arg_scnprintf[idx] = syscall_arg__scnprintf_hex;
d1d438a3
ACM
1176 else if (strcmp(field->type, "pid_t") == 0)
1177 sc->arg_scnprintf[idx] = SCA_PID;
ba2f22cf
ACM
1178 else if (strcmp(field->type, "umode_t") == 0)
1179 sc->arg_scnprintf[idx] = SCA_MODE_T;
b6565c90
ACM
1180 else if ((strcmp(field->type, "int") == 0 ||
1181 strcmp(field->type, "unsigned int") == 0 ||
1182 strcmp(field->type, "long") == 0) &&
1183 (len = strlen(field->name)) >= 2 &&
1184 strcmp(field->name + len - 2, "fd") == 0) {
1185 /*
1186 * /sys/kernel/tracing/events/syscalls/sys_enter*
1187 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1188 * 65 int
1189 * 23 unsigned int
1190 * 7 unsigned long
1191 */
1192 sc->arg_scnprintf[idx] = SCA_FD;
1193 }
13d4ff3e
ACM
1194 ++idx;
1195 }
1196
1197 return 0;
1198}
1199
514f1c67
ACM
1200static int trace__read_syscall_info(struct trace *trace, int id)
1201{
1202 char tp_name[128];
1203 struct syscall *sc;
fd0db102 1204 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1205
1206 if (name == NULL)
1207 return -1;
514f1c67
ACM
1208
1209 if (id > trace->syscalls.max) {
1210 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1211
1212 if (nsyscalls == NULL)
1213 return -1;
1214
1215 if (trace->syscalls.max != -1) {
1216 memset(nsyscalls + trace->syscalls.max + 1, 0,
1217 (id - trace->syscalls.max) * sizeof(*sc));
1218 } else {
1219 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1220 }
1221
1222 trace->syscalls.table = nsyscalls;
1223 trace->syscalls.max = id;
1224 }
1225
1226 sc = trace->syscalls.table + id;
3a531260 1227 sc->name = name;
2ae3a312 1228
3a531260 1229 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1230
aec1930b 1231 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1232 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1233
8dd2a131 1234 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1235 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1236 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1237 }
514f1c67 1238
8dd2a131 1239 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1240 return -1;
1241
f208bd8d
ACM
1242 sc->args = sc->tp_format->format.fields;
1243 sc->nr_args = sc->tp_format->format.nr_fields;
c42de706
TS
1244 /*
1245 * We need to check and discard the first variable '__syscall_nr'
1246 * or 'nr' that mean the syscall number. It is needless here.
1247 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1248 */
1249 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1250 sc->args = sc->args->next;
1251 --sc->nr_args;
1252 }
1253
5089f20e
ACM
1254 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1255
13d4ff3e 1256 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1257}
1258
d0cc439b
ACM
1259static int trace__validate_ev_qualifier(struct trace *trace)
1260{
8b3ce757 1261 int err = 0, i;
d0cc439b
ACM
1262 struct str_node *pos;
1263
8b3ce757
ACM
1264 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1265 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1266 sizeof(trace->ev_qualifier_ids.entries[0]));
1267
1268 if (trace->ev_qualifier_ids.entries == NULL) {
1269 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1270 trace->output);
1271 err = -EINVAL;
1272 goto out;
1273 }
1274
1275 i = 0;
1276
602a1f4d 1277 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1278 const char *sc = pos->s;
fd0db102 1279 int id = syscalltbl__id(trace->sctbl, sc);
d0cc439b 1280
8b3ce757 1281 if (id < 0) {
d0cc439b
ACM
1282 if (err == 0) {
1283 fputs("Error:\tInvalid syscall ", trace->output);
1284 err = -EINVAL;
1285 } else {
1286 fputs(", ", trace->output);
1287 }
1288
1289 fputs(sc, trace->output);
1290 }
8b3ce757
ACM
1291
1292 trace->ev_qualifier_ids.entries[i++] = id;
d0cc439b
ACM
1293 }
1294
1295 if (err < 0) {
1296 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1297 "\nHint:\tand: 'man syscalls'\n", trace->output);
8b3ce757
ACM
1298 zfree(&trace->ev_qualifier_ids.entries);
1299 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1300 }
8b3ce757 1301out:
d0cc439b
ACM
1302 return err;
1303}
1304
55d43bca
DA
1305/*
1306 * args is to be interpreted as a series of longs but we need to handle
1307 * 8-byte unaligned accesses. args points to raw_data within the event
1308 * and raw_data is guaranteed to be 8-byte unaligned because it is
1309 * preceded by raw_size which is a u32. So we need to copy args to a temp
1310 * variable to read it. Most notably this avoids extended load instructions
1311 * on unaligned addresses
1312 */
1313
752fde44 1314static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1315 unsigned char *args, struct trace *trace,
75b757ca 1316 struct thread *thread)
514f1c67 1317{
514f1c67 1318 size_t printed = 0;
55d43bca
DA
1319 unsigned char *p;
1320 unsigned long val;
514f1c67 1321
f208bd8d 1322 if (sc->args != NULL) {
514f1c67 1323 struct format_field *field;
01533e97
ACM
1324 u8 bit = 1;
1325 struct syscall_arg arg = {
75b757ca
ACM
1326 .idx = 0,
1327 .mask = 0,
1328 .trace = trace,
1329 .thread = thread,
01533e97 1330 };
6e7eeb51 1331
f208bd8d 1332 for (field = sc->args; field;
01533e97
ACM
1333 field = field->next, ++arg.idx, bit <<= 1) {
1334 if (arg.mask & bit)
6e7eeb51 1335 continue;
55d43bca
DA
1336
1337 /* special care for unaligned accesses */
1338 p = args + sizeof(unsigned long) * arg.idx;
1339 memcpy(&val, p, sizeof(val));
1340
4aa58232
ACM
1341 /*
1342 * Suppress this argument if its value is zero and
1343 * and we don't have a string associated in an
1344 * strarray for it.
1345 */
55d43bca 1346 if (val == 0 &&
4aa58232
ACM
1347 !(sc->arg_scnprintf &&
1348 sc->arg_scnprintf[arg.idx] == SCA_STRARRAY &&
1349 sc->arg_parm[arg.idx]))
22ae5cf1
ACM
1350 continue;
1351
752fde44 1352 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1353 "%s%s: ", printed ? ", " : "", field->name);
01533e97 1354 if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) {
55d43bca 1355 arg.val = val;
1f115cb7
ACM
1356 if (sc->arg_parm)
1357 arg.parm = sc->arg_parm[arg.idx];
01533e97
ACM
1358 printed += sc->arg_scnprintf[arg.idx](bf + printed,
1359 size - printed, &arg);
6e7eeb51 1360 } else {
13d4ff3e 1361 printed += scnprintf(bf + printed, size - printed,
55d43bca 1362 "%ld", val);
6e7eeb51 1363 }
514f1c67 1364 }
4c4d6e51
ACM
1365 } else if (IS_ERR(sc->tp_format)) {
1366 /*
1367 * If we managed to read the tracepoint /format file, then we
1368 * may end up not having any args, like with gettid(), so only
1369 * print the raw args when we didn't manage to read it.
1370 */
01533e97
ACM
1371 int i = 0;
1372
514f1c67 1373 while (i < 6) {
55d43bca
DA
1374 /* special care for unaligned accesses */
1375 p = args + sizeof(unsigned long) * i;
1376 memcpy(&val, p, sizeof(val));
752fde44
ACM
1377 printed += scnprintf(bf + printed, size - printed,
1378 "%sarg%d: %ld",
55d43bca 1379 printed ? ", " : "", i, val);
514f1c67
ACM
1380 ++i;
1381 }
1382 }
1383
1384 return printed;
1385}
1386
/*
 * Signature shared by the per-tracepoint handlers in this file, e.g.
 * trace__sys_enter(), trace__sys_exit() and trace__event_handler().
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1390
1391static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1392 struct perf_evsel *evsel, int id)
ba3d7dee 1393{
ba3d7dee
ACM
1394
1395 if (id < 0) {
adaa18bf
ACM
1396
1397 /*
1398 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1399 * before that, leaving at a higher verbosity level till that is
1400 * explained. Reproduced with plain ftrace with:
1401 *
1402 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1403 * grep "NR -1 " /t/trace_pipe
1404 *
1405 * After generating some load on the machine.
1406 */
1407 if (verbose > 1) {
1408 static u64 n;
1409 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1410 id, perf_evsel__name(evsel), ++n);
1411 }
ba3d7dee
ACM
1412 return NULL;
1413 }
1414
1415 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1416 trace__read_syscall_info(trace, id))
1417 goto out_cant_read;
1418
1419 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1420 goto out_cant_read;
1421
1422 return &trace->syscalls.table[id];
1423
1424out_cant_read:
bb963e16 1425 if (verbose > 0) {
7c304ee0
ACM
1426 fprintf(trace->output, "Problems reading syscall %d", id);
1427 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1428 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1429 fputs(" information\n", trace->output);
1430 }
ba3d7dee
ACM
1431 return NULL;
1432}
1433
bf2575c1
DA
1434static void thread__update_stats(struct thread_trace *ttrace,
1435 int id, struct perf_sample *sample)
1436{
1437 struct int_node *inode;
1438 struct stats *stats;
1439 u64 duration = 0;
1440
1441 inode = intlist__findnew(ttrace->syscall_stats, id);
1442 if (inode == NULL)
1443 return;
1444
1445 stats = inode->priv;
1446 if (stats == NULL) {
1447 stats = malloc(sizeof(struct stats));
1448 if (stats == NULL)
1449 return;
1450 init_stats(stats);
1451 inode->priv = stats;
1452 }
1453
1454 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1455 duration = sample->time - ttrace->entry_time;
1456
1457 update_stats(stats, duration);
1458}
1459
e596663e
ACM
1460static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1461{
1462 struct thread_trace *ttrace;
1463 u64 duration;
1464 size_t printed;
1465
1466 if (trace->current == NULL)
1467 return 0;
1468
1469 ttrace = thread__priv(trace->current);
1470
1471 if (!ttrace->entry_pending)
1472 return 0;
1473
1474 duration = sample->time - ttrace->entry_time;
1475
fd2b2975 1476 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
e596663e
ACM
1477 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1478 ttrace->entry_pending = false;
1479
1480 return printed;
1481}
1482
ba3d7dee 1483static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1484 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1485 struct perf_sample *sample)
1486{
752fde44 1487 char *msg;
ba3d7dee 1488 void *args;
752fde44 1489 size_t printed = 0;
2ae3a312 1490 struct thread *thread;
b91fc39f 1491 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1492 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1493 struct thread_trace *ttrace;
1494
1495 if (sc == NULL)
1496 return -1;
ba3d7dee 1497
8fb598e5 1498 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1499 ttrace = thread__trace(thread, trace->output);
2ae3a312 1500 if (ttrace == NULL)
b91fc39f 1501 goto out_put;
ba3d7dee 1502
77170988 1503 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1504
1505 if (ttrace->entry_str == NULL) {
e4d44e83 1506 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1507 if (!ttrace->entry_str)
b91fc39f 1508 goto out_put;
752fde44
ACM
1509 }
1510
5cf9c84e 1511 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1512 trace__printf_interrupted_entry(trace, sample);
e596663e 1513
752fde44
ACM
1514 ttrace->entry_time = sample->time;
1515 msg = ttrace->entry_str;
e4d44e83 1516 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1517
e4d44e83 1518 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1519 args, trace, thread);
752fde44 1520
5089f20e 1521 if (sc->is_exit) {
5cf9c84e 1522 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
fd2b2975 1523 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1524 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1525 }
7f4f8001 1526 } else {
752fde44 1527 ttrace->entry_pending = true;
7f4f8001
ACM
1528 /* See trace__vfs_getname & trace__sys_exit */
1529 ttrace->filename.pending_open = false;
1530 }
ba3d7dee 1531
f3b623b8
ACM
1532 if (trace->current != thread) {
1533 thread__put(trace->current);
1534 trace->current = thread__get(thread);
1535 }
b91fc39f
ACM
1536 err = 0;
1537out_put:
1538 thread__put(thread);
1539 return err;
ba3d7dee
ACM
1540}
1541
5cf9c84e
ACM
1542static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1543 struct perf_sample *sample,
1544 struct callchain_cursor *cursor)
202ff968
ACM
1545{
1546 struct addr_location al;
5cf9c84e
ACM
1547
1548 if (machine__resolve(trace->host, &al, sample) < 0 ||
1549 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1550 return -1;
1551
1552 return 0;
1553}
1554
1555static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1556{
202ff968 1557 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1558 const unsigned int print_opts = EVSEL__PRINT_SYM |
1559 EVSEL__PRINT_DSO |
1560 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1561
d327e60c 1562 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1563}
1564
ba3d7dee 1565static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1566 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1567 struct perf_sample *sample)
1568{
2c82c3ad 1569 long ret;
60c907ab 1570 u64 duration = 0;
fd2b2975 1571 bool duration_calculated = false;
2ae3a312 1572 struct thread *thread;
5cf9c84e 1573 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1574 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1575 struct thread_trace *ttrace;
1576
1577 if (sc == NULL)
1578 return -1;
ba3d7dee 1579
8fb598e5 1580 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1581 ttrace = thread__trace(thread, trace->output);
2ae3a312 1582 if (ttrace == NULL)
b91fc39f 1583 goto out_put;
ba3d7dee 1584
bf2575c1
DA
1585 if (trace->summary)
1586 thread__update_stats(ttrace, id, sample);
1587
77170988 1588 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1589
fd0db102 1590 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1591 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1592 ttrace->filename.pending_open = false;
c522739d
ACM
1593 ++trace->stats.vfs_getname;
1594 }
1595
ae9ed035 1596 if (ttrace->entry_time) {
60c907ab 1597 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1598 if (trace__filter_duration(trace, duration))
1599 goto out;
fd2b2975 1600 duration_calculated = true;
ae9ed035
ACM
1601 } else if (trace->duration_filter)
1602 goto out;
60c907ab 1603
5cf9c84e
ACM
1604 if (sample->callchain) {
1605 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1606 if (callchain_ret == 0) {
1607 if (callchain_cursor.nr < trace->min_stack)
1608 goto out;
1609 callchain_ret = 1;
1610 }
1611 }
1612
fd2eabaf
DA
1613 if (trace->summary_only)
1614 goto out;
1615
fd2b2975 1616 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1617
1618 if (ttrace->entry_pending) {
c24ff998 1619 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1620 } else {
c24ff998
ACM
1621 fprintf(trace->output, " ... [");
1622 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1623 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1624 }
1625
da3c9a44
ACM
1626 if (sc->fmt == NULL) {
1627signed_print:
2c82c3ad 1628 fprintf(trace->output, ") = %ld", ret);
11c8e39f 1629 } else if (ret < 0 && (sc->fmt->errmsg || sc->fmt->errpid)) {
942a91ed 1630 char bf[STRERR_BUFSIZE];
c8b5f2c9 1631 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
ba3d7dee
ACM
1632 *e = audit_errno_to_name(-ret);
1633
c24ff998 1634 fprintf(trace->output, ") = -1 %s %s", e, emsg);
da3c9a44 1635 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1636 fprintf(trace->output, ") = 0 Timeout");
04b34729 1637 else if (sc->fmt->hexret)
2c82c3ad 1638 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1639 else if (sc->fmt->errpid) {
1640 struct thread *child = machine__find_thread(trace->host, ret, ret);
1641
1642 if (child != NULL) {
1643 fprintf(trace->output, ") = %ld", ret);
1644 if (child->comm_set)
1645 fprintf(trace->output, " (%s)", thread__comm_str(child));
1646 thread__put(child);
1647 }
1648 } else
da3c9a44 1649 goto signed_print;
ba3d7dee 1650
c24ff998 1651 fputc('\n', trace->output);
566a0885 1652
5cf9c84e
ACM
1653 if (callchain_ret > 0)
1654 trace__fprintf_callchain(trace, sample);
1655 else if (callchain_ret < 0)
1656 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1657out:
752fde44 1658 ttrace->entry_pending = false;
b91fc39f
ACM
1659 err = 0;
1660out_put:
1661 thread__put(thread);
1662 return err;
ba3d7dee
ACM
1663}
1664
c522739d 1665static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1666 union perf_event *event __maybe_unused,
c522739d
ACM
1667 struct perf_sample *sample)
1668{
f994592d
ACM
1669 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1670 struct thread_trace *ttrace;
1671 size_t filename_len, entry_str_len, to_move;
1672 ssize_t remaining_space;
1673 char *pos;
7f4f8001 1674 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1675
1676 if (!thread)
1677 goto out;
1678
1679 ttrace = thread__priv(thread);
1680 if (!ttrace)
ef65e96e 1681 goto out_put;
f994592d 1682
7f4f8001 1683 filename_len = strlen(filename);
39f0e7a8 1684 if (filename_len == 0)
ef65e96e 1685 goto out_put;
7f4f8001
ACM
1686
1687 if (ttrace->filename.namelen < filename_len) {
1688 char *f = realloc(ttrace->filename.name, filename_len + 1);
1689
1690 if (f == NULL)
ef65e96e 1691 goto out_put;
7f4f8001
ACM
1692
1693 ttrace->filename.namelen = filename_len;
1694 ttrace->filename.name = f;
1695 }
1696
1697 strcpy(ttrace->filename.name, filename);
1698 ttrace->filename.pending_open = true;
1699
f994592d 1700 if (!ttrace->filename.ptr)
ef65e96e 1701 goto out_put;
f994592d
ACM
1702
1703 entry_str_len = strlen(ttrace->entry_str);
1704 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1705 if (remaining_space <= 0)
ef65e96e 1706 goto out_put;
f994592d 1707
f994592d
ACM
1708 if (filename_len > (size_t)remaining_space) {
1709 filename += filename_len - remaining_space;
1710 filename_len = remaining_space;
1711 }
1712
1713 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1714 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1715 memmove(pos + filename_len, pos, to_move);
1716 memcpy(pos, filename, filename_len);
1717
1718 ttrace->filename.ptr = 0;
1719 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1720out_put:
1721 thread__put(thread);
f994592d 1722out:
c522739d
ACM
1723 return 0;
1724}
1725
1302d88e 1726static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1727 union perf_event *event __maybe_unused,
1302d88e
ACM
1728 struct perf_sample *sample)
1729{
1730 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1731 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1732 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1733 sample->pid,
1734 sample->tid);
c24ff998 1735 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1736
1737 if (ttrace == NULL)
1738 goto out_dump;
1739
1740 ttrace->runtime_ms += runtime_ms;
1741 trace->runtime_ms += runtime_ms;
ef65e96e 1742out_put:
b91fc39f 1743 thread__put(thread);
1302d88e
ACM
1744 return 0;
1745
1746out_dump:
c24ff998 1747 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1748 evsel->name,
1749 perf_evsel__strval(evsel, sample, "comm"),
1750 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1751 runtime,
1752 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 1753 goto out_put;
1302d88e
ACM
1754}
1755
1d6c9407
WN
1756static void bpf_output__printer(enum binary_printer_ops op,
1757 unsigned int val, void *extra)
1758{
1759 FILE *output = extra;
1760 unsigned char ch = (unsigned char)val;
1761
1762 switch (op) {
1763 case BINARY_PRINT_CHAR_DATA:
1764 fprintf(output, "%c", isprint(ch) ? ch : '.');
1765 break;
1766 case BINARY_PRINT_DATA_BEGIN:
1767 case BINARY_PRINT_LINE_BEGIN:
1768 case BINARY_PRINT_ADDR:
1769 case BINARY_PRINT_NUM_DATA:
1770 case BINARY_PRINT_NUM_PAD:
1771 case BINARY_PRINT_SEP:
1772 case BINARY_PRINT_CHAR_PAD:
1773 case BINARY_PRINT_LINE_END:
1774 case BINARY_PRINT_DATA_END:
1775 default:
1776 break;
1777 }
1778}
1779
1780static void bpf_output__fprintf(struct trace *trace,
1781 struct perf_sample *sample)
1782{
1783 print_binary(sample->raw_data, sample->raw_size, 8,
1784 bpf_output__printer, trace->output);
1785}
1786
14a052df
ACM
1787static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1788 union perf_event *event __maybe_unused,
1789 struct perf_sample *sample)
1790{
7ad35615
ACM
1791 int callchain_ret = 0;
1792
1793 if (sample->callchain) {
1794 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1795 if (callchain_ret == 0) {
1796 if (callchain_cursor.nr < trace->min_stack)
1797 goto out;
1798 callchain_ret = 1;
1799 }
1800 }
1801
14a052df
ACM
1802 trace__printf_interrupted_entry(trace, sample);
1803 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1804
1805 if (trace->trace_syscalls)
1806 fprintf(trace->output, "( ): ");
1807
1808 fprintf(trace->output, "%s:", evsel->name);
14a052df 1809
1d6c9407
WN
1810 if (perf_evsel__is_bpf_output(evsel)) {
1811 bpf_output__fprintf(trace, sample);
1812 } else if (evsel->tp_format) {
14a052df
ACM
1813 event_format__fprintf(evsel->tp_format, sample->cpu,
1814 sample->raw_data, sample->raw_size,
1815 trace->output);
1816 }
1817
1818 fprintf(trace->output, ")\n");
202ff968 1819
7ad35615
ACM
1820 if (callchain_ret > 0)
1821 trace__fprintf_callchain(trace, sample);
1822 else if (callchain_ret < 0)
1823 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1824out:
14a052df
ACM
1825 return 0;
1826}
1827
598d02c5
SF
1828static void print_location(FILE *f, struct perf_sample *sample,
1829 struct addr_location *al,
1830 bool print_dso, bool print_sym)
1831{
1832
bb963e16 1833 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
1834 fprintf(f, "%s@", al->map->dso->long_name);
1835
bb963e16 1836 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 1837 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1838 al->addr - al->sym->start);
1839 else if (al->map)
4414a3c5 1840 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1841 else
4414a3c5 1842 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1843}
1844
1845static int trace__pgfault(struct trace *trace,
1846 struct perf_evsel *evsel,
473398a2 1847 union perf_event *event __maybe_unused,
598d02c5
SF
1848 struct perf_sample *sample)
1849{
1850 struct thread *thread;
598d02c5
SF
1851 struct addr_location al;
1852 char map_type = 'd';
a2ea67d7 1853 struct thread_trace *ttrace;
b91fc39f 1854 int err = -1;
1df54290 1855 int callchain_ret = 0;
598d02c5
SF
1856
1857 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1858
1859 if (sample->callchain) {
1860 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1861 if (callchain_ret == 0) {
1862 if (callchain_cursor.nr < trace->min_stack)
1863 goto out_put;
1864 callchain_ret = 1;
1865 }
1866 }
1867
a2ea67d7
SF
1868 ttrace = thread__trace(thread, trace->output);
1869 if (ttrace == NULL)
b91fc39f 1870 goto out_put;
a2ea67d7
SF
1871
1872 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1873 ttrace->pfmaj++;
1874 else
1875 ttrace->pfmin++;
1876
1877 if (trace->summary_only)
b91fc39f 1878 goto out;
598d02c5 1879
473398a2 1880 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
1881 sample->ip, &al);
1882
fd2b2975 1883 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
1884
1885 fprintf(trace->output, "%sfault [",
1886 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1887 "maj" : "min");
1888
1889 print_location(trace->output, sample, &al, false, true);
1890
1891 fprintf(trace->output, "] => ");
1892
473398a2 1893 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
1894 sample->addr, &al);
1895
1896 if (!al.map) {
473398a2 1897 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
1898 MAP__FUNCTION, sample->addr, &al);
1899
1900 if (al.map)
1901 map_type = 'x';
1902 else
1903 map_type = '?';
1904 }
1905
1906 print_location(trace->output, sample, &al, true, false);
1907
1908 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 1909
1df54290
ACM
1910 if (callchain_ret > 0)
1911 trace__fprintf_callchain(trace, sample);
1912 else if (callchain_ret < 0)
1913 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
1914out:
1915 err = 0;
1916out_put:
1917 thread__put(thread);
1918 return err;
598d02c5
SF
1919}
1920
e6001980 1921static void trace__set_base_time(struct trace *trace,
8a07a809 1922 struct perf_evsel *evsel,
e6001980
ACM
1923 struct perf_sample *sample)
1924{
8a07a809
ACM
1925 /*
1926 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1927 * and don't use sample->time unconditionally, we may end up having
1928 * some other event in the future without PERF_SAMPLE_TIME for good
1929 * reason, i.e. we may not be interested in its timestamps, just in
1930 * it taking place, picking some piece of information when it
1931 * appears in our event stream (vfs_getname comes to mind).
1932 */
1933 if (trace->base_time == 0 && !trace->full_time &&
1934 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
1935 trace->base_time = sample->time;
1936}
1937
6810fc91 1938static int trace__process_sample(struct perf_tool *tool,
0c82adcf 1939 union perf_event *event,
6810fc91
DA
1940 struct perf_sample *sample,
1941 struct perf_evsel *evsel,
1942 struct machine *machine __maybe_unused)
1943{
1944 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 1945 struct thread *thread;
6810fc91
DA
1946 int err = 0;
1947
744a9719 1948 tracepoint_handler handler = evsel->handler;
6810fc91 1949
aa07df6e
DA
1950 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1951 if (thread && thread__is_filtered(thread))
ef65e96e 1952 goto out;
bdc89661 1953
e6001980 1954 trace__set_base_time(trace, evsel, sample);
6810fc91 1955
3160565f
DA
1956 if (handler) {
1957 ++trace->nr_events;
0c82adcf 1958 handler(trace, evsel, event, sample);
3160565f 1959 }
ef65e96e
ACM
1960out:
1961 thread__put(thread);
6810fc91
DA
1962 return err;
1963}
1964
1e28fe0a 1965static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
1966{
1967 unsigned int rec_argc, i, j;
1968 const char **rec_argv;
1969 const char * const record_args[] = {
1970 "record",
1971 "-R",
1972 "-m", "1024",
1973 "-c", "1",
5e2485b1
DA
1974 };
1975
1e28fe0a
SF
1976 const char * const sc_args[] = { "-e", };
1977 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
1978 const char * const majpf_args[] = { "-e", "major-faults" };
1979 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
1980 const char * const minpf_args[] = { "-e", "minor-faults" };
1981 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
1982
9aca7f17 1983 /* +1 is for the event string below */
1e28fe0a
SF
1984 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
1985 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
1986 rec_argv = calloc(rec_argc + 1, sizeof(char *));
1987
1988 if (rec_argv == NULL)
1989 return -ENOMEM;
1990
1e28fe0a 1991 j = 0;
5e2485b1 1992 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
1993 rec_argv[j++] = record_args[i];
1994
e281a960
SF
1995 if (trace->trace_syscalls) {
1996 for (i = 0; i < sc_args_nr; i++)
1997 rec_argv[j++] = sc_args[i];
1998
1999 /* event string may be different for older kernels - e.g., RHEL6 */
2000 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2001 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2002 else if (is_valid_tracepoint("syscalls:sys_enter"))
2003 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2004 else {
2005 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2006 return -1;
2007 }
9aca7f17 2008 }
9aca7f17 2009
1e28fe0a
SF
2010 if (trace->trace_pgfaults & TRACE_PFMAJ)
2011 for (i = 0; i < majpf_args_nr; i++)
2012 rec_argv[j++] = majpf_args[i];
2013
2014 if (trace->trace_pgfaults & TRACE_PFMIN)
2015 for (i = 0; i < minpf_args_nr; i++)
2016 rec_argv[j++] = minpf_args[i];
2017
2018 for (i = 0; i < (unsigned int)argc; i++)
2019 rec_argv[j++] = argv[i];
5e2485b1 2020
b0ad8ea6 2021 return cmd_record(j, rec_argv);
5e2485b1
DA
2022}
2023
bf2575c1
DA
2024static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2025
08c98776 2026static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2027{
ef503831 2028 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2029
2030 if (IS_ERR(evsel))
08c98776 2031 return false;
c522739d
ACM
2032
2033 if (perf_evsel__field(evsel, "pathname") == NULL) {
2034 perf_evsel__delete(evsel);
08c98776 2035 return false;
c522739d
ACM
2036 }
2037
744a9719 2038 evsel->handler = trace__vfs_getname;
c522739d 2039 perf_evlist__add(evlist, evsel);
08c98776 2040 return true;
c522739d
ACM
2041}
2042
0ae537cb 2043static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2044{
2045 struct perf_evsel *evsel;
2046 struct perf_event_attr attr = {
2047 .type = PERF_TYPE_SOFTWARE,
2048 .mmap_data = 1,
598d02c5
SF
2049 };
2050
2051 attr.config = config;
0524798c 2052 attr.sample_period = 1;
598d02c5
SF
2053
2054 event_attr_init(&attr);
2055
2056 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2057 if (evsel)
2058 evsel->handler = trace__pgfault;
598d02c5 2059
0ae537cb 2060 return evsel;
598d02c5
SF
2061}
2062
ddbb1b13
ACM
2063static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2064{
2065 const u32 type = event->header.type;
2066 struct perf_evsel *evsel;
2067
ddbb1b13
ACM
2068 if (type != PERF_RECORD_SAMPLE) {
2069 trace__process_event(trace, trace->host, event, sample);
2070 return;
2071 }
2072
2073 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2074 if (evsel == NULL) {
2075 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2076 return;
2077 }
2078
e6001980
ACM
2079 trace__set_base_time(trace, evsel, sample);
2080
ddbb1b13
ACM
2081 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2082 sample->raw_data == NULL) {
2083 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2084 perf_evsel__name(evsel), sample->tid,
2085 sample->cpu, sample->raw_size);
2086 } else {
2087 tracepoint_handler handler = evsel->handler;
2088 handler(trace, evsel, event, sample);
2089 }
2090}
2091
c27366f0
ACM
2092static int trace__add_syscall_newtp(struct trace *trace)
2093{
2094 int ret = -1;
2095 struct perf_evlist *evlist = trace->evlist;
2096 struct perf_evsel *sys_enter, *sys_exit;
2097
2098 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2099 if (sys_enter == NULL)
2100 goto out;
2101
2102 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2103 goto out_delete_sys_enter;
2104
2105 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2106 if (sys_exit == NULL)
2107 goto out_delete_sys_enter;
2108
2109 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2110 goto out_delete_sys_exit;
2111
2112 perf_evlist__add(evlist, sys_enter);
2113 perf_evlist__add(evlist, sys_exit);
2114
2ddd5c04 2115 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2116 /*
2117 * We're interested only in the user space callchain
2118 * leading to the syscall, allow overriding that for
2119 * debugging reasons using --kernel_syscall_callchains
2120 */
2121 sys_exit->attr.exclude_callchain_kernel = 1;
2122 }
2123
8b3ce757
ACM
2124 trace->syscalls.events.sys_enter = sys_enter;
2125 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2126
2127 ret = 0;
2128out:
2129 return ret;
2130
2131out_delete_sys_exit:
2132 perf_evsel__delete_priv(sys_exit);
2133out_delete_sys_enter:
2134 perf_evsel__delete_priv(sys_enter);
2135 goto out;
2136}
2137
19867b61
ACM
2138static int trace__set_ev_qualifier_filter(struct trace *trace)
2139{
2140 int err = -1;
b15d0a4c 2141 struct perf_evsel *sys_exit;
19867b61
ACM
2142 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2143 trace->ev_qualifier_ids.nr,
2144 trace->ev_qualifier_ids.entries);
2145
2146 if (filter == NULL)
2147 goto out_enomem;
2148
3541c034
MP
2149 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2150 filter)) {
b15d0a4c 2151 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2152 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2153 }
19867b61
ACM
2154
2155 free(filter);
2156out:
2157 return err;
2158out_enomem:
2159 errno = ENOMEM;
2160 goto out;
2161}
c27366f0 2162
f15eb531 2163static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2164{
14a052df 2165 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2166 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2167 int err = -1, i;
2168 unsigned long before;
f15eb531 2169 const bool forks = argc > 0;
46fb3c21 2170 bool draining = false;
514f1c67 2171
75b757ca
ACM
2172 trace->live = true;
2173
c27366f0 2174 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2175 goto out_error_raw_syscalls;
514f1c67 2176
e281a960 2177 if (trace->trace_syscalls)
08c98776 2178 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2179
0ae537cb
ACM
2180 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2181 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2182 if (pgfault_maj == NULL)
2183 goto out_error_mem;
2184 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2185 }
598d02c5 2186
0ae537cb
ACM
2187 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2188 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2189 if (pgfault_min == NULL)
2190 goto out_error_mem;
2191 perf_evlist__add(evlist, pgfault_min);
2192 }
598d02c5 2193
1302d88e 2194 if (trace->sched &&
2cc990ba
ACM
2195 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2196 trace__sched_stat_runtime))
2197 goto out_error_sched_stat_runtime;
1302d88e 2198
514f1c67
ACM
2199 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2200 if (err < 0) {
c24ff998 2201 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2202 goto out_delete_evlist;
2203 }
2204
752fde44
ACM
2205 err = trace__symbols_init(trace, evlist);
2206 if (err < 0) {
c24ff998 2207 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2208 goto out_delete_evlist;
752fde44
ACM
2209 }
2210
fde54b78
ACM
2211 perf_evlist__config(evlist, &trace->opts, NULL);
2212
0c3a6ef4
ACM
2213 if (callchain_param.enabled) {
2214 bool use_identifier = false;
2215
2216 if (trace->syscalls.events.sys_exit) {
2217 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2218 &trace->opts, &callchain_param);
2219 use_identifier = true;
2220 }
2221
2222 if (pgfault_maj) {
2223 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2224 use_identifier = true;
2225 }
2226
2227 if (pgfault_min) {
2228 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2229 use_identifier = true;
2230 }
2231
2232 if (use_identifier) {
2233 /*
2234 * Now we have evsels with different sample_ids, use
2235 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2236 * from a fixed position in each ring buffer record.
2237 *
2238 * As of this the changeset introducing this comment, this
2239 * isn't strictly needed, as the fields that can come before
2240 * PERF_SAMPLE_ID are all used, but we'll probably disable
2241 * some of those for things like copying the payload of
2242 * pointer syscall arguments, and for vfs_getname we don't
2243 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2244 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2245 */
2246 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2247 perf_evlist__reset_sample_bit(evlist, ID);
2248 }
fde54b78 2249 }
514f1c67 2250
f15eb531
NK
2251 signal(SIGCHLD, sig_handler);
2252 signal(SIGINT, sig_handler);
2253
2254 if (forks) {
6ef73ec4 2255 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2256 argv, false, NULL);
f15eb531 2257 if (err < 0) {
c24ff998 2258 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2259 goto out_delete_evlist;
f15eb531
NK
2260 }
2261 }
2262
514f1c67 2263 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2264 if (err < 0)
2265 goto out_error_open;
514f1c67 2266
ba504235
WN
2267 err = bpf__apply_obj_config();
2268 if (err) {
2269 char errbuf[BUFSIZ];
2270
2271 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2272 pr_err("ERROR: Apply config to BPF failed: %s\n",
2273 errbuf);
2274 goto out_error_open;
2275 }
2276
241b057c
ACM
2277 /*
2278 * Better not use !target__has_task() here because we need to cover the
2279 * case where no threads were specified in the command line, but a
2280 * workload was, and in that case we will fill in the thread_map when
2281 * we fork the workload in perf_evlist__prepare_workload.
2282 */
f078c385
ACM
2283 if (trace->filter_pids.nr > 0)
2284 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2285 else if (thread_map__pid(evlist->threads, 0) == -1)
f078c385
ACM
2286 err = perf_evlist__set_filter_pid(evlist, getpid());
2287
94ad89bc
ACM
2288 if (err < 0)
2289 goto out_error_mem;
2290
19867b61
ACM
2291 if (trace->ev_qualifier_ids.nr > 0) {
2292 err = trace__set_ev_qualifier_filter(trace);
2293 if (err < 0)
2294 goto out_errno;
19867b61 2295
2e5e5f87
ACM
2296 pr_debug("event qualifier tracepoint filter: %s\n",
2297 trace->syscalls.events.sys_exit->filter);
2298 }
19867b61 2299
94ad89bc
ACM
2300 err = perf_evlist__apply_filters(evlist, &evsel);
2301 if (err < 0)
2302 goto out_error_apply_filters;
241b057c 2303
f885037e 2304 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
e09b18d4
ACM
2305 if (err < 0)
2306 goto out_error_mmap;
514f1c67 2307
e36b7821 2308 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2309 perf_evlist__enable(evlist);
2310
f15eb531
NK
2311 if (forks)
2312 perf_evlist__start_workload(evlist);
2313
e36b7821
AB
2314 if (trace->opts.initial_delay) {
2315 usleep(trace->opts.initial_delay * 1000);
2316 perf_evlist__enable(evlist);
2317 }
2318
e13798c7 2319 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2320 evlist->threads->nr > 1 ||
2321 perf_evlist__first(evlist)->attr.inherit;
514f1c67 2322again:
efd5745e 2323 before = trace->nr_events;
514f1c67
ACM
2324
2325 for (i = 0; i < evlist->nr_mmaps; i++) {
2326 union perf_event *event;
2327
2328 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2329 struct perf_sample sample;
514f1c67 2330
efd5745e 2331 ++trace->nr_events;
514f1c67 2332
514f1c67
ACM
2333 err = perf_evlist__parse_sample(evlist, event, &sample);
2334 if (err) {
c24ff998 2335 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2336 goto next_event;
514f1c67
ACM
2337 }
2338
ddbb1b13 2339 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2340next_event:
2341 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2342
ba209f85
ACM
2343 if (interrupted)
2344 goto out_disable;
02ac5421
ACM
2345
2346 if (done && !draining) {
2347 perf_evlist__disable(evlist);
2348 draining = true;
2349 }
514f1c67
ACM
2350 }
2351 }
2352
efd5745e 2353 if (trace->nr_events == before) {
ba209f85 2354 int timeout = done ? 100 : -1;
f15eb531 2355
46fb3c21
ACM
2356 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2357 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2358 draining = true;
2359
ba209f85 2360 goto again;
46fb3c21 2361 }
ba209f85
ACM
2362 } else {
2363 goto again;
f15eb531
NK
2364 }
2365
ba209f85 2366out_disable:
f3b623b8
ACM
2367 thread__zput(trace->current);
2368
ba209f85 2369 perf_evlist__disable(evlist);
514f1c67 2370
c522739d
ACM
2371 if (!err) {
2372 if (trace->summary)
2373 trace__fprintf_thread_summary(trace, trace->output);
2374
2375 if (trace->show_tool_stats) {
2376 fprintf(trace->output, "Stats:\n "
2377 " vfs_getname : %" PRIu64 "\n"
2378 " proc_getname: %" PRIu64 "\n",
2379 trace->stats.vfs_getname,
2380 trace->stats.proc_getname);
2381 }
2382 }
bf2575c1 2383
514f1c67
ACM
2384out_delete_evlist:
2385 perf_evlist__delete(evlist);
14a052df 2386 trace->evlist = NULL;
75b757ca 2387 trace->live = false;
514f1c67 2388 return err;
6ef068cb
ACM
2389{
2390 char errbuf[BUFSIZ];
a8f23d8f 2391
2cc990ba 2392out_error_sched_stat_runtime:
988bdb31 2393 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2394 goto out_error;
2395
801c67b0 2396out_error_raw_syscalls:
988bdb31 2397 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2398 goto out_error;
2399
e09b18d4
ACM
2400out_error_mmap:
2401 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2402 goto out_error;
2403
a8f23d8f
ACM
2404out_error_open:
2405 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2406
2407out_error:
6ef068cb 2408 fprintf(trace->output, "%s\n", errbuf);
87f91868 2409 goto out_delete_evlist;
94ad89bc
ACM
2410
2411out_error_apply_filters:
2412 fprintf(trace->output,
2413 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2414 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2415 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2416 goto out_delete_evlist;
514f1c67 2417}
5ed08dae
ACM
2418out_error_mem:
2419 fprintf(trace->output, "Not enough memory to run!\n");
2420 goto out_delete_evlist;
19867b61
ACM
2421
2422out_errno:
2423 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2424 goto out_delete_evlist;
a8f23d8f 2425}
514f1c67 2426
6810fc91
DA
2427static int trace__replay(struct trace *trace)
2428{
2429 const struct perf_evsel_str_handler handlers[] = {
c522739d 2430 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2431 };
f5fc1412
JO
2432 struct perf_data_file file = {
2433 .path = input_name,
2434 .mode = PERF_DATA_MODE_READ,
e366a6d8 2435 .force = trace->force,
f5fc1412 2436 };
6810fc91 2437 struct perf_session *session;
003824e8 2438 struct perf_evsel *evsel;
6810fc91
DA
2439 int err = -1;
2440
2441 trace->tool.sample = trace__process_sample;
2442 trace->tool.mmap = perf_event__process_mmap;
384c671e 2443 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2444 trace->tool.comm = perf_event__process_comm;
2445 trace->tool.exit = perf_event__process_exit;
2446 trace->tool.fork = perf_event__process_fork;
2447 trace->tool.attr = perf_event__process_attr;
f3b3614a 2448 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2449 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2450 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2451
0a8cb85c 2452 trace->tool.ordered_events = true;
6810fc91
DA
2453 trace->tool.ordering_requires_timestamps = true;
2454
2455 /* add tid to output */
2456 trace->multiple_threads = true;
2457
f5fc1412 2458 session = perf_session__new(&file, false, &trace->tool);
6810fc91 2459 if (session == NULL)
52e02834 2460 return -1;
6810fc91 2461
aa07df6e
DA
2462 if (trace->opts.target.pid)
2463 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2464
2465 if (trace->opts.target.tid)
2466 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2467
0a7e6d1b 2468 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2469 goto out;
2470
8fb598e5
DA
2471 trace->host = &session->machines.host;
2472
6810fc91
DA
2473 err = perf_session__set_tracepoints_handlers(session, handlers);
2474 if (err)
2475 goto out;
2476
003824e8
NK
2477 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2478 "raw_syscalls:sys_enter");
9aca7f17
DA
2479 /* older kernels have syscalls tp versus raw_syscalls */
2480 if (evsel == NULL)
2481 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2482 "syscalls:sys_enter");
003824e8 2483
e281a960
SF
2484 if (evsel &&
2485 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2486 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2487 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2488 goto out;
2489 }
2490
2491 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2492 "raw_syscalls:sys_exit");
9aca7f17
DA
2493 if (evsel == NULL)
2494 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2495 "syscalls:sys_exit");
e281a960
SF
2496 if (evsel &&
2497 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2498 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2499 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2500 goto out;
2501 }
2502
e5cadb93 2503 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2504 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2505 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2506 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2507 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2508 evsel->handler = trace__pgfault;
2509 }
2510
6810fc91
DA
2511 setup_pager();
2512
b7b61cbe 2513 err = perf_session__process_events(session);
6810fc91
DA
2514 if (err)
2515 pr_err("Failed to process events, error %d", err);
2516
bf2575c1
DA
2517 else if (trace->summary)
2518 trace__fprintf_thread_summary(trace, trace->output);
2519
6810fc91
DA
2520out:
2521 perf_session__delete(session);
2522
2523 return err;
2524}
2525
1302d88e
ACM
/*
 * Print the heading of the per-thread summary to @fp.
 * Returns the value of fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2534
b535d523
ACM
2535DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2536 struct stats *stats;
2537 double msecs;
2538 int syscall;
2539)
2540{
2541 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2542 struct stats *stats = source->priv;
2543
2544 entry->syscall = source->i;
2545 entry->stats = stats;
2546 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2547}
2548
bf2575c1
DA
2549static size_t thread__dump_stats(struct thread_trace *ttrace,
2550 struct trace *trace, FILE *fp)
2551{
bf2575c1
DA
2552 size_t printed = 0;
2553 struct syscall *sc;
b535d523
ACM
2554 struct rb_node *nd;
2555 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2556
b535d523 2557 if (syscall_stats == NULL)
bf2575c1
DA
2558 return 0;
2559
2560 printed += fprintf(fp, "\n");
2561
834fd46d
MW
2562 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2563 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2564 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2565
98a91837 2566 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2567 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2568 if (stats) {
2569 double min = (double)(stats->min) / NSEC_PER_MSEC;
2570 double max = (double)(stats->max) / NSEC_PER_MSEC;
2571 double avg = avg_stats(stats);
2572 double pct;
2573 u64 n = (u64) stats->n;
2574
2575 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2576 avg /= NSEC_PER_MSEC;
2577
b535d523 2578 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2579 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2580 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2581 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2582 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2583 }
bf2575c1
DA
2584 }
2585
b535d523 2586 resort_rb__delete(syscall_stats);
bf2575c1 2587 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2588
2589 return printed;
2590}
2591
96c14451 2592static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2593{
96c14451 2594 size_t printed = 0;
89dceb22 2595 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2596 double ratio;
2597
2598 if (ttrace == NULL)
2599 return 0;
2600
2601 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2602
15e65c69 2603 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2604 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2605 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2606 if (ttrace->pfmaj)
2607 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2608 if (ttrace->pfmin)
2609 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2610 if (trace->sched)
2611 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2612 else if (fputc('\n', fp) != EOF)
2613 ++printed;
2614
bf2575c1 2615 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2616
96c14451
ACM
2617 return printed;
2618}
896cbb56 2619
96c14451
ACM
2620static unsigned long thread__nr_events(struct thread_trace *ttrace)
2621{
2622 return ttrace ? ttrace->nr_events : 0;
2623}
2624
2625DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2626 struct thread *thread;
2627)
2628{
2629 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2630}
2631
1302d88e
ACM
2632static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2633{
96c14451
ACM
2634 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2635 size_t printed = trace__fprintf_threads_header(fp);
2636 struct rb_node *nd;
1302d88e 2637
96c14451
ACM
2638 if (threads == NULL) {
2639 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2640 return 0;
2641 }
2642
98a91837 2643 resort_rb__for_each_entry(nd, threads)
96c14451 2644 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
896cbb56 2645
96c14451
ACM
2646 resort_rb__delete(threads);
2647
2648 return printed;
1302d88e
ACM
2649}
2650
ae9ed035
ACM
2651static int trace__set_duration(const struct option *opt, const char *str,
2652 int unset __maybe_unused)
2653{
2654 struct trace *trace = opt->value;
2655
2656 trace->duration_filter = atof(str);
2657 return 0;
2658}
2659
f078c385
ACM
2660static int trace__set_filter_pids(const struct option *opt, const char *str,
2661 int unset __maybe_unused)
2662{
2663 int ret = -1;
2664 size_t i;
2665 struct trace *trace = opt->value;
2666 /*
2667 * FIXME: introduce a intarray class, plain parse csv and create a
2668 * { int nr, int entries[] } struct...
2669 */
2670 struct intlist *list = intlist__new(str);
2671
2672 if (list == NULL)
2673 return -1;
2674
2675 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2676 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2677
2678 if (trace->filter_pids.entries == NULL)
2679 goto out;
2680
2681 trace->filter_pids.entries[0] = getpid();
2682
2683 for (i = 1; i < trace->filter_pids.nr; ++i)
2684 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2685
2686 intlist__delete(list);
2687 ret = 0;
2688out:
2689 return ret;
2690}
2691
c24ff998
ACM
2692static int trace__open_output(struct trace *trace, const char *filename)
2693{
2694 struct stat st;
2695
2696 if (!stat(filename, &st) && st.st_size) {
2697 char oldname[PATH_MAX];
2698
2699 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2700 unlink(oldname);
2701 rename(filename, oldname);
2702 }
2703
2704 trace->output = fopen(filename, "w");
2705
2706 return trace->output == NULL ? -errno : 0;
2707}
2708
598d02c5
SF
2709static int parse_pagefaults(const struct option *opt, const char *str,
2710 int unset __maybe_unused)
2711{
2712 int *trace_pgfaults = opt->value;
2713
2714 if (strcmp(str, "all") == 0)
2715 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2716 else if (strcmp(str, "maj") == 0)
2717 *trace_pgfaults |= TRACE_PFMAJ;
2718 else if (strcmp(str, "min") == 0)
2719 *trace_pgfaults |= TRACE_PFMIN;
2720 else
2721 return -1;
2722
2723 return 0;
2724}
2725
14a052df
ACM
2726static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2727{
2728 struct perf_evsel *evsel;
2729
e5cadb93 2730 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2731 evsel->handler = handler;
2732}
2733
017037ff
ACM
2734/*
2735 * XXX: Hackish, just splitting the combined -e+--event (syscalls
2736 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2737 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2738 *
2739 * It'd be better to introduce a parse_options() variant that would return a
2740 * list with the terms it didn't match to an event...
2741 */
2742static int trace__parse_events_option(const struct option *opt, const char *str,
2743 int unset __maybe_unused)
2744{
2745 struct trace *trace = (struct trace *)opt->value;
2746 const char *s = str;
2747 char *sep = NULL, *lists[2] = { NULL, NULL, };
2748 int len = strlen(str), err = -1, list;
2749 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2750 char group_name[PATH_MAX];
2751
2752 if (strace_groups_dir == NULL)
2753 return -1;
2754
2755 if (*s == '!') {
2756 ++s;
2757 trace->not_ev_qualifier = true;
2758 }
2759
2760 while (1) {
2761 if ((sep = strchr(s, ',')) != NULL)
2762 *sep = '\0';
2763
2764 list = 0;
2765 if (syscalltbl__id(trace->sctbl, s) >= 0) {
2766 list = 1;
2767 } else {
2768 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2769 if (access(group_name, R_OK) == 0)
2770 list = 1;
2771 }
2772
2773 if (lists[list]) {
2774 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
2775 } else {
2776 lists[list] = malloc(len);
2777 if (lists[list] == NULL)
2778 goto out;
2779 strcpy(lists[list], s);
2780 }
2781
2782 if (!sep)
2783 break;
2784
2785 *sep = ',';
2786 s = sep + 1;
2787 }
2788
2789 if (lists[1] != NULL) {
2790 struct strlist_config slist_config = {
2791 .dirname = strace_groups_dir,
2792 };
2793
2794 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2795 if (trace->ev_qualifier == NULL) {
2796 fputs("Not enough memory to parse event qualifier", trace->output);
2797 goto out;
2798 }
2799
2800 if (trace__validate_ev_qualifier(trace))
2801 goto out;
2802 }
2803
2804 err = 0;
2805
2806 if (lists[0]) {
2807 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2808 "event selector. use 'perf list' to list available events",
2809 parse_events_option);
2810 err = parse_events_option(&o, lists[0], 0);
2811 }
2812out:
2813 if (sep)
2814 *sep = ',';
2815
2816 return err;
2817}
2818
b0ad8ea6 2819int cmd_trace(int argc, const char **argv)
514f1c67 2820{
6fdd9cb7 2821 const char *trace_usage[] = {
f15eb531
NK
2822 "perf trace [<options>] [<command>]",
2823 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2824 "perf trace record [<options>] [<command>]",
2825 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2826 NULL
2827 };
2828 struct trace trace = {
514f1c67
ACM
2829 .syscalls = {
2830 . max = -1,
2831 },
2832 .opts = {
2833 .target = {
2834 .uid = UINT_MAX,
2835 .uses_mmap = true,
2836 },
2837 .user_freq = UINT_MAX,
2838 .user_interval = ULLONG_MAX,
509051ea 2839 .no_buffering = true,
38d5447d 2840 .mmap_pages = UINT_MAX,
9d9cad76 2841 .proc_map_timeout = 500,
514f1c67 2842 },
007d66a0 2843 .output = stderr,
50c95cbd 2844 .show_comm = true,
e281a960 2845 .trace_syscalls = true,
44621819 2846 .kernel_syscallchains = false,
05614993 2847 .max_stack = UINT_MAX,
514f1c67 2848 };
c24ff998 2849 const char *output_name = NULL;
514f1c67 2850 const struct option trace_options[] = {
017037ff
ACM
2851 OPT_CALLBACK('e', "event", &trace, "event",
2852 "event/syscall selector. use 'perf list' to list available events",
2853 trace__parse_events_option),
50c95cbd
ACM
2854 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2855 "show the thread COMM next to its id"),
c522739d 2856 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
2857 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2858 trace__parse_events_option),
c24ff998 2859 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 2860 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
2861 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2862 "trace events on existing process id"),
ac9be8ee 2863 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 2864 "trace events on existing thread id"),
fa0e4ffe
ACM
2865 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2866 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 2867 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 2868 "system-wide collection from all CPUs"),
ac9be8ee 2869 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 2870 "list of cpus to monitor"),
6810fc91 2871 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 2872 "child tasks do not inherit counters"),
994a1f78
JO
2873 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2874 "number of mmap data pages",
2875 perf_evlist__parse_mmap_pages),
ac9be8ee 2876 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 2877 "user to profile"),
ae9ed035
ACM
2878 OPT_CALLBACK(0, "duration", &trace, "float",
2879 "show only events with duration > N.M ms",
2880 trace__set_duration),
1302d88e 2881 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 2882 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
2883 OPT_BOOLEAN('T', "time", &trace.full_time,
2884 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
2885 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2886 "Show only syscall summary with statistics"),
2887 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2888 "Show all syscalls and summary with statistics"),
598d02c5
SF
2889 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2890 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 2891 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 2892 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
2893 OPT_CALLBACK(0, "call-graph", &trace.opts,
2894 "record_mode[,record_size]", record_callchain_help,
2895 &record_parse_callchain_opt),
44621819
ACM
2896 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2897 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
2898 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2899 "Set the minimum stack depth when parsing the callchain, "
2900 "anything below the specified depth will be ignored."),
c6d4a494
ACM
2901 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2902 "Set the maximum stack depth when parsing the callchain, "
2903 "anything beyond the specified depth will be ignored. "
4cb93446 2904 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
9d9cad76
KL
2905 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2906 "per thread proc mmap processing timeout in ms"),
e36b7821
AB
2907 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2908 "ms to wait before starting measurement after program "
2909 "start"),
514f1c67
ACM
2910 OPT_END()
2911 };
ccd62a89 2912 bool __maybe_unused max_stack_user_set = true;
f3e459d1 2913 bool mmap_pages_user_set = true;
6fdd9cb7 2914 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 2915 int err;
32caf0d1 2916 char bf[BUFSIZ];
514f1c67 2917
4d08cb80
ACM
2918 signal(SIGSEGV, sighandler_dump_stack);
2919 signal(SIGFPE, sighandler_dump_stack);
2920
14a052df 2921 trace.evlist = perf_evlist__new();
fd0db102 2922 trace.sctbl = syscalltbl__new();
14a052df 2923
fd0db102 2924 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 2925 pr_err("Not enough memory to run!\n");
ff8f695c 2926 err = -ENOMEM;
14a052df
ACM
2927 goto out;
2928 }
2929
6fdd9cb7
YS
2930 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2931 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 2932
d7888573
WN
2933 err = bpf__setup_stdout(trace.evlist);
2934 if (err) {
2935 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2936 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2937 goto out;
2938 }
2939
59247e33
ACM
2940 err = -1;
2941
598d02c5
SF
2942 if (trace.trace_pgfaults) {
2943 trace.opts.sample_address = true;
2944 trace.opts.sample_time = true;
2945 }
2946
f3e459d1
ACM
2947 if (trace.opts.mmap_pages == UINT_MAX)
2948 mmap_pages_user_set = false;
2949
05614993 2950 if (trace.max_stack == UINT_MAX) {
fe176085 2951 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
2952 max_stack_user_set = false;
2953 }
2954
2955#ifdef HAVE_DWARF_UNWIND_SUPPORT
caa36ed7 2956 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
05614993
ACM
2957 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
2958#endif
2959
2ddd5c04 2960 if (callchain_param.enabled) {
f3e459d1
ACM
2961 if (!mmap_pages_user_set && geteuid() == 0)
2962 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
2963
566a0885 2964 symbol_conf.use_callchain = true;
f3e459d1 2965 }
566a0885 2966
14a052df
ACM
2967 if (trace.evlist->nr_entries > 0)
2968 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
2969
1e28fe0a
SF
2970 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
2971 return trace__record(&trace, argc-1, &argv[1]);
2972
2973 /* summary_only implies summary option, but don't overwrite summary if set */
2974 if (trace.summary_only)
2975 trace.summary = trace.summary_only;
2976
726f3234
ACM
2977 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
2978 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
2979 pr_err("Please specify something to trace.\n");
2980 return -1;
2981 }
2982
017037ff 2983 if (!trace.trace_syscalls && trace.ev_qualifier) {
59247e33
ACM
2984 pr_err("The -e option can't be used with --no-syscalls.\n");
2985 goto out;
2986 }
2987
c24ff998
ACM
2988 if (output_name != NULL) {
2989 err = trace__open_output(&trace, output_name);
2990 if (err < 0) {
2991 perror("failed to create output file");
2992 goto out;
2993 }
2994 }
2995
fd0db102
ACM
2996 trace.open_id = syscalltbl__id(trace.sctbl, "open");
2997
602ad878 2998 err = target__validate(&trace.opts.target);
32caf0d1 2999 if (err) {
602ad878 3000 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3001 fprintf(trace.output, "%s", bf);
3002 goto out_close;
32caf0d1
NK
3003 }
3004
602ad878 3005 err = target__parse_uid(&trace.opts.target);
514f1c67 3006 if (err) {
602ad878 3007 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3008 fprintf(trace.output, "%s", bf);
3009 goto out_close;
514f1c67
ACM
3010 }
3011
602ad878 3012 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3013 trace.opts.target.system_wide = true;
3014
6810fc91
DA
3015 if (input_name)
3016 err = trace__replay(&trace);
3017 else
3018 err = trace__run(&trace, argc, argv);
1302d88e 3019
c24ff998
ACM
3020out_close:
3021 if (output_name != NULL)
3022 fclose(trace.output);
3023out:
1302d88e 3024 return err;
514f1c67 3025}