]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blame - tools/perf/builtin-trace.c
perf evsel: Check if callchain is enabled before setting it up
[mirror_ubuntu-focal-kernel.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
5ab8c689 24#include "util/event.h"
514f1c67 25#include "util/evlist.h"
4b6ab94e 26#include <subcmd/exec-cmd.h>
752fde44 27#include "util/machine.h"
9a3993d4 28#include "util/path.h"
6810fc91 29#include "util/session.h"
752fde44 30#include "util/thread.h"
4b6ab94e 31#include <subcmd/parse-options.h>
2ae3a312 32#include "util/strlist.h"
bdc89661 33#include "util/intlist.h"
514f1c67 34#include "util/thread_map.h"
bf2575c1 35#include "util/stat.h"
fd5cead2 36#include "trace/beauty/beauty.h"
97978b3e 37#include "trace-event.h"
9aca7f17 38#include "util/parse-events.h"
ba504235 39#include "util/bpf-loader.h"
566a0885 40#include "callchain.h"
fea01392 41#include "print_binary.h"
a067558e 42#include "string2.h"
fd0db102 43#include "syscalltbl.h"
96c14451 44#include "rb_resort.h"
514f1c67 45
a43783ae 46#include <errno.h>
fd20e811 47#include <inttypes.h>
fd0db102 48#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
4208735d 49#include <poll.h>
9607ad3a 50#include <signal.h>
514f1c67 51#include <stdlib.h>
017037ff 52#include <string.h>
8dd2a131 53#include <linux/err.h>
997bba8c
ACM
54#include <linux/filter.h>
55#include <linux/audit.h>
877a7a11 56#include <linux/kernel.h>
39878d49 57#include <linux/random.h>
c6d4a494 58#include <linux/stringify.h>
bd48c63e 59#include <linux/time64.h>
514f1c67 60
3d689ed6
ACM
61#include "sane_ctype.h"
62
c188e7ac
ACM
63#ifndef O_CLOEXEC
64# define O_CLOEXEC 02000000
65#endif
66
83a51694
ACM
67#ifndef F_LINUX_SPECIFIC_BASE
68# define F_LINUX_SPECIFIC_BASE 1024
69#endif
70
d1d438a3
ACM
71struct trace {
72 struct perf_tool tool;
fd0db102 73 struct syscalltbl *sctbl;
d1d438a3
ACM
74 struct {
75 int max;
76 struct syscall *table;
77 struct {
78 struct perf_evsel *sys_enter,
79 *sys_exit;
80 } events;
81 } syscalls;
82 struct record_opts opts;
83 struct perf_evlist *evlist;
84 struct machine *host;
85 struct thread *current;
86 u64 base_time;
87 FILE *output;
88 unsigned long nr_events;
89 struct strlist *ev_qualifier;
90 struct {
91 size_t nr;
92 int *entries;
93 } ev_qualifier_ids;
d1d438a3
ACM
94 struct {
95 size_t nr;
96 pid_t *entries;
97 } filter_pids;
98 double duration_filter;
99 double runtime_ms;
100 struct {
101 u64 vfs_getname,
102 proc_getname;
103 } stats;
c6d4a494 104 unsigned int max_stack;
5cf9c84e 105 unsigned int min_stack;
d1d438a3
ACM
106 bool not_ev_qualifier;
107 bool live;
108 bool full_time;
109 bool sched;
110 bool multiple_threads;
111 bool summary;
112 bool summary_only;
113 bool show_comm;
114 bool show_tool_stats;
115 bool trace_syscalls;
44621819 116 bool kernel_syscallchains;
d1d438a3
ACM
117 bool force;
118 bool vfs_getname;
119 int trace_pgfaults;
fd0db102 120 int open_id;
d1d438a3 121};
a1c2552d 122
77170988
ACM
123struct tp_field {
124 int offset;
125 union {
126 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
127 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
128 };
129};
130
131#define TP_UINT_FIELD(bits) \
132static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
133{ \
55d43bca
DA
134 u##bits value; \
135 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136 return value; \
77170988
ACM
137}
138
139TP_UINT_FIELD(8);
140TP_UINT_FIELD(16);
141TP_UINT_FIELD(32);
142TP_UINT_FIELD(64);
143
144#define TP_UINT_FIELD__SWAPPED(bits) \
145static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
146{ \
55d43bca
DA
147 u##bits value; \
148 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
149 return bswap_##bits(value);\
150}
151
152TP_UINT_FIELD__SWAPPED(16);
153TP_UINT_FIELD__SWAPPED(32);
154TP_UINT_FIELD__SWAPPED(64);
155
156static int tp_field__init_uint(struct tp_field *field,
157 struct format_field *format_field,
158 bool needs_swap)
159{
160 field->offset = format_field->offset;
161
162 switch (format_field->size) {
163 case 1:
164 field->integer = tp_field__u8;
165 break;
166 case 2:
167 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
168 break;
169 case 4:
170 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
171 break;
172 case 8:
173 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
174 break;
175 default:
176 return -1;
177 }
178
179 return 0;
180}
181
182static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
183{
184 return sample->raw_data + field->offset;
185}
186
187static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
188{
189 field->offset = format_field->offset;
190 field->pointer = tp_field__ptr;
191 return 0;
192}
193
194struct syscall_tp {
195 struct tp_field id;
196 union {
197 struct tp_field args, ret;
198 };
199};
200
201static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
202 struct tp_field *field,
203 const char *name)
204{
205 struct format_field *format_field = perf_evsel__field(evsel, name);
206
207 if (format_field == NULL)
208 return -1;
209
210 return tp_field__init_uint(field, format_field, evsel->needs_swap);
211}
212
/*
 * Initialize the integer accessor for member 'name' of the struct syscall_tp
 * hanging off evsel->priv, looking the field up by that same name.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc_tp = evsel->priv;\
	   perf_evsel__init_tp_uint_field(evsel, &sc_tp->name, #name); })
216
/*
 * Look up the tracepoint field called @name in @evsel and set @field up to
 * return a pointer to it.
 *
 * Returns -1 if the field does not exist, otherwise whatever
 * tp_field__init_ptr() returns.
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *fmt = perf_evsel__field(evsel, name);

	return fmt != NULL ? tp_field__init_ptr(field, fmt) : -1;
}
228
/*
 * Initialize the pointer accessor for member 'name' of the struct syscall_tp
 * hanging off evsel->priv, looking the field up by that same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc_tp = evsel->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc_tp->name, #name); })
232
233static void perf_evsel__delete_priv(struct perf_evsel *evsel)
234{
04662523 235 zfree(&evsel->priv);
77170988
ACM
236 perf_evsel__delete(evsel);
237}
238
96695d44
NK
239static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
240{
241 evsel->priv = malloc(sizeof(struct syscall_tp));
242 if (evsel->priv != NULL) {
243 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
244 goto out_delete;
245
246 evsel->handler = handler;
247 return 0;
248 }
249
250 return -ENOMEM;
251
252out_delete:
04662523 253 zfree(&evsel->priv);
96695d44
NK
254 return -ENOENT;
255}
256
/*
 * Create the evsel for the raw_syscalls:<direction> tracepoint and prepare it
 * with perf_evsel__init_syscall_tp() and @handler.
 *
 * Returns the new evsel, or NULL if neither tracepoint flavour could be
 * created or the syscall_tp setup failed.
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *tp = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(tp)) {
		tp = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(tp))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(tp, handler)) {
		perf_evsel__delete_priv(tp);
		return NULL;
	}

	return tp;
}
277
/*
 * Read member 'name' of the struct syscall_tp in evsel->priv out of 'sample',
 * through the integer accessor that was set up for it.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = evsel->priv; \
	   sc_tp->name.integer(&sc_tp->name, sample); })

/* Same as above, but through the pointer accessor. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *sc_tp = evsel->priv; \
	   sc_tp->name.pointer(&sc_tp->name, sample); })
285
0ae79636
ACM
286size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
287{
288 int idx = val - sa->offset;
1f115cb7 289
0ae79636
ACM
290 if (idx < 0 || idx >= sa->nr_entries)
291 return scnprintf(bf, size, intfmt, val);
1f115cb7 292
0ae79636 293 return scnprintf(bf, size, "%s", sa->entries[idx]);
03e3adc9
ACM
294}
295
975b7c2f
ACM
296static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
297 const char *intfmt,
298 struct syscall_arg *arg)
1f115cb7 299{
0ae79636 300 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
1f115cb7
ACM
301}
302
975b7c2f
ACM
/*
 * Syscall argument formatter backed by a struct strarray (arg->parm);
 * values without a name are printed as decimal integers.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	static const char fallback_fmt[] = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
308
1f115cb7
ACM
309#define SCA_STRARRAY syscall_arg__scnprintf_strarray
310
83a51694
ACM
/*
 * A set of string tables that are tried one after the other, used when the
 * values of one argument are spread over several ranges.
 */
struct strarrays {
	int		nr_entries;
	struct strarray **entries;
};

/* Define 'strarrays__<array>' wrapping the array of struct strarray pointers. */
#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
}
320
274e86fd
ACM
321size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
322 struct syscall_arg *arg)
83a51694
ACM
323{
324 struct strarrays *sas = arg->parm;
325 int i;
326
327 for (i = 0; i < sas->nr_entries; ++i) {
328 struct strarray *sa = sas->entries[i];
329 int idx = arg->val - sa->offset;
330
331 if (idx >= 0 && idx < sa->nr_entries) {
332 if (sa->entries[idx] == NULL)
333 break;
334 return scnprintf(bf, size, "%s", sa->entries[idx]);
335 }
336 }
337
338 return scnprintf(bf, size, "%d", arg->val);
339}
340
48e1f91a
ACM
341#ifndef AT_FDCWD
342#define AT_FDCWD -100
343#endif
344
75b757ca
ACM
345static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
346 struct syscall_arg *arg)
347{
348 int fd = arg->val;
349
350 if (fd == AT_FDCWD)
351 return scnprintf(bf, size, "CWD");
352
353 return syscall_arg__scnprintf_fd(bf, size, arg);
354}
355
356#define SCA_FDAT syscall_arg__scnprintf_fd_at
357
358static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
359 struct syscall_arg *arg);
360
361#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
362
2c2b1623 363size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
13d4ff3e 364{
01533e97 365 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
366}
367
2c2b1623 368size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
a1c2552d
ACM
369{
370 return scnprintf(bf, size, "%d", arg->val);
371}
372
5dde91ed
ACM
373size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
374{
375 return scnprintf(bf, size, "%ld", arg->val);
376}
377
729a7841
ACM
378static const char *bpf_cmd[] = {
379 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
380 "MAP_GET_NEXT_KEY", "PROG_LOAD",
381};
382static DEFINE_STRARRAY(bpf_cmd);
383
03e3adc9
ACM
384static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
385static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 386
1f115cb7
ACM
387static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
388static DEFINE_STRARRAY(itimers);
389
b62bee1b
ACM
390static const char *keyctl_options[] = {
391 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
392 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
393 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
394 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
395 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
396};
397static DEFINE_STRARRAY(keyctl_options);
398
efe6b882
ACM
399static const char *whences[] = { "SET", "CUR", "END",
400#ifdef SEEK_DATA
401"DATA",
402#endif
403#ifdef SEEK_HOLE
404"HOLE",
405#endif
406};
407static DEFINE_STRARRAY(whences);
f9da0b0c 408
80f587d5
ACM
409static const char *fcntl_cmds[] = {
410 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
e000e5e3
ACM
411 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
412 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
413 "GETOWNER_UIDS",
80f587d5
ACM
414};
415static DEFINE_STRARRAY(fcntl_cmds);
416
83a51694
ACM
417static const char *fcntl_linux_specific_cmds[] = {
418 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
419 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
64e4561d 420 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
83a51694
ACM
421};
422
423static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
424
425static struct strarray *fcntl_cmds_arrays[] = {
426 &strarray__fcntl_cmds,
427 &strarray__fcntl_linux_specific_cmds,
428};
429
430static DEFINE_STRARRAYS(fcntl_cmds_arrays);
431
c045bf02
ACM
432static const char *rlimit_resources[] = {
433 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
434 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
435 "RTTIME",
436};
437static DEFINE_STRARRAY(rlimit_resources);
438
eb5b1b14
ACM
439static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
440static DEFINE_STRARRAY(sighow);
441
4f8c1b74
DA
442static const char *clockid[] = {
443 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
28ebb87c
ACM
444 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
445 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
4f8c1b74
DA
446};
447static DEFINE_STRARRAY(clockid);
448
e10bce81
ACM
449static const char *socket_families[] = {
450 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
451 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
452 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
453 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
454 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
455 "ALG", "NFC", "VSOCK",
456};
457static DEFINE_STRARRAY(socket_families);
458
51108999
ACM
459static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
460 struct syscall_arg *arg)
461{
462 size_t printed = 0;
463 int mode = arg->val;
464
465 if (mode == F_OK) /* 0 */
466 return scnprintf(bf, size, "F");
467#define P_MODE(n) \
468 if (mode & n##_OK) { \
469 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
470 mode &= ~n##_OK; \
471 }
472
473 P_MODE(R);
474 P_MODE(W);
475 P_MODE(X);
476#undef P_MODE
477
478 if (mode)
479 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
480
481 return printed;
482}
483
484#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
485
f994592d
ACM
486static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
487 struct syscall_arg *arg);
488
489#define SCA_FILENAME syscall_arg__scnprintf_filename
490
46cce19b
ACM
491static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
492 struct syscall_arg *arg)
493{
494 int printed = 0, flags = arg->val;
495
496#define P_FLAG(n) \
497 if (flags & O_##n) { \
498 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
499 flags &= ~O_##n; \
500 }
501
502 P_FLAG(CLOEXEC);
503 P_FLAG(NONBLOCK);
504#undef P_FLAG
505
506 if (flags)
507 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
508
509 return printed;
510}
511
512#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
513
a355a61e
ACM
514#ifndef GRND_NONBLOCK
515#define GRND_NONBLOCK 0x0001
516#endif
517#ifndef GRND_RANDOM
518#define GRND_RANDOM 0x0002
519#endif
520
39878d49
ACM
521static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
522 struct syscall_arg *arg)
523{
524 int printed = 0, flags = arg->val;
525
526#define P_FLAG(n) \
527 if (flags & GRND_##n) { \
528 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
529 flags &= ~GRND_##n; \
530 }
531
532 P_FLAG(RANDOM);
533 P_FLAG(NONBLOCK);
534#undef P_FLAG
535
536 if (flags)
537 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
538
539 return printed;
540}
541
542#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
543
82d4a110
ACM
544#define STRARRAY(name, array) \
545 { .scnprintf = SCA_STRARRAY, \
546 .parm = &strarray__##array, }
453350dd 547
ea8dc3ce 548#include "trace/beauty/eventfd.c"
8bf382ce 549#include "trace/beauty/flock.c"
d5d71e86 550#include "trace/beauty/futex_op.c"
df4cb167 551#include "trace/beauty/mmap.c"
ba2f22cf 552#include "trace/beauty/mode_t.c"
a30e6259 553#include "trace/beauty/msg_flags.c"
8f48df69 554#include "trace/beauty/open_flags.c"
62de344e 555#include "trace/beauty/perf_event_open.c"
d5d71e86 556#include "trace/beauty/pid.c"
a3bca91f 557#include "trace/beauty/sched_policy.c"
f5cd95ea 558#include "trace/beauty/seccomp.c"
12199d8e 559#include "trace/beauty/signum.c"
bbf86c43 560#include "trace/beauty/socket_type.c"
7206b900 561#include "trace/beauty/waitid_options.c"
a3bca91f 562
82d4a110
ACM
/*
 * How to print one syscall argument: the formatter to call, an optional
 * parameter for it (e.g. a string table), an optional argument name and
 * the 'show_zero' flag.
 */
struct syscall_arg_fmt {
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void	   *parm;
	const char *name;
	bool	   show_zero;	/* presumably: print the argument even when it is 0 - confirm */
};
569
514f1c67
ACM
570static struct syscall_fmt {
571 const char *name;
aec1930b 572 const char *alias;
82d4a110 573 struct syscall_arg_fmt arg[6];
332337da 574 u8 nr_args;
11c8e39f 575 bool errpid;
514f1c67 576 bool timeout;
04b34729 577 bool hexret;
514f1c67 578} syscall_fmts[] = {
1f63139c 579 { .name = "access",
82d4a110 580 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
1f63139c 581 { .name = "bpf",
82d4a110 582 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
beccb2b5 583 { .name = "brk", .hexret = true,
82d4a110 584 .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
1f63139c 585 { .name = "clock_gettime",
82d4a110 586 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
33396a3a
ACM
587 { .name = "clone", .errpid = true, .nr_args = 5,
588 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
589 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
590 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
591 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, },
592 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
1f63139c 593 { .name = "close",
82d4a110 594 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
1f63139c 595 { .name = "epoll_ctl",
82d4a110 596 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
1f63139c 597 { .name = "eventfd2",
82d4a110 598 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
1f63139c 599 { .name = "fchmodat",
82d4a110 600 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 601 { .name = "fchownat",
82d4a110 602 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 603 { .name = "fcntl",
82d4a110 604 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
39cc355b
ACM
605 .parm = &strarrays__fcntl_cmds_arrays,
606 .show_zero = true, },
82d4a110 607 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
1f63139c 608 { .name = "flock",
82d4a110 609 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
1f63139c
ACM
610 { .name = "fstat", .alias = "newfstat", },
611 { .name = "fstatat", .alias = "newfstatat", },
612 { .name = "futex",
82d4a110 613 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, }, },
1f63139c 614 { .name = "futimesat",
82d4a110 615 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 616 { .name = "getitimer",
82d4a110 617 .arg = { [0] = STRARRAY(which, itimers), }, },
c65f1070 618 { .name = "getpid", .errpid = true, },
d1d438a3 619 { .name = "getpgid", .errpid = true, },
c65f1070 620 { .name = "getppid", .errpid = true, },
1f63139c 621 { .name = "getrandom",
82d4a110 622 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
1f63139c 623 { .name = "getrlimit",
82d4a110 624 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
2d1073de 625 { .name = "gettid", .errpid = true, },
1f63139c 626 { .name = "ioctl",
82d4a110 627 .arg = {
844ae5b4
ACM
628#if defined(__i386__) || defined(__x86_64__)
629/*
630 * FIXME: Make this available to all arches.
631 */
1cc47f2d 632 [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
82d4a110 633 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 634#else
82d4a110 635 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 636#endif
1de3038d
ACM
637 { .name = "kcmp", .nr_args = 5,
638 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, },
639 [1] = { .name = "pid2", .scnprintf = SCA_PID, },
640 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, },
641 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, },
642 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, },
1f63139c 643 { .name = "keyctl",
82d4a110 644 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
1f63139c 645 { .name = "kill",
82d4a110 646 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 647 { .name = "linkat",
82d4a110 648 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 649 { .name = "lseek",
82d4a110 650 .arg = { [2] = STRARRAY(whence, whences), }, },
1f63139c
ACM
651 { .name = "lstat", .alias = "newlstat", },
652 { .name = "madvise",
82d4a110
ACM
653 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
654 [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
1f63139c 655 { .name = "mkdirat",
82d4a110 656 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 657 { .name = "mknodat",
82d4a110 658 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 659 { .name = "mlock",
82d4a110 660 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 661 { .name = "mlockall",
82d4a110 662 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
beccb2b5 663 { .name = "mmap", .hexret = true,
54265664
JO
664/* The standard mmap maps to old_mmap on s390x */
665#if defined(__s390x__)
666 .alias = "old_mmap",
667#endif
82d4a110
ACM
668 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
669 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
670 [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
1f63139c 671 { .name = "mprotect",
82d4a110
ACM
672 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
673 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
1f63139c 674 { .name = "mq_unlink",
82d4a110 675 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
ae685380 676 { .name = "mremap", .hexret = true,
82d4a110
ACM
677 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
678 [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
679 [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, },
1f63139c 680 { .name = "munlock",
82d4a110 681 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 682 { .name = "munmap",
82d4a110 683 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 684 { .name = "name_to_handle_at",
82d4a110 685 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 686 { .name = "newfstatat",
82d4a110 687 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 688 { .name = "open",
82d4a110 689 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 690 { .name = "open_by_handle_at",
82d4a110
ACM
691 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
692 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 693 { .name = "openat",
82d4a110
ACM
694 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
695 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 696 { .name = "perf_event_open",
82d4a110
ACM
697 .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ },
698 [3] = { .scnprintf = SCA_FD, /* group_fd */ },
699 [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
1f63139c 700 { .name = "pipe2",
82d4a110 701 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
83bc9c37
ACM
702 { .name = "pkey_alloc",
703 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, },
704 { .name = "pkey_free",
705 .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, },
706 { .name = "pkey_mprotect",
707 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
708 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
709 [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, },
1f63139c
ACM
710 { .name = "poll", .timeout = true, },
711 { .name = "ppoll", .timeout = true, },
d688d037
ACM
712 { .name = "prctl", .alias = "arch_prctl",
713 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
714 [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
715 [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
1f63139c
ACM
716 { .name = "pread", .alias = "pread64", },
717 { .name = "preadv", .alias = "pread", },
718 { .name = "prlimit64",
82d4a110 719 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
1f63139c
ACM
720 { .name = "pwrite", .alias = "pwrite64", },
721 { .name = "readlinkat",
82d4a110 722 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 723 { .name = "recvfrom",
82d4a110 724 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 725 { .name = "recvmmsg",
82d4a110 726 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 727 { .name = "recvmsg",
82d4a110 728 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 729 { .name = "renameat",
82d4a110 730 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 731 { .name = "rt_sigaction",
82d4a110 732 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 733 { .name = "rt_sigprocmask",
82d4a110 734 .arg = { [0] = STRARRAY(how, sighow), }, },
1f63139c 735 { .name = "rt_sigqueueinfo",
82d4a110 736 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 737 { .name = "rt_tgsigqueueinfo",
82d4a110 738 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 739 { .name = "sched_setscheduler",
82d4a110 740 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
1f63139c 741 { .name = "seccomp",
82d4a110
ACM
742 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
743 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
1f63139c
ACM
744 { .name = "select", .timeout = true, },
745 { .name = "sendmmsg",
82d4a110 746 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 747 { .name = "sendmsg",
82d4a110 748 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 749 { .name = "sendto",
82d4a110 750 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
c65f1070 751 { .name = "set_tid_address", .errpid = true, },
1f63139c 752 { .name = "setitimer",
82d4a110 753 .arg = { [0] = STRARRAY(which, itimers), }, },
1f63139c 754 { .name = "setrlimit",
82d4a110 755 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
1f63139c 756 { .name = "socket",
82d4a110
ACM
757 .arg = { [0] = STRARRAY(family, socket_families),
758 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
1f63139c 759 { .name = "socketpair",
82d4a110
ACM
760 .arg = { [0] = STRARRAY(family, socket_families),
761 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
1f63139c
ACM
762 { .name = "stat", .alias = "newstat", },
763 { .name = "statx",
82d4a110
ACM
764 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ },
765 [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
766 [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, },
1f63139c 767 { .name = "swapoff",
82d4a110 768 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 769 { .name = "swapon",
82d4a110 770 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 771 { .name = "symlinkat",
82d4a110 772 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 773 { .name = "tgkill",
82d4a110 774 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 775 { .name = "tkill",
82d4a110 776 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c
ACM
777 { .name = "uname", .alias = "newuname", },
778 { .name = "unlinkat",
82d4a110 779 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 780 { .name = "utimensat",
82d4a110 781 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
11c8e39f 782 { .name = "wait4", .errpid = true,
82d4a110 783 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
11c8e39f 784 { .name = "waitid", .errpid = true,
82d4a110 785 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
514f1c67
ACM
786};
787
788static int syscall_fmt__cmp(const void *name, const void *fmtp)
789{
790 const struct syscall_fmt *fmt = fmtp;
791 return strcmp(name, fmt->name);
792}
793
794static struct syscall_fmt *syscall_fmt__find(const char *name)
795{
796 const int nmemb = ARRAY_SIZE(syscall_fmts);
797 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
798}
799
/*
 * What is known about one syscall: its tracepoint format, its arguments,
 * its name and the formatting hints (whole syscall and per argument).
 */
struct syscall {
	struct event_format    *tp_format;
	int			nr_args;
	struct format_field    *args;
	const char	       *name;
	bool			is_exit;
	struct syscall_fmt     *fmt;
	struct syscall_arg_fmt *arg_fmt;
};
809
fd2b2975
ACM
810/*
811 * We need to have this 'calculated' boolean because in some cases we really
812 * don't know what is the duration of a syscall, for instance, when we start
813 * a session and some threads are waiting for a syscall to finish, say 'poll',
814 * in which case all we can do is to print "( ? ) for duration and for the
815 * start timestamp.
816 */
817static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
818{
819 double duration = (double)t / NSEC_PER_MSEC;
820 size_t printed = fprintf(fp, "(");
821
fd2b2975
ACM
822 if (!calculated)
823 printed += fprintf(fp, " ? ");
824 else if (duration >= 1.0)
60c907ab
ACM
825 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
826 else if (duration >= 0.01)
827 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
828 else
829 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 830 return printed + fprintf(fp, "): ");
60c907ab
ACM
831}
832
f994592d
ACM
833/**
834 * filename.ptr: The filename char pointer that will be vfs_getname'd
835 * filename.entry_str_pos: Where to insert the string translated from
836 * filename.ptr by the vfs_getname tracepoint/kprobe.
84486caa
ACM
837 * ret_scnprintf: syscall args may set this to a different syscall return
838 * formatter, for instance, fcntl may return fds, file flags, etc.
f994592d 839 */
752fde44
ACM
840struct thread_trace {
841 u64 entry_time;
752fde44 842 bool entry_pending;
efd5745e 843 unsigned long nr_events;
a2ea67d7 844 unsigned long pfmaj, pfmin;
752fde44 845 char *entry_str;
1302d88e 846 double runtime_ms;
7ee57434 847 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
f994592d
ACM
848 struct {
849 unsigned long ptr;
7f4f8001
ACM
850 short int entry_str_pos;
851 bool pending_open;
852 unsigned int namelen;
853 char *name;
f994592d 854 } filename;
75b757ca
ACM
855 struct {
856 int max;
857 char **table;
858 } paths;
bf2575c1
DA
859
860 struct intlist *syscall_stats;
752fde44
ACM
861};
862
863static struct thread_trace *thread_trace__new(void)
864{
75b757ca
ACM
865 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
866
867 if (ttrace)
868 ttrace->paths.max = -1;
869
bf2575c1
DA
870 ttrace->syscall_stats = intlist__new(NULL);
871
75b757ca 872 return ttrace;
752fde44
ACM
873}
874
c24ff998 875static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 876{
efd5745e
ACM
877 struct thread_trace *ttrace;
878
752fde44
ACM
879 if (thread == NULL)
880 goto fail;
881
89dceb22
NK
882 if (thread__priv(thread) == NULL)
883 thread__set_priv(thread, thread_trace__new());
48000a1a 884
89dceb22 885 if (thread__priv(thread) == NULL)
752fde44
ACM
886 goto fail;
887
89dceb22 888 ttrace = thread__priv(thread);
efd5745e
ACM
889 ++ttrace->nr_events;
890
891 return ttrace;
752fde44 892fail:
c24ff998 893 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
894 "WARNING: not enough memory, dropping samples!\n");
895 return NULL;
896}
897
84486caa
ACM
898
899void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
7ee57434 900 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
84486caa
ACM
901{
902 struct thread_trace *ttrace = thread__priv(arg->thread);
903
904 ttrace->ret_scnprintf = ret_scnprintf;
905}
906
598d02c5
SF
/* Bit flags; presumably select major / minor page fault tracing - TODO confirm */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size in bytes of the per-thread entry_str buffer */
static const size_t trace__entry_str_size = 2048;
911
97119f37 912static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 913{
89dceb22 914 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
915
916 if (fd > ttrace->paths.max) {
917 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
918
919 if (npath == NULL)
920 return -1;
921
922 if (ttrace->paths.max != -1) {
923 memset(npath + ttrace->paths.max + 1, 0,
924 (fd - ttrace->paths.max) * sizeof(char *));
925 } else {
926 memset(npath, 0, (fd + 1) * sizeof(char *));
927 }
928
929 ttrace->paths.table = npath;
930 ttrace->paths.max = fd;
931 }
932
933 ttrace->paths.table[fd] = strdup(pathname);
934
935 return ttrace->paths.table[fd] != NULL ? 0 : -1;
936}
937
97119f37
ACM
938static int thread__read_fd_path(struct thread *thread, int fd)
939{
940 char linkname[PATH_MAX], pathname[PATH_MAX];
941 struct stat st;
942 int ret;
943
944 if (thread->pid_ == thread->tid) {
945 scnprintf(linkname, sizeof(linkname),
946 "/proc/%d/fd/%d", thread->pid_, fd);
947 } else {
948 scnprintf(linkname, sizeof(linkname),
949 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
950 }
951
952 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
953 return -1;
954
955 ret = readlink(linkname, pathname, sizeof(pathname));
956
957 if (ret < 0 || ret > st.st_size)
958 return -1;
959
960 pathname[ret] = '\0';
961 return trace__set_fd_pathname(thread, fd, pathname);
962}
963
c522739d
ACM
964static const char *thread__fd_path(struct thread *thread, int fd,
965 struct trace *trace)
75b757ca 966{
89dceb22 967 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
968
969 if (ttrace == NULL)
970 return NULL;
971
972 if (fd < 0)
973 return NULL;
974
cdcd1e6b 975 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
976 if (!trace->live)
977 return NULL;
978 ++trace->stats.proc_getname;
cdcd1e6b 979 if (thread__read_fd_path(thread, fd))
c522739d
ACM
980 return NULL;
981 }
75b757ca
ACM
982
983 return ttrace->paths.table[fd];
984}
985
fc65eb82 986size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
75b757ca
ACM
987{
988 int fd = arg->val;
989 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 990 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
991
992 if (path)
993 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
994
995 return printed;
996}
997
0a2f7540
ACM
998size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
999{
1000 size_t printed = scnprintf(bf, size, "%d", fd);
1001 struct thread *thread = machine__find_thread(trace->host, pid, pid);
1002
1003 if (thread) {
1004 const char *path = thread__fd_path(thread, fd, trace);
1005
1006 if (path)
1007 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1008
1009 thread__put(thread);
1010 }
1011
1012 return printed;
1013}
1014
75b757ca
ACM
1015static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1016 struct syscall_arg *arg)
1017{
1018 int fd = arg->val;
1019 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1020 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1021
04662523
ACM
1022 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1023 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1024
1025 return printed;
1026}
1027
f994592d
ACM
1028static void thread__set_filename_pos(struct thread *thread, const char *bf,
1029 unsigned long ptr)
1030{
1031 struct thread_trace *ttrace = thread__priv(thread);
1032
1033 ttrace->filename.ptr = ptr;
1034 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1035}
1036
1037static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1038 struct syscall_arg *arg)
1039{
1040 unsigned long ptr = arg->val;
1041
1042 if (!arg->trace->vfs_getname)
1043 return scnprintf(bf, size, "%#x", ptr);
1044
1045 thread__set_filename_pos(arg->thread, bf, ptr);
1046 return 0;
1047}
1048
ae9ed035
ACM
1049static bool trace__filter_duration(struct trace *trace, double t)
1050{
1051 return t < (trace->duration_filter * NSEC_PER_MSEC);
1052}
1053
fd2b2975 1054static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1055{
1056 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1057
60c907ab 1058 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1059}
1060
fd2b2975
ACM
1061/*
1062 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1063 * using ttrace->entry_time for a thread that receives a sys_exit without
1064 * first having received a sys_enter ("poll" issued before tracing session
1065 * starts, lost sys_enter exit due to ring buffer overflow).
1066 */
1067static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1068{
1069 if (tstamp > 0)
1070 return __trace__fprintf_tstamp(trace, tstamp, fp);
1071
1072 return fprintf(fp, " ? ");
1073}
1074
/*
 * Flags written from the signal handler.  They are volatile sig_atomic_t
 * because that is the only object type the C standard guarantees can be
 * safely written from an asynchronous signal handler and read elsewhere.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Request termination; additionally remember whether the request came
 * from SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1083
752fde44 1084static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
fd2b2975 1085 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
752fde44
ACM
1086{
1087 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
fd2b2975 1088 printed += fprintf_duration(duration, duration_calculated, fp);
752fde44 1089
50c95cbd
ACM
1090 if (trace->multiple_threads) {
1091 if (trace->show_comm)
1902efe7 1092 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1093 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1094 }
752fde44
ACM
1095
1096 return printed;
1097}
1098
c24ff998 1099static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1100 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1101{
1102 int ret = 0;
1103
1104 switch (event->header.type) {
1105 case PERF_RECORD_LOST:
c24ff998 1106 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1107 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1108 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1109 break;
752fde44 1110 default:
162f0bef 1111 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1112 break;
1113 }
1114
1115 return ret;
1116}
1117
c24ff998 1118static int trace__tool_process(struct perf_tool *tool,
752fde44 1119 union perf_event *event,
162f0bef 1120 struct perf_sample *sample,
752fde44
ACM
1121 struct machine *machine)
1122{
c24ff998 1123 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1124 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1125}
1126
caf8a0d0
ACM
1127static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1128{
1129 struct machine *machine = vmachine;
1130
1131 if (machine->kptr_restrict_warned)
1132 return NULL;
1133
1134 if (symbol_conf.kptr_restrict) {
1135 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1136 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1137 "Kernel samples will not be resolved.\n");
1138 machine->kptr_restrict_warned = true;
1139 return NULL;
1140 }
1141
1142 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1143}
1144
752fde44
ACM
1145static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1146{
0a7e6d1b 1147 int err = symbol__init(NULL);
752fde44
ACM
1148
1149 if (err)
1150 return err;
1151
8fb598e5
DA
1152 trace->host = machine__new_host();
1153 if (trace->host == NULL)
1154 return -ENOMEM;
752fde44 1155
cbd5c178
AV
1156 err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
1157 if (err < 0)
1158 goto out;
706c3da4 1159
a33fbd56 1160 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76 1161 evlist->threads, trace__tool_process, false,
340b47f5 1162 trace->opts.proc_map_timeout, 1);
cbd5c178 1163out:
752fde44
ACM
1164 if (err)
1165 symbol__exit();
1166
1167 return err;
1168}
1169
33974a41
AV
1170static void trace__symbols__exit(struct trace *trace)
1171{
1172 machine__exit(trace->host);
1173 trace->host = NULL;
1174
1175 symbol__exit();
1176}
1177
5e58fcfa 1178static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
13d4ff3e 1179{
5e58fcfa 1180 int idx;
13d4ff3e 1181
332337da
ACM
1182 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1183 nr_args = sc->fmt->nr_args;
1184
5e58fcfa 1185 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
82d4a110 1186 if (sc->arg_fmt == NULL)
13d4ff3e
ACM
1187 return -1;
1188
5e58fcfa
ACM
1189 for (idx = 0; idx < nr_args; ++idx) {
1190 if (sc->fmt)
82d4a110 1191 sc->arg_fmt[idx] = sc->fmt->arg[idx];
5e58fcfa 1192 }
82d4a110 1193
5e58fcfa
ACM
1194 sc->nr_args = nr_args;
1195 return 0;
1196}
1197
1198static int syscall__set_arg_fmts(struct syscall *sc)
1199{
1200 struct format_field *field;
1201 int idx = 0, len;
1202
1203 for (field = sc->args; field; field = field->next, ++idx) {
1204 if (sc->fmt && sc->fmt->arg[idx].scnprintf)
1205 continue;
1f115cb7 1206
82d4a110 1207 if (strcmp(field->type, "const char *") == 0 &&
12f3ca4f
ACM
1208 (strcmp(field->name, "filename") == 0 ||
1209 strcmp(field->name, "path") == 0 ||
1210 strcmp(field->name, "pathname") == 0))
82d4a110 1211 sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
beccb2b5 1212 else if (field->flags & FIELD_IS_POINTER)
82d4a110 1213 sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
d1d438a3 1214 else if (strcmp(field->type, "pid_t") == 0)
82d4a110 1215 sc->arg_fmt[idx].scnprintf = SCA_PID;
ba2f22cf 1216 else if (strcmp(field->type, "umode_t") == 0)
82d4a110 1217 sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
b6565c90
ACM
1218 else if ((strcmp(field->type, "int") == 0 ||
1219 strcmp(field->type, "unsigned int") == 0 ||
1220 strcmp(field->type, "long") == 0) &&
1221 (len = strlen(field->name)) >= 2 &&
1222 strcmp(field->name + len - 2, "fd") == 0) {
1223 /*
1224 * /sys/kernel/tracing/events/syscalls/sys_enter*
1225 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1226 * 65 int
1227 * 23 unsigned int
1228 * 7 unsigned long
1229 */
82d4a110 1230 sc->arg_fmt[idx].scnprintf = SCA_FD;
b6565c90 1231 }
13d4ff3e
ACM
1232 }
1233
1234 return 0;
1235}
1236
514f1c67
ACM
1237static int trace__read_syscall_info(struct trace *trace, int id)
1238{
1239 char tp_name[128];
1240 struct syscall *sc;
fd0db102 1241 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1242
1243 if (name == NULL)
1244 return -1;
514f1c67
ACM
1245
1246 if (id > trace->syscalls.max) {
1247 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1248
1249 if (nsyscalls == NULL)
1250 return -1;
1251
1252 if (trace->syscalls.max != -1) {
1253 memset(nsyscalls + trace->syscalls.max + 1, 0,
1254 (id - trace->syscalls.max) * sizeof(*sc));
1255 } else {
1256 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1257 }
1258
1259 trace->syscalls.table = nsyscalls;
1260 trace->syscalls.max = id;
1261 }
1262
1263 sc = trace->syscalls.table + id;
3a531260 1264 sc->name = name;
2ae3a312 1265
3a531260 1266 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1267
aec1930b 1268 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1269 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1270
8dd2a131 1271 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1272 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1273 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1274 }
514f1c67 1275
5e58fcfa
ACM
1276 if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
1277 return -1;
1278
8dd2a131 1279 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1280 return -1;
1281
f208bd8d 1282 sc->args = sc->tp_format->format.fields;
c42de706
TS
1283 /*
1284 * We need to check and discard the first variable '__syscall_nr'
1285 * or 'nr' that mean the syscall number. It is needless here.
1286 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1287 */
1288 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1289 sc->args = sc->args->next;
1290 --sc->nr_args;
1291 }
1292
5089f20e
ACM
1293 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1294
13d4ff3e 1295 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1296}
1297
d0cc439b
ACM
1298static int trace__validate_ev_qualifier(struct trace *trace)
1299{
8b3ce757 1300 int err = 0, i;
27702bcf 1301 size_t nr_allocated;
d0cc439b
ACM
1302 struct str_node *pos;
1303
8b3ce757
ACM
1304 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1305 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1306 sizeof(trace->ev_qualifier_ids.entries[0]));
1307
1308 if (trace->ev_qualifier_ids.entries == NULL) {
1309 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1310 trace->output);
1311 err = -EINVAL;
1312 goto out;
1313 }
1314
27702bcf 1315 nr_allocated = trace->ev_qualifier_ids.nr;
8b3ce757
ACM
1316 i = 0;
1317
602a1f4d 1318 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1319 const char *sc = pos->s;
27702bcf 1320 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
d0cc439b 1321
8b3ce757 1322 if (id < 0) {
27702bcf
ACM
1323 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
1324 if (id >= 0)
1325 goto matches;
1326
d0cc439b
ACM
1327 if (err == 0) {
1328 fputs("Error:\tInvalid syscall ", trace->output);
1329 err = -EINVAL;
1330 } else {
1331 fputs(", ", trace->output);
1332 }
1333
1334 fputs(sc, trace->output);
1335 }
27702bcf 1336matches:
8b3ce757 1337 trace->ev_qualifier_ids.entries[i++] = id;
27702bcf
ACM
1338 if (match_next == -1)
1339 continue;
1340
1341 while (1) {
1342 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
1343 if (id < 0)
1344 break;
1345 if (nr_allocated == trace->ev_qualifier_ids.nr) {
1346 void *entries;
1347
1348 nr_allocated += 8;
1349 entries = realloc(trace->ev_qualifier_ids.entries,
1350 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
1351 if (entries == NULL) {
1352 err = -ENOMEM;
1353 fputs("\nError:\t Not enough memory for parsing\n", trace->output);
1354 goto out_free;
1355 }
1356 trace->ev_qualifier_ids.entries = entries;
1357 }
1358 trace->ev_qualifier_ids.nr++;
1359 trace->ev_qualifier_ids.entries[i++] = id;
1360 }
d0cc439b
ACM
1361 }
1362
1363 if (err < 0) {
1364 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1365 "\nHint:\tand: 'man syscalls'\n", trace->output);
27702bcf 1366out_free:
8b3ce757
ACM
1367 zfree(&trace->ev_qualifier_ids.entries);
1368 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1369 }
8b3ce757 1370out:
d0cc439b
ACM
1371 return err;
1372}
1373
55d43bca
DA
1374/*
1375 * args is to be interpreted as a series of longs but we need to handle
1376 * 8-byte unaligned accesses. args points to raw_data within the event
1377 * and raw_data is guaranteed to be 8-byte unaligned because it is
1378 * preceded by raw_size which is a u32. So we need to copy args to a temp
1379 * variable to read it. Most notably this avoids extended load instructions
1380 * on unaligned addresses
1381 */
325f5091 1382unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
f9f83b33
ACM
1383{
1384 unsigned long val;
325f5091 1385 unsigned char *p = arg->args + sizeof(unsigned long) * idx;
f9f83b33
ACM
1386
1387 memcpy(&val, p, sizeof(val));
1388 return val;
1389}
1390
c51bdfec
ACM
1391static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1392 struct syscall_arg *arg)
1393{
1394 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1395 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1396
1397 return scnprintf(bf, size, "arg%d: ", arg->idx);
1398}
1399
d032d79e
ACM
1400static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
1401 struct syscall_arg *arg, unsigned long val)
1402{
1403 if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
1404 arg->val = val;
1405 if (sc->arg_fmt[arg->idx].parm)
1406 arg->parm = sc->arg_fmt[arg->idx].parm;
1407 return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
1408 }
1409 return scnprintf(bf, size, "%ld", val);
1410}
1411
752fde44 1412static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1413 unsigned char *args, struct trace *trace,
75b757ca 1414 struct thread *thread)
514f1c67 1415{
514f1c67 1416 size_t printed = 0;
55d43bca 1417 unsigned long val;
d032d79e
ACM
1418 u8 bit = 1;
1419 struct syscall_arg arg = {
1420 .args = args,
1421 .idx = 0,
1422 .mask = 0,
1423 .trace = trace,
1424 .thread = thread,
1425 };
84486caa
ACM
1426 struct thread_trace *ttrace = thread__priv(thread);
1427
1428 /*
1429 * Things like fcntl will set this in its 'cmd' formatter to pick the
1430 * right formatter for the return value (an fd? file flags?), which is
1431 * not needed for syscalls that always return a given type, say an fd.
1432 */
1433 ttrace->ret_scnprintf = NULL;
514f1c67 1434
f208bd8d 1435 if (sc->args != NULL) {
514f1c67 1436 struct format_field *field;
6e7eeb51 1437
f208bd8d 1438 for (field = sc->args; field;
01533e97
ACM
1439 field = field->next, ++arg.idx, bit <<= 1) {
1440 if (arg.mask & bit)
6e7eeb51 1441 continue;
55d43bca 1442
f9f83b33 1443 val = syscall_arg__val(&arg, arg.idx);
55d43bca 1444
4aa58232
ACM
1445 /*
1446 * Suppress this argument if its value is zero and
1447 * and we don't have a string associated in an
1448 * strarray for it.
1449 */
55d43bca 1450 if (val == 0 &&
82d4a110 1451 !(sc->arg_fmt &&
d47737d5
ACM
1452 (sc->arg_fmt[arg.idx].show_zero ||
1453 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
82d4a110
ACM
1454 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
1455 sc->arg_fmt[arg.idx].parm))
22ae5cf1
ACM
1456 continue;
1457
752fde44 1458 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1459 "%s%s: ", printed ? ", " : "", field->name);
d032d79e 1460 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
514f1c67 1461 }
4c4d6e51
ACM
1462 } else if (IS_ERR(sc->tp_format)) {
1463 /*
1464 * If we managed to read the tracepoint /format file, then we
1465 * may end up not having any args, like with gettid(), so only
1466 * print the raw args when we didn't manage to read it.
1467 */
332337da 1468 while (arg.idx < sc->nr_args) {
d032d79e
ACM
1469 if (arg.mask & bit)
1470 goto next_arg;
1471 val = syscall_arg__val(&arg, arg.idx);
c51bdfec
ACM
1472 if (printed)
1473 printed += scnprintf(bf + printed, size - printed, ", ");
1474 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
d032d79e
ACM
1475 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
1476next_arg:
1477 ++arg.idx;
1478 bit <<= 1;
514f1c67
ACM
1479 }
1480 }
1481
1482 return printed;
1483}
1484
/*
 * Signature shared by the per-tracepoint sample handlers in this file
 * (trace__sys_enter(), trace__sys_exit(), trace__vfs_getname(), ...).
 */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1488
1489static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1490 struct perf_evsel *evsel, int id)
ba3d7dee 1491{
ba3d7dee
ACM
1492
1493 if (id < 0) {
adaa18bf
ACM
1494
1495 /*
1496 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1497 * before that, leaving at a higher verbosity level till that is
1498 * explained. Reproduced with plain ftrace with:
1499 *
1500 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1501 * grep "NR -1 " /t/trace_pipe
1502 *
1503 * After generating some load on the machine.
1504 */
1505 if (verbose > 1) {
1506 static u64 n;
1507 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1508 id, perf_evsel__name(evsel), ++n);
1509 }
ba3d7dee
ACM
1510 return NULL;
1511 }
1512
1513 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1514 trace__read_syscall_info(trace, id))
1515 goto out_cant_read;
1516
1517 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1518 goto out_cant_read;
1519
1520 return &trace->syscalls.table[id];
1521
1522out_cant_read:
bb963e16 1523 if (verbose > 0) {
7c304ee0
ACM
1524 fprintf(trace->output, "Problems reading syscall %d", id);
1525 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1526 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1527 fputs(" information\n", trace->output);
1528 }
ba3d7dee
ACM
1529 return NULL;
1530}
1531
bf2575c1
DA
1532static void thread__update_stats(struct thread_trace *ttrace,
1533 int id, struct perf_sample *sample)
1534{
1535 struct int_node *inode;
1536 struct stats *stats;
1537 u64 duration = 0;
1538
1539 inode = intlist__findnew(ttrace->syscall_stats, id);
1540 if (inode == NULL)
1541 return;
1542
1543 stats = inode->priv;
1544 if (stats == NULL) {
1545 stats = malloc(sizeof(struct stats));
1546 if (stats == NULL)
1547 return;
1548 init_stats(stats);
1549 inode->priv = stats;
1550 }
1551
1552 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1553 duration = sample->time - ttrace->entry_time;
1554
1555 update_stats(stats, duration);
1556}
1557
e596663e
ACM
1558static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1559{
1560 struct thread_trace *ttrace;
1561 u64 duration;
1562 size_t printed;
1563
1564 if (trace->current == NULL)
1565 return 0;
1566
1567 ttrace = thread__priv(trace->current);
1568
1569 if (!ttrace->entry_pending)
1570 return 0;
1571
1572 duration = sample->time - ttrace->entry_time;
1573
fd2b2975 1574 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
e596663e
ACM
1575 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1576 ttrace->entry_pending = false;
1577
1578 return printed;
1579}
1580
ba3d7dee 1581static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1582 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1583 struct perf_sample *sample)
1584{
752fde44 1585 char *msg;
ba3d7dee 1586 void *args;
752fde44 1587 size_t printed = 0;
2ae3a312 1588 struct thread *thread;
b91fc39f 1589 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1590 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1591 struct thread_trace *ttrace;
1592
1593 if (sc == NULL)
1594 return -1;
ba3d7dee 1595
8fb598e5 1596 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1597 ttrace = thread__trace(thread, trace->output);
2ae3a312 1598 if (ttrace == NULL)
b91fc39f 1599 goto out_put;
ba3d7dee 1600
77170988 1601 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1602
1603 if (ttrace->entry_str == NULL) {
e4d44e83 1604 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1605 if (!ttrace->entry_str)
b91fc39f 1606 goto out_put;
752fde44
ACM
1607 }
1608
5cf9c84e 1609 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1610 trace__printf_interrupted_entry(trace, sample);
e596663e 1611
752fde44
ACM
1612 ttrace->entry_time = sample->time;
1613 msg = ttrace->entry_str;
e4d44e83 1614 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1615
e4d44e83 1616 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1617 args, trace, thread);
752fde44 1618
5089f20e 1619 if (sc->is_exit) {
5cf9c84e 1620 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
fd2b2975 1621 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1622 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1623 }
7f4f8001 1624 } else {
752fde44 1625 ttrace->entry_pending = true;
7f4f8001
ACM
1626 /* See trace__vfs_getname & trace__sys_exit */
1627 ttrace->filename.pending_open = false;
1628 }
ba3d7dee 1629
f3b623b8
ACM
1630 if (trace->current != thread) {
1631 thread__put(trace->current);
1632 trace->current = thread__get(thread);
1633 }
b91fc39f
ACM
1634 err = 0;
1635out_put:
1636 thread__put(thread);
1637 return err;
ba3d7dee
ACM
1638}
1639
5cf9c84e
ACM
1640static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1641 struct perf_sample *sample,
1642 struct callchain_cursor *cursor)
202ff968
ACM
1643{
1644 struct addr_location al;
5cf9c84e
ACM
1645
1646 if (machine__resolve(trace->host, &al, sample) < 0 ||
1647 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1648 return -1;
1649
1650 return 0;
1651}
1652
1653static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1654{
202ff968 1655 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1656 const unsigned int print_opts = EVSEL__PRINT_SYM |
1657 EVSEL__PRINT_DSO |
1658 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1659
d327e60c 1660 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1661}
1662
ba3d7dee 1663static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1664 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1665 struct perf_sample *sample)
1666{
2c82c3ad 1667 long ret;
60c907ab 1668 u64 duration = 0;
fd2b2975 1669 bool duration_calculated = false;
2ae3a312 1670 struct thread *thread;
5cf9c84e 1671 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1672 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1673 struct thread_trace *ttrace;
1674
1675 if (sc == NULL)
1676 return -1;
ba3d7dee 1677
8fb598e5 1678 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1679 ttrace = thread__trace(thread, trace->output);
2ae3a312 1680 if (ttrace == NULL)
b91fc39f 1681 goto out_put;
ba3d7dee 1682
bf2575c1
DA
1683 if (trace->summary)
1684 thread__update_stats(ttrace, id, sample);
1685
77170988 1686 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1687
fd0db102 1688 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1689 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1690 ttrace->filename.pending_open = false;
c522739d
ACM
1691 ++trace->stats.vfs_getname;
1692 }
1693
ae9ed035 1694 if (ttrace->entry_time) {
60c907ab 1695 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1696 if (trace__filter_duration(trace, duration))
1697 goto out;
fd2b2975 1698 duration_calculated = true;
ae9ed035
ACM
1699 } else if (trace->duration_filter)
1700 goto out;
60c907ab 1701
5cf9c84e
ACM
1702 if (sample->callchain) {
1703 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1704 if (callchain_ret == 0) {
1705 if (callchain_cursor.nr < trace->min_stack)
1706 goto out;
1707 callchain_ret = 1;
1708 }
1709 }
1710
fd2eabaf
DA
1711 if (trace->summary_only)
1712 goto out;
1713
fd2b2975 1714 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1715
1716 if (ttrace->entry_pending) {
c24ff998 1717 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1718 } else {
c24ff998
ACM
1719 fprintf(trace->output, " ... [");
1720 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1721 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1722 }
1723
da3c9a44 1724 if (sc->fmt == NULL) {
1f63139c
ACM
1725 if (ret < 0)
1726 goto errno_print;
da3c9a44 1727signed_print:
6f8fe61e 1728 fprintf(trace->output, ") = %ld", ret);
1f63139c
ACM
1729 } else if (ret < 0) {
1730errno_print: {
942a91ed 1731 char bf[STRERR_BUFSIZE];
c8b5f2c9 1732 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
ba3d7dee
ACM
1733 *e = audit_errno_to_name(-ret);
1734
c24ff998 1735 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1f63139c 1736 }
da3c9a44 1737 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1738 fprintf(trace->output, ") = 0 Timeout");
84486caa
ACM
1739 else if (ttrace->ret_scnprintf) {
1740 char bf[1024];
7ee57434
ACM
1741 struct syscall_arg arg = {
1742 .val = ret,
1743 .thread = thread,
1744 .trace = trace,
1745 };
1746 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
84486caa
ACM
1747 ttrace->ret_scnprintf = NULL;
1748 fprintf(trace->output, ") = %s", bf);
1749 } else if (sc->fmt->hexret)
2c82c3ad 1750 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1751 else if (sc->fmt->errpid) {
1752 struct thread *child = machine__find_thread(trace->host, ret, ret);
1753
1754 if (child != NULL) {
1755 fprintf(trace->output, ") = %ld", ret);
1756 if (child->comm_set)
1757 fprintf(trace->output, " (%s)", thread__comm_str(child));
1758 thread__put(child);
1759 }
1760 } else
da3c9a44 1761 goto signed_print;
ba3d7dee 1762
c24ff998 1763 fputc('\n', trace->output);
566a0885 1764
5cf9c84e
ACM
1765 if (callchain_ret > 0)
1766 trace__fprintf_callchain(trace, sample);
1767 else if (callchain_ret < 0)
1768 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1769out:
752fde44 1770 ttrace->entry_pending = false;
b91fc39f
ACM
1771 err = 0;
1772out_put:
1773 thread__put(thread);
1774 return err;
ba3d7dee
ACM
1775}
1776
c522739d 1777static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1778 union perf_event *event __maybe_unused,
c522739d
ACM
1779 struct perf_sample *sample)
1780{
f994592d
ACM
1781 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1782 struct thread_trace *ttrace;
1783 size_t filename_len, entry_str_len, to_move;
1784 ssize_t remaining_space;
1785 char *pos;
7f4f8001 1786 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1787
1788 if (!thread)
1789 goto out;
1790
1791 ttrace = thread__priv(thread);
1792 if (!ttrace)
ef65e96e 1793 goto out_put;
f994592d 1794
7f4f8001 1795 filename_len = strlen(filename);
39f0e7a8 1796 if (filename_len == 0)
ef65e96e 1797 goto out_put;
7f4f8001
ACM
1798
1799 if (ttrace->filename.namelen < filename_len) {
1800 char *f = realloc(ttrace->filename.name, filename_len + 1);
1801
1802 if (f == NULL)
ef65e96e 1803 goto out_put;
7f4f8001
ACM
1804
1805 ttrace->filename.namelen = filename_len;
1806 ttrace->filename.name = f;
1807 }
1808
1809 strcpy(ttrace->filename.name, filename);
1810 ttrace->filename.pending_open = true;
1811
f994592d 1812 if (!ttrace->filename.ptr)
ef65e96e 1813 goto out_put;
f994592d
ACM
1814
1815 entry_str_len = strlen(ttrace->entry_str);
1816 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1817 if (remaining_space <= 0)
ef65e96e 1818 goto out_put;
f994592d 1819
f994592d
ACM
1820 if (filename_len > (size_t)remaining_space) {
1821 filename += filename_len - remaining_space;
1822 filename_len = remaining_space;
1823 }
1824
1825 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1826 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1827 memmove(pos + filename_len, pos, to_move);
1828 memcpy(pos, filename, filename_len);
1829
1830 ttrace->filename.ptr = 0;
1831 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1832out_put:
1833 thread__put(thread);
f994592d 1834out:
c522739d
ACM
1835 return 0;
1836}
1837
1302d88e 1838static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1839 union perf_event *event __maybe_unused,
1302d88e
ACM
1840 struct perf_sample *sample)
1841{
1842 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1843 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1844 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1845 sample->pid,
1846 sample->tid);
c24ff998 1847 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1848
1849 if (ttrace == NULL)
1850 goto out_dump;
1851
1852 ttrace->runtime_ms += runtime_ms;
1853 trace->runtime_ms += runtime_ms;
ef65e96e 1854out_put:
b91fc39f 1855 thread__put(thread);
1302d88e
ACM
1856 return 0;
1857
1858out_dump:
c24ff998 1859 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1860 evsel->name,
1861 perf_evsel__strval(evsel, sample, "comm"),
1862 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1863 runtime,
1864 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 1865 goto out_put;
1302d88e
ACM
1866}
1867
923d0c9a
ACM
1868static int bpf_output__printer(enum binary_printer_ops op,
1869 unsigned int val, void *extra __maybe_unused, FILE *fp)
1d6c9407 1870{
1d6c9407
WN
1871 unsigned char ch = (unsigned char)val;
1872
1873 switch (op) {
1874 case BINARY_PRINT_CHAR_DATA:
923d0c9a 1875 return fprintf(fp, "%c", isprint(ch) ? ch : '.');
1d6c9407
WN
1876 case BINARY_PRINT_DATA_BEGIN:
1877 case BINARY_PRINT_LINE_BEGIN:
1878 case BINARY_PRINT_ADDR:
1879 case BINARY_PRINT_NUM_DATA:
1880 case BINARY_PRINT_NUM_PAD:
1881 case BINARY_PRINT_SEP:
1882 case BINARY_PRINT_CHAR_PAD:
1883 case BINARY_PRINT_LINE_END:
1884 case BINARY_PRINT_DATA_END:
1885 default:
1886 break;
1887 }
923d0c9a
ACM
1888
1889 return 0;
1d6c9407
WN
1890}
1891
1892static void bpf_output__fprintf(struct trace *trace,
1893 struct perf_sample *sample)
1894{
923d0c9a
ACM
1895 binary__fprintf(sample->raw_data, sample->raw_size, 8,
1896 bpf_output__printer, NULL, trace->output);
1d6c9407
WN
1897}
1898
14a052df
ACM
1899static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1900 union perf_event *event __maybe_unused,
1901 struct perf_sample *sample)
1902{
7ad35615
ACM
1903 int callchain_ret = 0;
1904
1905 if (sample->callchain) {
1906 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1907 if (callchain_ret == 0) {
1908 if (callchain_cursor.nr < trace->min_stack)
1909 goto out;
1910 callchain_ret = 1;
1911 }
1912 }
1913
14a052df
ACM
1914 trace__printf_interrupted_entry(trace, sample);
1915 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1916
1917 if (trace->trace_syscalls)
1918 fprintf(trace->output, "( ): ");
1919
1920 fprintf(trace->output, "%s:", evsel->name);
14a052df 1921
1d6c9407
WN
1922 if (perf_evsel__is_bpf_output(evsel)) {
1923 bpf_output__fprintf(trace, sample);
1924 } else if (evsel->tp_format) {
14a052df
ACM
1925 event_format__fprintf(evsel->tp_format, sample->cpu,
1926 sample->raw_data, sample->raw_size,
1927 trace->output);
1928 }
1929
1930 fprintf(trace->output, ")\n");
202ff968 1931
7ad35615
ACM
1932 if (callchain_ret > 0)
1933 trace__fprintf_callchain(trace, sample);
1934 else if (callchain_ret < 0)
1935 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1936out:
14a052df
ACM
1937 return 0;
1938}
1939
598d02c5
SF
1940static void print_location(FILE *f, struct perf_sample *sample,
1941 struct addr_location *al,
1942 bool print_dso, bool print_sym)
1943{
1944
bb963e16 1945 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
1946 fprintf(f, "%s@", al->map->dso->long_name);
1947
bb963e16 1948 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 1949 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1950 al->addr - al->sym->start);
1951 else if (al->map)
4414a3c5 1952 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1953 else
4414a3c5 1954 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1955}
1956
1957static int trace__pgfault(struct trace *trace,
1958 struct perf_evsel *evsel,
473398a2 1959 union perf_event *event __maybe_unused,
598d02c5
SF
1960 struct perf_sample *sample)
1961{
1962 struct thread *thread;
598d02c5
SF
1963 struct addr_location al;
1964 char map_type = 'd';
a2ea67d7 1965 struct thread_trace *ttrace;
b91fc39f 1966 int err = -1;
1df54290 1967 int callchain_ret = 0;
598d02c5
SF
1968
1969 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1970
1971 if (sample->callchain) {
1972 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1973 if (callchain_ret == 0) {
1974 if (callchain_cursor.nr < trace->min_stack)
1975 goto out_put;
1976 callchain_ret = 1;
1977 }
1978 }
1979
a2ea67d7
SF
1980 ttrace = thread__trace(thread, trace->output);
1981 if (ttrace == NULL)
b91fc39f 1982 goto out_put;
a2ea67d7
SF
1983
1984 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1985 ttrace->pfmaj++;
1986 else
1987 ttrace->pfmin++;
1988
1989 if (trace->summary_only)
b91fc39f 1990 goto out;
598d02c5 1991
473398a2 1992 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
1993 sample->ip, &al);
1994
fd2b2975 1995 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
1996
1997 fprintf(trace->output, "%sfault [",
1998 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1999 "maj" : "min");
2000
2001 print_location(trace->output, sample, &al, false, true);
2002
2003 fprintf(trace->output, "] => ");
2004
473398a2 2005 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
2006 sample->addr, &al);
2007
2008 if (!al.map) {
473398a2 2009 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
2010 MAP__FUNCTION, sample->addr, &al);
2011
2012 if (al.map)
2013 map_type = 'x';
2014 else
2015 map_type = '?';
2016 }
2017
2018 print_location(trace->output, sample, &al, true, false);
2019
2020 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 2021
1df54290
ACM
2022 if (callchain_ret > 0)
2023 trace__fprintf_callchain(trace, sample);
2024 else if (callchain_ret < 0)
2025 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
2026out:
2027 err = 0;
2028out_put:
2029 thread__put(thread);
2030 return err;
598d02c5
SF
2031}
2032
e6001980 2033static void trace__set_base_time(struct trace *trace,
8a07a809 2034 struct perf_evsel *evsel,
e6001980
ACM
2035 struct perf_sample *sample)
2036{
8a07a809
ACM
2037 /*
2038 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2039 * and don't use sample->time unconditionally, we may end up having
2040 * some other event in the future without PERF_SAMPLE_TIME for good
2041 * reason, i.e. we may not be interested in its timestamps, just in
2042 * it taking place, picking some piece of information when it
2043 * appears in our event stream (vfs_getname comes to mind).
2044 */
2045 if (trace->base_time == 0 && !trace->full_time &&
2046 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
2047 trace->base_time = sample->time;
2048}
2049
6810fc91 2050static int trace__process_sample(struct perf_tool *tool,
0c82adcf 2051 union perf_event *event,
6810fc91
DA
2052 struct perf_sample *sample,
2053 struct perf_evsel *evsel,
2054 struct machine *machine __maybe_unused)
2055{
2056 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 2057 struct thread *thread;
6810fc91
DA
2058 int err = 0;
2059
744a9719 2060 tracepoint_handler handler = evsel->handler;
6810fc91 2061
aa07df6e
DA
2062 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2063 if (thread && thread__is_filtered(thread))
ef65e96e 2064 goto out;
bdc89661 2065
e6001980 2066 trace__set_base_time(trace, evsel, sample);
6810fc91 2067
3160565f
DA
2068 if (handler) {
2069 ++trace->nr_events;
0c82adcf 2070 handler(trace, evsel, event, sample);
3160565f 2071 }
ef65e96e
ACM
2072out:
2073 thread__put(thread);
6810fc91
DA
2074 return err;
2075}
2076
1e28fe0a 2077static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
2078{
2079 unsigned int rec_argc, i, j;
2080 const char **rec_argv;
2081 const char * const record_args[] = {
2082 "record",
2083 "-R",
2084 "-m", "1024",
2085 "-c", "1",
5e2485b1
DA
2086 };
2087
1e28fe0a
SF
2088 const char * const sc_args[] = { "-e", };
2089 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2090 const char * const majpf_args[] = { "-e", "major-faults" };
2091 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2092 const char * const minpf_args[] = { "-e", "minor-faults" };
2093 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2094
9aca7f17 2095 /* +1 is for the event string below */
1e28fe0a
SF
2096 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2097 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
2098 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2099
2100 if (rec_argv == NULL)
2101 return -ENOMEM;
2102
1e28fe0a 2103 j = 0;
5e2485b1 2104 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
2105 rec_argv[j++] = record_args[i];
2106
e281a960
SF
2107 if (trace->trace_syscalls) {
2108 for (i = 0; i < sc_args_nr; i++)
2109 rec_argv[j++] = sc_args[i];
2110
2111 /* event string may be different for older kernels - e.g., RHEL6 */
2112 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2113 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2114 else if (is_valid_tracepoint("syscalls:sys_enter"))
2115 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2116 else {
2117 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
c896f85a 2118 free(rec_argv);
e281a960
SF
2119 return -1;
2120 }
9aca7f17 2121 }
9aca7f17 2122
1e28fe0a
SF
2123 if (trace->trace_pgfaults & TRACE_PFMAJ)
2124 for (i = 0; i < majpf_args_nr; i++)
2125 rec_argv[j++] = majpf_args[i];
2126
2127 if (trace->trace_pgfaults & TRACE_PFMIN)
2128 for (i = 0; i < minpf_args_nr; i++)
2129 rec_argv[j++] = minpf_args[i];
2130
2131 for (i = 0; i < (unsigned int)argc; i++)
2132 rec_argv[j++] = argv[i];
5e2485b1 2133
b0ad8ea6 2134 return cmd_record(j, rec_argv);
5e2485b1
DA
2135}
2136
bf2575c1
DA
2137static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2138
08c98776 2139static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2140{
ef503831 2141 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2142
2143 if (IS_ERR(evsel))
08c98776 2144 return false;
c522739d
ACM
2145
2146 if (perf_evsel__field(evsel, "pathname") == NULL) {
2147 perf_evsel__delete(evsel);
08c98776 2148 return false;
c522739d
ACM
2149 }
2150
744a9719 2151 evsel->handler = trace__vfs_getname;
c522739d 2152 perf_evlist__add(evlist, evsel);
08c98776 2153 return true;
c522739d
ACM
2154}
2155
0ae537cb 2156static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2157{
2158 struct perf_evsel *evsel;
2159 struct perf_event_attr attr = {
2160 .type = PERF_TYPE_SOFTWARE,
2161 .mmap_data = 1,
598d02c5
SF
2162 };
2163
2164 attr.config = config;
0524798c 2165 attr.sample_period = 1;
598d02c5
SF
2166
2167 event_attr_init(&attr);
2168
2169 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2170 if (evsel)
2171 evsel->handler = trace__pgfault;
598d02c5 2172
0ae537cb 2173 return evsel;
598d02c5
SF
2174}
2175
ddbb1b13
ACM
2176static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2177{
2178 const u32 type = event->header.type;
2179 struct perf_evsel *evsel;
2180
ddbb1b13
ACM
2181 if (type != PERF_RECORD_SAMPLE) {
2182 trace__process_event(trace, trace->host, event, sample);
2183 return;
2184 }
2185
2186 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2187 if (evsel == NULL) {
2188 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2189 return;
2190 }
2191
e6001980
ACM
2192 trace__set_base_time(trace, evsel, sample);
2193
ddbb1b13
ACM
2194 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2195 sample->raw_data == NULL) {
2196 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2197 perf_evsel__name(evsel), sample->tid,
2198 sample->cpu, sample->raw_size);
2199 } else {
2200 tracepoint_handler handler = evsel->handler;
2201 handler(trace, evsel, event, sample);
2202 }
2203}
2204
c27366f0
ACM
2205static int trace__add_syscall_newtp(struct trace *trace)
2206{
2207 int ret = -1;
2208 struct perf_evlist *evlist = trace->evlist;
2209 struct perf_evsel *sys_enter, *sys_exit;
2210
2211 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2212 if (sys_enter == NULL)
2213 goto out;
2214
2215 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2216 goto out_delete_sys_enter;
2217
2218 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2219 if (sys_exit == NULL)
2220 goto out_delete_sys_enter;
2221
2222 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2223 goto out_delete_sys_exit;
2224
2225 perf_evlist__add(evlist, sys_enter);
2226 perf_evlist__add(evlist, sys_exit);
2227
2ddd5c04 2228 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2229 /*
2230 * We're interested only in the user space callchain
2231 * leading to the syscall, allow overriding that for
2232 * debugging reasons using --kernel_syscall_callchains
2233 */
2234 sys_exit->attr.exclude_callchain_kernel = 1;
2235 }
2236
8b3ce757
ACM
2237 trace->syscalls.events.sys_enter = sys_enter;
2238 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2239
2240 ret = 0;
2241out:
2242 return ret;
2243
2244out_delete_sys_exit:
2245 perf_evsel__delete_priv(sys_exit);
2246out_delete_sys_enter:
2247 perf_evsel__delete_priv(sys_enter);
2248 goto out;
2249}
2250
19867b61
ACM
2251static int trace__set_ev_qualifier_filter(struct trace *trace)
2252{
2253 int err = -1;
b15d0a4c 2254 struct perf_evsel *sys_exit;
19867b61
ACM
2255 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2256 trace->ev_qualifier_ids.nr,
2257 trace->ev_qualifier_ids.entries);
2258
2259 if (filter == NULL)
2260 goto out_enomem;
2261
3541c034
MP
2262 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2263 filter)) {
b15d0a4c 2264 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2265 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2266 }
19867b61
ACM
2267
2268 free(filter);
2269out:
2270 return err;
2271out_enomem:
2272 errno = ENOMEM;
2273 goto out;
2274}
c27366f0 2275
dd1a5037
ACM
2276static int trace__set_filter_loop_pids(struct trace *trace)
2277{
082ab9a1 2278 unsigned int nr = 1;
dd1a5037
ACM
2279 pid_t pids[32] = {
2280 getpid(),
2281 };
082ab9a1
ACM
2282 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
2283
2284 while (thread && nr < ARRAY_SIZE(pids)) {
2285 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
2286
2287 if (parent == NULL)
2288 break;
2289
2290 if (!strcmp(thread__comm_str(parent), "sshd")) {
2291 pids[nr++] = parent->tid;
2292 break;
2293 }
2294 thread = parent;
2295 }
dd1a5037
ACM
2296
2297 return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
2298}
2299
f15eb531 2300static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2301{
14a052df 2302 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2303 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2304 int err = -1, i;
2305 unsigned long before;
f15eb531 2306 const bool forks = argc > 0;
46fb3c21 2307 bool draining = false;
514f1c67 2308
75b757ca
ACM
2309 trace->live = true;
2310
c27366f0 2311 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2312 goto out_error_raw_syscalls;
514f1c67 2313
e281a960 2314 if (trace->trace_syscalls)
08c98776 2315 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2316
0ae537cb
ACM
2317 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2318 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2319 if (pgfault_maj == NULL)
2320 goto out_error_mem;
2321 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2322 }
598d02c5 2323
0ae537cb
ACM
2324 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2325 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2326 if (pgfault_min == NULL)
2327 goto out_error_mem;
2328 perf_evlist__add(evlist, pgfault_min);
2329 }
598d02c5 2330
1302d88e 2331 if (trace->sched &&
2cc990ba
ACM
2332 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2333 trace__sched_stat_runtime))
2334 goto out_error_sched_stat_runtime;
1302d88e 2335
514f1c67
ACM
2336 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2337 if (err < 0) {
c24ff998 2338 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2339 goto out_delete_evlist;
2340 }
2341
752fde44
ACM
2342 err = trace__symbols_init(trace, evlist);
2343 if (err < 0) {
c24ff998 2344 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2345 goto out_delete_evlist;
752fde44
ACM
2346 }
2347
fde54b78
ACM
2348 perf_evlist__config(evlist, &trace->opts, NULL);
2349
0c3a6ef4 2350 if (callchain_param.enabled) {
0c3a6ef4
ACM
2351 if (trace->syscalls.events.sys_exit) {
2352 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2353 &trace->opts, &callchain_param);
0c3a6ef4
ACM
2354 }
2355
2356 if (pgfault_maj) {
2357 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
0c3a6ef4
ACM
2358 }
2359
2360 if (pgfault_min) {
2361 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
0c3a6ef4 2362 }
fde54b78 2363 }
514f1c67 2364
f15eb531
NK
2365 signal(SIGCHLD, sig_handler);
2366 signal(SIGINT, sig_handler);
2367
2368 if (forks) {
6ef73ec4 2369 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2370 argv, false, NULL);
f15eb531 2371 if (err < 0) {
c24ff998 2372 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2373 goto out_delete_evlist;
f15eb531
NK
2374 }
2375 }
2376
514f1c67 2377 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2378 if (err < 0)
2379 goto out_error_open;
514f1c67 2380
ba504235
WN
2381 err = bpf__apply_obj_config();
2382 if (err) {
2383 char errbuf[BUFSIZ];
2384
2385 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2386 pr_err("ERROR: Apply config to BPF failed: %s\n",
2387 errbuf);
2388 goto out_error_open;
2389 }
2390
241b057c
ACM
2391 /*
2392 * Better not use !target__has_task() here because we need to cover the
2393 * case where no threads were specified in the command line, but a
2394 * workload was, and in that case we will fill in the thread_map when
2395 * we fork the workload in perf_evlist__prepare_workload.
2396 */
f078c385
ACM
2397 if (trace->filter_pids.nr > 0)
2398 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2399 else if (thread_map__pid(evlist->threads, 0) == -1)
dd1a5037 2400 err = trace__set_filter_loop_pids(trace);
f078c385 2401
94ad89bc
ACM
2402 if (err < 0)
2403 goto out_error_mem;
2404
19867b61
ACM
2405 if (trace->ev_qualifier_ids.nr > 0) {
2406 err = trace__set_ev_qualifier_filter(trace);
2407 if (err < 0)
2408 goto out_errno;
19867b61 2409
2e5e5f87
ACM
2410 pr_debug("event qualifier tracepoint filter: %s\n",
2411 trace->syscalls.events.sys_exit->filter);
2412 }
19867b61 2413
94ad89bc
ACM
2414 err = perf_evlist__apply_filters(evlist, &evsel);
2415 if (err < 0)
2416 goto out_error_apply_filters;
241b057c 2417
f74b9d3a 2418 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
e09b18d4
ACM
2419 if (err < 0)
2420 goto out_error_mmap;
514f1c67 2421
e36b7821 2422 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2423 perf_evlist__enable(evlist);
2424
f15eb531
NK
2425 if (forks)
2426 perf_evlist__start_workload(evlist);
2427
e36b7821
AB
2428 if (trace->opts.initial_delay) {
2429 usleep(trace->opts.initial_delay * 1000);
2430 perf_evlist__enable(evlist);
2431 }
2432
e13798c7 2433 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2434 evlist->threads->nr > 1 ||
2435 perf_evlist__first(evlist)->attr.inherit;
514f1c67 2436again:
efd5745e 2437 before = trace->nr_events;
514f1c67
ACM
2438
2439 for (i = 0; i < evlist->nr_mmaps; i++) {
2440 union perf_event *event;
2441
2442 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2443 struct perf_sample sample;
514f1c67 2444
efd5745e 2445 ++trace->nr_events;
514f1c67 2446
514f1c67
ACM
2447 err = perf_evlist__parse_sample(evlist, event, &sample);
2448 if (err) {
c24ff998 2449 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2450 goto next_event;
514f1c67
ACM
2451 }
2452
ddbb1b13 2453 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2454next_event:
2455 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2456
ba209f85
ACM
2457 if (interrupted)
2458 goto out_disable;
02ac5421
ACM
2459
2460 if (done && !draining) {
2461 perf_evlist__disable(evlist);
2462 draining = true;
2463 }
514f1c67
ACM
2464 }
2465 }
2466
efd5745e 2467 if (trace->nr_events == before) {
ba209f85 2468 int timeout = done ? 100 : -1;
f15eb531 2469
46fb3c21
ACM
2470 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2471 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2472 draining = true;
2473
ba209f85 2474 goto again;
46fb3c21 2475 }
ba209f85
ACM
2476 } else {
2477 goto again;
f15eb531
NK
2478 }
2479
ba209f85 2480out_disable:
f3b623b8
ACM
2481 thread__zput(trace->current);
2482
ba209f85 2483 perf_evlist__disable(evlist);
514f1c67 2484
c522739d
ACM
2485 if (!err) {
2486 if (trace->summary)
2487 trace__fprintf_thread_summary(trace, trace->output);
2488
2489 if (trace->show_tool_stats) {
2490 fprintf(trace->output, "Stats:\n "
2491 " vfs_getname : %" PRIu64 "\n"
2492 " proc_getname: %" PRIu64 "\n",
2493 trace->stats.vfs_getname,
2494 trace->stats.proc_getname);
2495 }
2496 }
bf2575c1 2497
514f1c67 2498out_delete_evlist:
33974a41
AV
2499 trace__symbols__exit(trace);
2500
514f1c67 2501 perf_evlist__delete(evlist);
14a052df 2502 trace->evlist = NULL;
75b757ca 2503 trace->live = false;
514f1c67 2504 return err;
6ef068cb
ACM
2505{
2506 char errbuf[BUFSIZ];
a8f23d8f 2507
2cc990ba 2508out_error_sched_stat_runtime:
988bdb31 2509 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2510 goto out_error;
2511
801c67b0 2512out_error_raw_syscalls:
988bdb31 2513 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2514 goto out_error;
2515
e09b18d4
ACM
2516out_error_mmap:
2517 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2518 goto out_error;
2519
a8f23d8f
ACM
2520out_error_open:
2521 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2522
2523out_error:
6ef068cb 2524 fprintf(trace->output, "%s\n", errbuf);
87f91868 2525 goto out_delete_evlist;
94ad89bc
ACM
2526
2527out_error_apply_filters:
2528 fprintf(trace->output,
2529 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2530 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2531 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2532 goto out_delete_evlist;
514f1c67 2533}
5ed08dae
ACM
2534out_error_mem:
2535 fprintf(trace->output, "Not enough memory to run!\n");
2536 goto out_delete_evlist;
19867b61
ACM
2537
2538out_errno:
2539 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2540 goto out_delete_evlist;
a8f23d8f 2541}
514f1c67 2542
6810fc91
DA
2543static int trace__replay(struct trace *trace)
2544{
2545 const struct perf_evsel_str_handler handlers[] = {
c522739d 2546 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2547 };
8ceb41d7 2548 struct perf_data data = {
eae8ad80
JO
2549 .file = {
2550 .path = input_name,
2551 },
2552 .mode = PERF_DATA_MODE_READ,
2553 .force = trace->force,
f5fc1412 2554 };
6810fc91 2555 struct perf_session *session;
003824e8 2556 struct perf_evsel *evsel;
6810fc91
DA
2557 int err = -1;
2558
2559 trace->tool.sample = trace__process_sample;
2560 trace->tool.mmap = perf_event__process_mmap;
384c671e 2561 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2562 trace->tool.comm = perf_event__process_comm;
2563 trace->tool.exit = perf_event__process_exit;
2564 trace->tool.fork = perf_event__process_fork;
2565 trace->tool.attr = perf_event__process_attr;
f3b3614a 2566 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2567 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2568 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2569
0a8cb85c 2570 trace->tool.ordered_events = true;
6810fc91
DA
2571 trace->tool.ordering_requires_timestamps = true;
2572
2573 /* add tid to output */
2574 trace->multiple_threads = true;
2575
8ceb41d7 2576 session = perf_session__new(&data, false, &trace->tool);
6810fc91 2577 if (session == NULL)
52e02834 2578 return -1;
6810fc91 2579
aa07df6e
DA
2580 if (trace->opts.target.pid)
2581 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2582
2583 if (trace->opts.target.tid)
2584 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2585
0a7e6d1b 2586 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2587 goto out;
2588
8fb598e5
DA
2589 trace->host = &session->machines.host;
2590
6810fc91
DA
2591 err = perf_session__set_tracepoints_handlers(session, handlers);
2592 if (err)
2593 goto out;
2594
003824e8
NK
2595 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2596 "raw_syscalls:sys_enter");
9aca7f17
DA
2597 /* older kernels have syscalls tp versus raw_syscalls */
2598 if (evsel == NULL)
2599 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2600 "syscalls:sys_enter");
003824e8 2601
e281a960
SF
2602 if (evsel &&
2603 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2604 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2605 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2606 goto out;
2607 }
2608
2609 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2610 "raw_syscalls:sys_exit");
9aca7f17
DA
2611 if (evsel == NULL)
2612 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2613 "syscalls:sys_exit");
e281a960
SF
2614 if (evsel &&
2615 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2616 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2617 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2618 goto out;
2619 }
2620
e5cadb93 2621 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2622 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2623 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2624 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2625 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2626 evsel->handler = trace__pgfault;
2627 }
2628
6810fc91
DA
2629 setup_pager();
2630
b7b61cbe 2631 err = perf_session__process_events(session);
6810fc91
DA
2632 if (err)
2633 pr_err("Failed to process events, error %d", err);
2634
bf2575c1
DA
2635 else if (trace->summary)
2636 trace__fprintf_thread_summary(trace, trace->output);
2637
6810fc91
DA
2638out:
2639 perf_session__delete(session);
2640
2641 return err;
2642}
2643
1302d88e
ACM
/*
 * Print the heading of the per-thread summary to @fp.
 *
 * Returns the value of the fprintf() call, converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2652
b535d523
ACM
2653DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2654 struct stats *stats;
2655 double msecs;
2656 int syscall;
2657)
2658{
2659 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2660 struct stats *stats = source->priv;
2661
2662 entry->syscall = source->i;
2663 entry->stats = stats;
2664 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2665}
2666
bf2575c1
DA
2667static size_t thread__dump_stats(struct thread_trace *ttrace,
2668 struct trace *trace, FILE *fp)
2669{
bf2575c1
DA
2670 size_t printed = 0;
2671 struct syscall *sc;
b535d523
ACM
2672 struct rb_node *nd;
2673 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2674
b535d523 2675 if (syscall_stats == NULL)
bf2575c1
DA
2676 return 0;
2677
2678 printed += fprintf(fp, "\n");
2679
834fd46d
MW
2680 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2681 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2682 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2683
98a91837 2684 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2685 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2686 if (stats) {
2687 double min = (double)(stats->min) / NSEC_PER_MSEC;
2688 double max = (double)(stats->max) / NSEC_PER_MSEC;
2689 double avg = avg_stats(stats);
2690 double pct;
2691 u64 n = (u64) stats->n;
2692
2693 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2694 avg /= NSEC_PER_MSEC;
2695
b535d523 2696 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2697 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2698 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2699 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2700 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2701 }
bf2575c1
DA
2702 }
2703
b535d523 2704 resort_rb__delete(syscall_stats);
bf2575c1 2705 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2706
2707 return printed;
2708}
2709
96c14451 2710static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2711{
96c14451 2712 size_t printed = 0;
89dceb22 2713 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2714 double ratio;
2715
2716 if (ttrace == NULL)
2717 return 0;
2718
2719 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2720
15e65c69 2721 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2722 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2723 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2724 if (ttrace->pfmaj)
2725 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2726 if (ttrace->pfmin)
2727 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2728 if (trace->sched)
2729 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2730 else if (fputc('\n', fp) != EOF)
2731 ++printed;
2732
bf2575c1 2733 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2734
96c14451
ACM
2735 return printed;
2736}
896cbb56 2737
96c14451
ACM
2738static unsigned long thread__nr_events(struct thread_trace *ttrace)
2739{
2740 return ttrace ? ttrace->nr_events : 0;
2741}
2742
2743DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2744 struct thread *thread;
2745)
2746{
2747 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2748}
2749
1302d88e
ACM
2750static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2751{
96c14451
ACM
2752 size_t printed = trace__fprintf_threads_header(fp);
2753 struct rb_node *nd;
91e467bc 2754 int i;
1302d88e 2755
91e467bc
KL
2756 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
2757 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
96c14451 2758
91e467bc
KL
2759 if (threads == NULL) {
2760 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2761 return 0;
2762 }
896cbb56 2763
91e467bc
KL
2764 resort_rb__for_each_entry(nd, threads)
2765 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
96c14451 2766
91e467bc
KL
2767 resort_rb__delete(threads);
2768 }
96c14451 2769 return printed;
1302d88e
ACM
2770}
2771
ae9ed035
ACM
2772static int trace__set_duration(const struct option *opt, const char *str,
2773 int unset __maybe_unused)
2774{
2775 struct trace *trace = opt->value;
2776
2777 trace->duration_filter = atof(str);
2778 return 0;
2779}
2780
f078c385
ACM
2781static int trace__set_filter_pids(const struct option *opt, const char *str,
2782 int unset __maybe_unused)
2783{
2784 int ret = -1;
2785 size_t i;
2786 struct trace *trace = opt->value;
2787 /*
2788 * FIXME: introduce a intarray class, plain parse csv and create a
2789 * { int nr, int entries[] } struct...
2790 */
2791 struct intlist *list = intlist__new(str);
2792
2793 if (list == NULL)
2794 return -1;
2795
2796 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2797 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2798
2799 if (trace->filter_pids.entries == NULL)
2800 goto out;
2801
2802 trace->filter_pids.entries[0] = getpid();
2803
2804 for (i = 1; i < trace->filter_pids.nr; ++i)
2805 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2806
2807 intlist__delete(list);
2808 ret = 0;
2809out:
2810 return ret;
2811}
2812
c24ff998
ACM
2813static int trace__open_output(struct trace *trace, const char *filename)
2814{
2815 struct stat st;
2816
2817 if (!stat(filename, &st) && st.st_size) {
2818 char oldname[PATH_MAX];
2819
2820 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2821 unlink(oldname);
2822 rename(filename, oldname);
2823 }
2824
2825 trace->output = fopen(filename, "w");
2826
2827 return trace->output == NULL ? -errno : 0;
2828}
2829
598d02c5
SF
2830static int parse_pagefaults(const struct option *opt, const char *str,
2831 int unset __maybe_unused)
2832{
2833 int *trace_pgfaults = opt->value;
2834
2835 if (strcmp(str, "all") == 0)
2836 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2837 else if (strcmp(str, "maj") == 0)
2838 *trace_pgfaults |= TRACE_PFMAJ;
2839 else if (strcmp(str, "min") == 0)
2840 *trace_pgfaults |= TRACE_PFMIN;
2841 else
2842 return -1;
2843
2844 return 0;
2845}
2846
14a052df
ACM
2847static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2848{
2849 struct perf_evsel *evsel;
2850
e5cadb93 2851 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2852 evsel->handler = handler;
2853}
2854
017037ff
ACM
2855/*
2856 * XXX: Hackish, just splitting the combined -e+--event (syscalls
2857 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2858 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2859 *
2860 * It'd be better to introduce a parse_options() variant that would return a
2861 * list with the terms it didn't match to an event...
2862 */
2863static int trace__parse_events_option(const struct option *opt, const char *str,
2864 int unset __maybe_unused)
2865{
2866 struct trace *trace = (struct trace *)opt->value;
2867 const char *s = str;
2868 char *sep = NULL, *lists[2] = { NULL, NULL, };
27702bcf 2869 int len = strlen(str) + 1, err = -1, list, idx;
017037ff
ACM
2870 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2871 char group_name[PATH_MAX];
2872
2873 if (strace_groups_dir == NULL)
2874 return -1;
2875
2876 if (*s == '!') {
2877 ++s;
2878 trace->not_ev_qualifier = true;
2879 }
2880
2881 while (1) {
2882 if ((sep = strchr(s, ',')) != NULL)
2883 *sep = '\0';
2884
2885 list = 0;
27702bcf
ACM
2886 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
2887 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
017037ff
ACM
2888 list = 1;
2889 } else {
2890 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2891 if (access(group_name, R_OK) == 0)
2892 list = 1;
2893 }
2894
2895 if (lists[list]) {
2896 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
2897 } else {
2898 lists[list] = malloc(len);
2899 if (lists[list] == NULL)
2900 goto out;
2901 strcpy(lists[list], s);
2902 }
2903
2904 if (!sep)
2905 break;
2906
2907 *sep = ',';
2908 s = sep + 1;
2909 }
2910
2911 if (lists[1] != NULL) {
2912 struct strlist_config slist_config = {
2913 .dirname = strace_groups_dir,
2914 };
2915
2916 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2917 if (trace->ev_qualifier == NULL) {
2918 fputs("Not enough memory to parse event qualifier", trace->output);
2919 goto out;
2920 }
2921
2922 if (trace__validate_ev_qualifier(trace))
2923 goto out;
2924 }
2925
2926 err = 0;
2927
2928 if (lists[0]) {
2929 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2930 "event selector. use 'perf list' to list available events",
2931 parse_events_option);
2932 err = parse_events_option(&o, lists[0], 0);
2933 }
2934out:
2935 if (sep)
2936 *sep = ',';
2937
2938 return err;
2939}
2940
/*
 * Entry point of the 'perf trace' builtin.
 *
 * Fills a struct trace with defaults, creates the evlist and the syscall
 * table, parses the command line (the "record" subcommand is recognized),
 * sets up BPF stdout, adjusts callchain and mmap settings and then either:
 *   - hands over to trace__record() when the first remaining argument is
 *     "record",
 *   - replays a file via trace__replay() when input_name is set, or
 *   - traces live via trace__run().
 *
 * Returns the value of 'err' through the 'out' label; the "record" dispatch
 * and the "nothing to trace" check return directly without reaching it.
 */
b0ad8ea6 2941int cmd_trace(int argc, const char **argv)
514f1c67 2942{
6fdd9cb7 2943 const char *trace_usage[] = {
f15eb531
NK
2944 "perf trace [<options>] [<command>]",
2945 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2946 "perf trace record [<options>] [<command>]",
2947 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2948 NULL
2949 };
2950 struct trace trace = {
514f1c67
ACM
2951 .syscalls = {
2952 . max = -1,
2953 },
2954 .opts = {
2955 .target = {
2956 .uid = UINT_MAX,
2957 .uses_mmap = true,
2958 },
2959 .user_freq = UINT_MAX,
2960 .user_interval = ULLONG_MAX,
509051ea 2961 .no_buffering = true,
38d5447d 2962 .mmap_pages = UINT_MAX,
9d9cad76 2963 .proc_map_timeout = 500,
514f1c67 2964 },
007d66a0 2965 .output = stderr,
50c95cbd 2966 .show_comm = true,
e281a960 2967 .trace_syscalls = true,
44621819 2968 .kernel_syscallchains = false,
05614993 2969 .max_stack = UINT_MAX,
514f1c67 2970 };
c24ff998 2971 const char *output_name = NULL;
514f1c67 2972 const struct option trace_options[] = {
017037ff
ACM
2973 OPT_CALLBACK('e', "event", &trace, "event",
2974 "event/syscall selector. use 'perf list' to list available events",
2975 trace__parse_events_option),
50c95cbd
ACM
2976 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2977 "show the thread COMM next to its id"),
c522739d 2978 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
2979 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2980 trace__parse_events_option),
c24ff998 2981 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 2982 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
2983 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2984 "trace events on existing process id"),
ac9be8ee 2985 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 2986 "trace events on existing thread id"),
fa0e4ffe
ACM
2987 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2988 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 2989 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 2990 "system-wide collection from all CPUs"),
ac9be8ee 2991 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 2992 "list of cpus to monitor"),
6810fc91 2993 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 2994 "child tasks do not inherit counters"),
994a1f78
JO
2995 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2996 "number of mmap data pages",
2997 perf_evlist__parse_mmap_pages),
ac9be8ee 2998 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 2999 "user to profile"),
ae9ed035
ACM
3000 OPT_CALLBACK(0, "duration", &trace, "float",
3001 "show only events with duration > N.M ms",
3002 trace__set_duration),
1302d88e 3003 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 3004 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
3005 OPT_BOOLEAN('T', "time", &trace.full_time,
3006 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
3007 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3008 "Show only syscall summary with statistics"),
3009 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3010 "Show all syscalls and summary with statistics"),
598d02c5
SF
3011 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3012 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 3013 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 3014 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
3015 OPT_CALLBACK(0, "call-graph", &trace.opts,
3016 "record_mode[,record_size]", record_callchain_help,
3017 &record_parse_callchain_opt),
44621819
ACM
3018 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
3019 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
3020 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
3021 "Set the minimum stack depth when parsing the callchain, "
3022 "anything below the specified depth will be ignored."),
c6d4a494
ACM
3023 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
3024 "Set the maximum stack depth when parsing the callchain, "
3025 "anything beyond the specified depth will be ignored. "
4cb93446 3026 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
9d9cad76
KL
3027 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3028 "per thread proc mmap processing timeout in ms"),
e36b7821
AB
3029 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
3030 "ms to wait before starting measurement after program "
3031 "start"),
514f1c67
ACM
3032 OPT_END()
3033 };
ccd62a89 3034 bool __maybe_unused max_stack_user_set = true;
f3e459d1 3035 bool mmap_pages_user_set = true;
6fdd9cb7 3036 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 3037 int err;
32caf0d1 3038 char bf[BUFSIZ];
514f1c67 3039
4d08cb80
ACM
/* NOTE(review): presumably dumps a stack trace if the tool itself crashes - confirm in sighandler_dump_stack(). */
3040 signal(SIGSEGV, sighandler_dump_stack);
3041 signal(SIGFPE, sighandler_dump_stack);
3042
14a052df 3043 trace.evlist = perf_evlist__new();
fd0db102 3044 trace.sctbl = syscalltbl__new();
14a052df 3045
fd0db102 3046 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 3047 pr_err("Not enough memory to run!\n");
ff8f695c 3048 err = -ENOMEM;
14a052df
ACM
3049 goto out;
3050 }
3051
6fdd9cb7
YS
3052 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3053 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 3054
d7888573
WN
3055 err = bpf__setup_stdout(trace.evlist);
3056 if (err) {
3057 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
3058 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
3059 goto out;
3060 }
3061
59247e33
ACM
3062 err = -1;
3063
598d02c5
SF
3064 if (trace.trace_pgfaults) {
3065 trace.opts.sample_address = true;
3066 trace.opts.sample_time = true;
3067 }
3068
f3e459d1
ACM
/*
 * mmap_pages and max_stack were initialized to UINT_MAX above; still holding
 * that value after option parsing is taken to mean the user did not set them.
 */
3069 if (trace.opts.mmap_pages == UINT_MAX)
3070 mmap_pages_user_set = false;
3071
05614993 3072 if (trace.max_stack == UINT_MAX) {
fe176085 3073 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
3074 max_stack_user_set = false;
3075 }
3076
3077#ifdef HAVE_DWARF_UNWIND_SUPPORT
/* A stack depth option without --call-graph implies "dwarf" callchains when tracing syscalls. */
caa36ed7 3078 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
05614993
ACM
3079 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
3080#endif
3081
2ddd5c04 3082 if (callchain_param.enabled) {
f3e459d1
ACM
3083 if (!mmap_pages_user_set && geteuid() == 0)
3084 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
3085
566a0885 3086 symbol_conf.use_callchain = true;
f3e459d1 3087 }
566a0885 3088
14a052df
ACM
3089 if (trace.evlist->nr_entries > 0)
3090 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3091
1e28fe0a
SF
/* "perf trace record ...": hand the remaining arguments to trace__record() and return its result directly. */
3092 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3093 return trace__record(&trace, argc-1, &argv[1]);
3094
3095 /* summary_only implies summary option, but don't overwrite summary if set */
3096 if (trace.summary_only)
3097 trace.summary = trace.summary_only;
3098
726f3234
ACM
3099 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3100 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
3101 pr_err("Please specify something to trace.\n");
3102 return -1;
3103 }
3104
017037ff 3105 if (!trace.trace_syscalls && trace.ev_qualifier) {
59247e33
ACM
3106 pr_err("The -e option can't be used with --no-syscalls.\n");
3107 goto out;
3108 }
3109
c24ff998
ACM
3110 if (output_name != NULL) {
3111 err = trace__open_output(&trace, output_name);
3112 if (err < 0) {
3113 perror("failed to create output file");
3114 goto out;
3115 }
3116 }
3117
fd0db102
ACM
3118 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3119
602ad878 3120 err = target__validate(&trace.opts.target);
32caf0d1 3121 if (err) {
602ad878 3122 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3123 fprintf(trace.output, "%s", bf);
3124 goto out_close;
32caf0d1
NK
3125 }
3126
602ad878 3127 err = target__parse_uid(&trace.opts.target);
514f1c67 3128 if (err) {
602ad878 3129 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3130 fprintf(trace.output, "%s", bf);
3131 goto out_close;
514f1c67
ACM
3132 }
3133
/* No workload arguments left and target__none() holds: trace system wide. */
602ad878 3134 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3135 trace.opts.target.system_wide = true;
3136
6810fc91
DA
/* An input file (-i) selects replay; otherwise run a live trace. */
3137 if (input_name)
3138 err = trace__replay(&trace);
3139 else
3140 err = trace__run(&trace, argc, argv);
1302d88e 3141
c24ff998
ACM
3142out_close:
/* Only close the stream when it was opened here via -o; the default is stderr. */
3143 if (output_name != NULL)
3144 fclose(trace.output);
3145out:
1302d88e 3146 return err;
514f1c67 3147}