]> git.proxmox.com Git - mirror_ubuntu-focal-kernel.git/blame - tools/perf/builtin-trace.c
perf trace: Add --print-sample
[mirror_ubuntu-focal-kernel.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
092bd3cd 24#include "util/env.h"
5ab8c689 25#include "util/event.h"
514f1c67 26#include "util/evlist.h"
4b6ab94e 27#include <subcmd/exec-cmd.h>
752fde44 28#include "util/machine.h"
9a3993d4 29#include "util/path.h"
6810fc91 30#include "util/session.h"
752fde44 31#include "util/thread.h"
4b6ab94e 32#include <subcmd/parse-options.h>
2ae3a312 33#include "util/strlist.h"
bdc89661 34#include "util/intlist.h"
514f1c67 35#include "util/thread_map.h"
bf2575c1 36#include "util/stat.h"
fd5cead2 37#include "trace/beauty/beauty.h"
97978b3e 38#include "trace-event.h"
9aca7f17 39#include "util/parse-events.h"
ba504235 40#include "util/bpf-loader.h"
566a0885 41#include "callchain.h"
fea01392 42#include "print_binary.h"
a067558e 43#include "string2.h"
fd0db102 44#include "syscalltbl.h"
96c14451 45#include "rb_resort.h"
514f1c67 46
a43783ae 47#include <errno.h>
fd20e811 48#include <inttypes.h>
4208735d 49#include <poll.h>
9607ad3a 50#include <signal.h>
514f1c67 51#include <stdlib.h>
017037ff 52#include <string.h>
8dd2a131 53#include <linux/err.h>
997bba8c 54#include <linux/filter.h>
877a7a11 55#include <linux/kernel.h>
39878d49 56#include <linux/random.h>
c6d4a494 57#include <linux/stringify.h>
bd48c63e 58#include <linux/time64.h>
514f1c67 59
3d689ed6
ACM
60#include "sane_ctype.h"
61
c188e7ac
ACM
62#ifndef O_CLOEXEC
63# define O_CLOEXEC 02000000
64#endif
65
83a51694
ACM
66#ifndef F_LINUX_SPECIFIC_BASE
67# define F_LINUX_SPECIFIC_BASE 1024
68#endif
69
d1d438a3
ACM
70struct trace {
71 struct perf_tool tool;
fd0db102 72 struct syscalltbl *sctbl;
d1d438a3
ACM
73 struct {
74 int max;
75 struct syscall *table;
76 struct {
77 struct perf_evsel *sys_enter,
78 *sys_exit;
79 } events;
80 } syscalls;
81 struct record_opts opts;
82 struct perf_evlist *evlist;
83 struct machine *host;
84 struct thread *current;
85 u64 base_time;
86 FILE *output;
87 unsigned long nr_events;
88 struct strlist *ev_qualifier;
89 struct {
90 size_t nr;
91 int *entries;
92 } ev_qualifier_ids;
d1d438a3
ACM
93 struct {
94 size_t nr;
95 pid_t *entries;
96 } filter_pids;
97 double duration_filter;
98 double runtime_ms;
99 struct {
100 u64 vfs_getname,
101 proc_getname;
102 } stats;
c6d4a494 103 unsigned int max_stack;
5cf9c84e 104 unsigned int min_stack;
d1d438a3
ACM
105 bool not_ev_qualifier;
106 bool live;
107 bool full_time;
108 bool sched;
109 bool multiple_threads;
110 bool summary;
111 bool summary_only;
112 bool show_comm;
591421e1 113 bool print_sample;
d1d438a3
ACM
114 bool show_tool_stats;
115 bool trace_syscalls;
44621819 116 bool kernel_syscallchains;
d1d438a3
ACM
117 bool force;
118 bool vfs_getname;
119 int trace_pgfaults;
fd0db102 120 int open_id;
d1d438a3 121};
a1c2552d 122
77170988
ACM
123struct tp_field {
124 int offset;
125 union {
126 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
127 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
128 };
129};
130
131#define TP_UINT_FIELD(bits) \
132static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
133{ \
55d43bca
DA
134 u##bits value; \
135 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136 return value; \
77170988
ACM
137}
138
139TP_UINT_FIELD(8);
140TP_UINT_FIELD(16);
141TP_UINT_FIELD(32);
142TP_UINT_FIELD(64);
143
144#define TP_UINT_FIELD__SWAPPED(bits) \
145static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
146{ \
55d43bca
DA
147 u##bits value; \
148 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
149 return bswap_##bits(value);\
150}
151
152TP_UINT_FIELD__SWAPPED(16);
153TP_UINT_FIELD__SWAPPED(32);
154TP_UINT_FIELD__SWAPPED(64);
155
156static int tp_field__init_uint(struct tp_field *field,
157 struct format_field *format_field,
158 bool needs_swap)
159{
160 field->offset = format_field->offset;
161
162 switch (format_field->size) {
163 case 1:
164 field->integer = tp_field__u8;
165 break;
166 case 2:
167 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
168 break;
169 case 4:
170 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
171 break;
172 case 8:
173 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
174 break;
175 default:
176 return -1;
177 }
178
179 return 0;
180}
181
182static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
183{
184 return sample->raw_data + field->offset;
185}
186
187static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
188{
189 field->offset = format_field->offset;
190 field->pointer = tp_field__ptr;
191 return 0;
192}
193
194struct syscall_tp {
195 struct tp_field id;
196 union {
197 struct tp_field args, ret;
198 };
199};
200
201static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
202 struct tp_field *field,
203 const char *name)
204{
205 struct format_field *format_field = perf_evsel__field(evsel, name);
206
207 if (format_field == NULL)
208 return -1;
209
210 return tp_field__init_uint(field, format_field, evsel->needs_swap);
211}
212
213#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
214 ({ struct syscall_tp *sc = evsel->priv;\
215 perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
216
/*
 * Look up the tracepoint field @name in @evsel and set @field up as a
 * pointer accessor for it.
 *
 * Returns -1 when the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	if (!ff)
		return -1;

	return tp_field__init_ptr(field, ff);
}

/*
 * Initialise the 'name' member of the struct syscall_tp hanging off
 * evsel->priv as a pointer accessor for the field of the same name.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
232
233static void perf_evsel__delete_priv(struct perf_evsel *evsel)
234{
04662523 235 zfree(&evsel->priv);
77170988
ACM
236 perf_evsel__delete(evsel);
237}
238
96695d44
NK
239static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
240{
241 evsel->priv = malloc(sizeof(struct syscall_tp));
242 if (evsel->priv != NULL) {
243 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
244 goto out_delete;
245
246 evsel->handler = handler;
247 return 0;
248 }
249
250 return -ENOMEM;
251
252out_delete:
04662523 253 zfree(&evsel->priv);
96695d44
NK
254 return -ENOENT;
255}
256
ef503831 257static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
77170988 258{
ef503831 259 struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
77170988 260
9aca7f17 261 /* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
8dd2a131 262 if (IS_ERR(evsel))
9aca7f17
DA
263 evsel = perf_evsel__newtp("syscalls", direction);
264
8dd2a131
JO
265 if (IS_ERR(evsel))
266 return NULL;
267
268 if (perf_evsel__init_syscall_tp(evsel, handler))
269 goto out_delete;
77170988
ACM
270
271 return evsel;
272
273out_delete:
274 perf_evsel__delete_priv(evsel);
275 return NULL;
276}
277
/*
 * Read the 'name' member of the struct syscall_tp stored in evsel->priv
 * from @sample through its integer accessor.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, sample); })

/* Same as above, but through the pointer accessor. */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, sample); })
285
0ae79636
ACM
286size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
287{
288 int idx = val - sa->offset;
1f115cb7 289
0ae79636
ACM
290 if (idx < 0 || idx >= sa->nr_entries)
291 return scnprintf(bf, size, intfmt, val);
1f115cb7 292
0ae79636 293 return scnprintf(bf, size, "%s", sa->entries[idx]);
03e3adc9
ACM
294}
295
975b7c2f
ACM
296static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
297 const char *intfmt,
298 struct syscall_arg *arg)
1f115cb7 299{
0ae79636 300 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
1f115cb7
ACM
301}
302
975b7c2f
ACM
/*
 * String-table formatter with "%d" as the fallback format for values that
 * are not in the table.
 */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *fallback_fmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, fallback_fmt, arg);
}
308
1f115cb7
ACM
309#define SCA_STRARRAY syscall_arg__scnprintf_strarray
310
83a51694
ACM
/* A list of string tables that are consulted one after the other. */
struct strarrays {
	int		nr_entries;
	struct strarray **entries;
};

/* Define 'strarrays__<array>' wrapping an array of struct strarray pointers. */
#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
	.nr_entries = ARRAY_SIZE(array), \
	.entries = array, \
}
320
274e86fd
ACM
321size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
322 struct syscall_arg *arg)
83a51694
ACM
323{
324 struct strarrays *sas = arg->parm;
325 int i;
326
327 for (i = 0; i < sas->nr_entries; ++i) {
328 struct strarray *sa = sas->entries[i];
329 int idx = arg->val - sa->offset;
330
331 if (idx >= 0 && idx < sa->nr_entries) {
332 if (sa->entries[idx] == NULL)
333 break;
334 return scnprintf(bf, size, "%s", sa->entries[idx]);
335 }
336 }
337
338 return scnprintf(bf, size, "%d", arg->val);
339}
340
48e1f91a
ACM
341#ifndef AT_FDCWD
342#define AT_FDCWD -100
343#endif
344
75b757ca
ACM
345static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
346 struct syscall_arg *arg)
347{
348 int fd = arg->val;
349
350 if (fd == AT_FDCWD)
351 return scnprintf(bf, size, "CWD");
352
353 return syscall_arg__scnprintf_fd(bf, size, arg);
354}
355
356#define SCA_FDAT syscall_arg__scnprintf_fd_at
357
358static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
359 struct syscall_arg *arg);
360
361#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
362
2c2b1623 363size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
13d4ff3e 364{
01533e97 365 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
366}
367
2c2b1623 368size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
a1c2552d
ACM
369{
370 return scnprintf(bf, size, "%d", arg->val);
371}
372
5dde91ed
ACM
373size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
374{
375 return scnprintf(bf, size, "%ld", arg->val);
376}
377
729a7841
ACM
378static const char *bpf_cmd[] = {
379 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
380 "MAP_GET_NEXT_KEY", "PROG_LOAD",
381};
382static DEFINE_STRARRAY(bpf_cmd);
383
03e3adc9
ACM
384static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
385static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 386
1f115cb7
ACM
387static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
388static DEFINE_STRARRAY(itimers);
389
b62bee1b
ACM
390static const char *keyctl_options[] = {
391 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
392 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
393 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
394 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
395 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
396};
397static DEFINE_STRARRAY(keyctl_options);
398
efe6b882
ACM
399static const char *whences[] = { "SET", "CUR", "END",
400#ifdef SEEK_DATA
401"DATA",
402#endif
403#ifdef SEEK_HOLE
404"HOLE",
405#endif
406};
407static DEFINE_STRARRAY(whences);
f9da0b0c 408
80f587d5
ACM
409static const char *fcntl_cmds[] = {
410 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
e000e5e3
ACM
411 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
412 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
413 "GETOWNER_UIDS",
80f587d5
ACM
414};
415static DEFINE_STRARRAY(fcntl_cmds);
416
83a51694
ACM
417static const char *fcntl_linux_specific_cmds[] = {
418 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
419 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
64e4561d 420 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
83a51694
ACM
421};
422
423static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
424
425static struct strarray *fcntl_cmds_arrays[] = {
426 &strarray__fcntl_cmds,
427 &strarray__fcntl_linux_specific_cmds,
428};
429
430static DEFINE_STRARRAYS(fcntl_cmds_arrays);
431
c045bf02
ACM
432static const char *rlimit_resources[] = {
433 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
434 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
435 "RTTIME",
436};
437static DEFINE_STRARRAY(rlimit_resources);
438
eb5b1b14
ACM
439static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
440static DEFINE_STRARRAY(sighow);
441
4f8c1b74
DA
442static const char *clockid[] = {
443 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
28ebb87c
ACM
444 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
445 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
4f8c1b74
DA
446};
447static DEFINE_STRARRAY(clockid);
448
e10bce81
ACM
449static const char *socket_families[] = {
450 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
451 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
452 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
453 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
454 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
455 "ALG", "NFC", "VSOCK",
456};
457static DEFINE_STRARRAY(socket_families);
458
51108999
ACM
459static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
460 struct syscall_arg *arg)
461{
462 size_t printed = 0;
463 int mode = arg->val;
464
465 if (mode == F_OK) /* 0 */
466 return scnprintf(bf, size, "F");
467#define P_MODE(n) \
468 if (mode & n##_OK) { \
469 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
470 mode &= ~n##_OK; \
471 }
472
473 P_MODE(R);
474 P_MODE(W);
475 P_MODE(X);
476#undef P_MODE
477
478 if (mode)
479 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
480
481 return printed;
482}
483
484#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
485
f994592d
ACM
486static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
487 struct syscall_arg *arg);
488
489#define SCA_FILENAME syscall_arg__scnprintf_filename
490
46cce19b
ACM
491static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
492 struct syscall_arg *arg)
493{
494 int printed = 0, flags = arg->val;
495
496#define P_FLAG(n) \
497 if (flags & O_##n) { \
498 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
499 flags &= ~O_##n; \
500 }
501
502 P_FLAG(CLOEXEC);
503 P_FLAG(NONBLOCK);
504#undef P_FLAG
505
506 if (flags)
507 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
508
509 return printed;
510}
511
512#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
513
a355a61e
ACM
514#ifndef GRND_NONBLOCK
515#define GRND_NONBLOCK 0x0001
516#endif
517#ifndef GRND_RANDOM
518#define GRND_RANDOM 0x0002
519#endif
520
39878d49
ACM
521static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
522 struct syscall_arg *arg)
523{
524 int printed = 0, flags = arg->val;
525
526#define P_FLAG(n) \
527 if (flags & GRND_##n) { \
528 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
529 flags &= ~GRND_##n; \
530 }
531
532 P_FLAG(RANDOM);
533 P_FLAG(NONBLOCK);
534#undef P_FLAG
535
536 if (flags)
537 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
538
539 return printed;
540}
541
542#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
543
82d4a110
ACM
544#define STRARRAY(name, array) \
545 { .scnprintf = SCA_STRARRAY, \
546 .parm = &strarray__##array, }
453350dd 547
092bd3cd 548#include "trace/beauty/arch_errno_names.c"
ea8dc3ce 549#include "trace/beauty/eventfd.c"
8bf382ce 550#include "trace/beauty/flock.c"
d5d71e86 551#include "trace/beauty/futex_op.c"
df4cb167 552#include "trace/beauty/mmap.c"
ba2f22cf 553#include "trace/beauty/mode_t.c"
a30e6259 554#include "trace/beauty/msg_flags.c"
8f48df69 555#include "trace/beauty/open_flags.c"
62de344e 556#include "trace/beauty/perf_event_open.c"
d5d71e86 557#include "trace/beauty/pid.c"
a3bca91f 558#include "trace/beauty/sched_policy.c"
f5cd95ea 559#include "trace/beauty/seccomp.c"
12199d8e 560#include "trace/beauty/signum.c"
bbf86c43 561#include "trace/beauty/socket_type.c"
7206b900 562#include "trace/beauty/waitid_options.c"
a3bca91f 563
82d4a110
ACM
/*
 * Per-argument formatting description: the formatter callback, an opaque
 * parameter for it, an argument name and the 'show_zero' flag.
 */
struct syscall_arg_fmt {
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void	   *parm;
	const char *name;
	bool	   show_zero;
};
570
514f1c67
ACM
571static struct syscall_fmt {
572 const char *name;
aec1930b 573 const char *alias;
82d4a110 574 struct syscall_arg_fmt arg[6];
332337da 575 u8 nr_args;
11c8e39f 576 bool errpid;
514f1c67 577 bool timeout;
04b34729 578 bool hexret;
514f1c67 579} syscall_fmts[] = {
1f63139c 580 { .name = "access",
82d4a110 581 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
1f63139c 582 { .name = "bpf",
82d4a110 583 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
beccb2b5 584 { .name = "brk", .hexret = true,
82d4a110 585 .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
1f63139c 586 { .name = "clock_gettime",
82d4a110 587 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
33396a3a
ACM
588 { .name = "clone", .errpid = true, .nr_args = 5,
589 .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
590 [1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
591 [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
592 [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, },
593 [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
1f63139c 594 { .name = "close",
82d4a110 595 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
1f63139c 596 { .name = "epoll_ctl",
82d4a110 597 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
1f63139c 598 { .name = "eventfd2",
82d4a110 599 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
1f63139c 600 { .name = "fchmodat",
82d4a110 601 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 602 { .name = "fchownat",
82d4a110 603 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 604 { .name = "fcntl",
82d4a110 605 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
39cc355b
ACM
606 .parm = &strarrays__fcntl_cmds_arrays,
607 .show_zero = true, },
82d4a110 608 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
1f63139c 609 { .name = "flock",
82d4a110 610 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
1f63139c
ACM
611 { .name = "fstat", .alias = "newfstat", },
612 { .name = "fstatat", .alias = "newfstatat", },
613 { .name = "futex",
82d4a110 614 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, }, },
1f63139c 615 { .name = "futimesat",
82d4a110 616 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 617 { .name = "getitimer",
82d4a110 618 .arg = { [0] = STRARRAY(which, itimers), }, },
c65f1070 619 { .name = "getpid", .errpid = true, },
d1d438a3 620 { .name = "getpgid", .errpid = true, },
c65f1070 621 { .name = "getppid", .errpid = true, },
1f63139c 622 { .name = "getrandom",
82d4a110 623 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
1f63139c 624 { .name = "getrlimit",
82d4a110 625 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
2d1073de 626 { .name = "gettid", .errpid = true, },
1f63139c 627 { .name = "ioctl",
82d4a110 628 .arg = {
844ae5b4
ACM
629#if defined(__i386__) || defined(__x86_64__)
630/*
631 * FIXME: Make this available to all arches.
632 */
1cc47f2d 633 [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
82d4a110 634 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 635#else
82d4a110 636 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 637#endif
1de3038d
ACM
638 { .name = "kcmp", .nr_args = 5,
639 .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, },
640 [1] = { .name = "pid2", .scnprintf = SCA_PID, },
641 [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, },
642 [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, },
643 [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, },
1f63139c 644 { .name = "keyctl",
82d4a110 645 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
1f63139c 646 { .name = "kill",
82d4a110 647 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 648 { .name = "linkat",
82d4a110 649 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 650 { .name = "lseek",
82d4a110 651 .arg = { [2] = STRARRAY(whence, whences), }, },
1f63139c
ACM
652 { .name = "lstat", .alias = "newlstat", },
653 { .name = "madvise",
82d4a110
ACM
654 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
655 [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
1f63139c 656 { .name = "mkdirat",
82d4a110 657 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 658 { .name = "mknodat",
82d4a110 659 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 660 { .name = "mlock",
82d4a110 661 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 662 { .name = "mlockall",
82d4a110 663 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
beccb2b5 664 { .name = "mmap", .hexret = true,
54265664
JO
665/* The standard mmap maps to old_mmap on s390x */
666#if defined(__s390x__)
667 .alias = "old_mmap",
668#endif
82d4a110
ACM
669 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
670 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
671 [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
1f63139c 672 { .name = "mprotect",
82d4a110
ACM
673 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
674 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
1f63139c 675 { .name = "mq_unlink",
82d4a110 676 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
ae685380 677 { .name = "mremap", .hexret = true,
82d4a110
ACM
678 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
679 [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
680 [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, },
1f63139c 681 { .name = "munlock",
82d4a110 682 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 683 { .name = "munmap",
82d4a110 684 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 685 { .name = "name_to_handle_at",
82d4a110 686 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 687 { .name = "newfstatat",
82d4a110 688 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 689 { .name = "open",
82d4a110 690 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 691 { .name = "open_by_handle_at",
82d4a110
ACM
692 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
693 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 694 { .name = "openat",
82d4a110
ACM
695 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
696 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 697 { .name = "perf_event_open",
82d4a110
ACM
698 .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ },
699 [3] = { .scnprintf = SCA_FD, /* group_fd */ },
700 [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
1f63139c 701 { .name = "pipe2",
82d4a110 702 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
83bc9c37
ACM
703 { .name = "pkey_alloc",
704 .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, },
705 { .name = "pkey_free",
706 .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, },
707 { .name = "pkey_mprotect",
708 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
709 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
710 [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, },
1f63139c
ACM
711 { .name = "poll", .timeout = true, },
712 { .name = "ppoll", .timeout = true, },
d688d037
ACM
713 { .name = "prctl", .alias = "arch_prctl",
714 .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
715 [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
716 [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
1f63139c
ACM
717 { .name = "pread", .alias = "pread64", },
718 { .name = "preadv", .alias = "pread", },
719 { .name = "prlimit64",
82d4a110 720 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
1f63139c
ACM
721 { .name = "pwrite", .alias = "pwrite64", },
722 { .name = "readlinkat",
82d4a110 723 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 724 { .name = "recvfrom",
82d4a110 725 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 726 { .name = "recvmmsg",
82d4a110 727 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 728 { .name = "recvmsg",
82d4a110 729 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 730 { .name = "renameat",
82d4a110 731 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 732 { .name = "rt_sigaction",
82d4a110 733 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 734 { .name = "rt_sigprocmask",
82d4a110 735 .arg = { [0] = STRARRAY(how, sighow), }, },
1f63139c 736 { .name = "rt_sigqueueinfo",
82d4a110 737 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 738 { .name = "rt_tgsigqueueinfo",
82d4a110 739 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 740 { .name = "sched_setscheduler",
82d4a110 741 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
1f63139c 742 { .name = "seccomp",
82d4a110
ACM
743 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
744 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
1f63139c
ACM
745 { .name = "select", .timeout = true, },
746 { .name = "sendmmsg",
82d4a110 747 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 748 { .name = "sendmsg",
82d4a110 749 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 750 { .name = "sendto",
82d4a110 751 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
c65f1070 752 { .name = "set_tid_address", .errpid = true, },
1f63139c 753 { .name = "setitimer",
82d4a110 754 .arg = { [0] = STRARRAY(which, itimers), }, },
1f63139c 755 { .name = "setrlimit",
82d4a110 756 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
1f63139c 757 { .name = "socket",
82d4a110
ACM
758 .arg = { [0] = STRARRAY(family, socket_families),
759 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
1f63139c 760 { .name = "socketpair",
82d4a110
ACM
761 .arg = { [0] = STRARRAY(family, socket_families),
762 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
1f63139c
ACM
763 { .name = "stat", .alias = "newstat", },
764 { .name = "statx",
82d4a110
ACM
765 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ },
766 [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
767 [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, },
1f63139c 768 { .name = "swapoff",
82d4a110 769 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 770 { .name = "swapon",
82d4a110 771 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 772 { .name = "symlinkat",
82d4a110 773 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 774 { .name = "tgkill",
82d4a110 775 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 776 { .name = "tkill",
82d4a110 777 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c
ACM
778 { .name = "uname", .alias = "newuname", },
779 { .name = "unlinkat",
82d4a110 780 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 781 { .name = "utimensat",
82d4a110 782 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
11c8e39f 783 { .name = "wait4", .errpid = true,
82d4a110 784 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
11c8e39f 785 { .name = "waitid", .errpid = true,
82d4a110 786 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
514f1c67
ACM
787};
788
789static int syscall_fmt__cmp(const void *name, const void *fmtp)
790{
791 const struct syscall_fmt *fmt = fmtp;
792 return strcmp(name, fmt->name);
793}
794
795static struct syscall_fmt *syscall_fmt__find(const char *name)
796{
797 const int nmemb = ARRAY_SIZE(syscall_fmts);
798 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
799}
800
/*
 * Description of one syscall: its tracepoint format and argument fields,
 * its name, and the optional formatting overrides (whole-syscall and
 * per-argument).
 */
struct syscall {
	struct event_format	*tp_format;
	int			nr_args;
	struct format_field	*args;
	const char		*name;
	bool			is_exit;
	struct syscall_fmt	*fmt;
	struct syscall_arg_fmt	*arg_fmt;
};
810
fd2b2975
ACM
811/*
812 * We need to have this 'calculated' boolean because in some cases we really
813 * don't know what is the duration of a syscall, for instance, when we start
814 * a session and some threads are waiting for a syscall to finish, say 'poll',
815 * in which case all we can do is to print "( ? ) for duration and for the
816 * start timestamp.
817 */
818static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
819{
820 double duration = (double)t / NSEC_PER_MSEC;
821 size_t printed = fprintf(fp, "(");
822
fd2b2975
ACM
823 if (!calculated)
824 printed += fprintf(fp, " ? ");
825 else if (duration >= 1.0)
60c907ab
ACM
826 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
827 else if (duration >= 0.01)
828 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
829 else
830 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 831 return printed + fprintf(fp, "): ");
60c907ab
ACM
832}
833
f994592d
ACM
834/**
835 * filename.ptr: The filename char pointer that will be vfs_getname'd
836 * filename.entry_str_pos: Where to insert the string translated from
837 * filename.ptr by the vfs_getname tracepoint/kprobe.
84486caa
ACM
838 * ret_scnprintf: syscall args may set this to a different syscall return
839 * formatter, for instance, fcntl may return fds, file flags, etc.
f994592d 840 */
752fde44
ACM
841struct thread_trace {
842 u64 entry_time;
752fde44 843 bool entry_pending;
efd5745e 844 unsigned long nr_events;
a2ea67d7 845 unsigned long pfmaj, pfmin;
752fde44 846 char *entry_str;
1302d88e 847 double runtime_ms;
7ee57434 848 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
f994592d
ACM
849 struct {
850 unsigned long ptr;
7f4f8001
ACM
851 short int entry_str_pos;
852 bool pending_open;
853 unsigned int namelen;
854 char *name;
f994592d 855 } filename;
75b757ca
ACM
856 struct {
857 int max;
858 char **table;
859 } paths;
bf2575c1
DA
860
861 struct intlist *syscall_stats;
752fde44
ACM
862};
863
864static struct thread_trace *thread_trace__new(void)
865{
75b757ca
ACM
866 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
867
868 if (ttrace)
869 ttrace->paths.max = -1;
870
bf2575c1
DA
871 ttrace->syscall_stats = intlist__new(NULL);
872
75b757ca 873 return ttrace;
752fde44
ACM
874}
875
c24ff998 876static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 877{
efd5745e
ACM
878 struct thread_trace *ttrace;
879
752fde44
ACM
880 if (thread == NULL)
881 goto fail;
882
89dceb22
NK
883 if (thread__priv(thread) == NULL)
884 thread__set_priv(thread, thread_trace__new());
48000a1a 885
89dceb22 886 if (thread__priv(thread) == NULL)
752fde44
ACM
887 goto fail;
888
89dceb22 889 ttrace = thread__priv(thread);
efd5745e
ACM
890 ++ttrace->nr_events;
891
892 return ttrace;
752fde44 893fail:
c24ff998 894 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
895 "WARNING: not enough memory, dropping samples!\n");
896 return NULL;
897}
898
84486caa
ACM
899
900void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
7ee57434 901 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
84486caa
ACM
902{
903 struct thread_trace *ttrace = thread__priv(arg->thread);
904
905 ttrace->ret_scnprintf = ret_scnprintf;
906}
907
598d02c5
SF
/* Bit flags; presumably select major/minor page fault tracing - confirm at the users */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* Size in bytes of the per-thread buffer where a syscall entry line is formatted */
static const size_t trace__entry_str_size = 2048;
912
97119f37 913static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 914{
89dceb22 915 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
916
917 if (fd > ttrace->paths.max) {
918 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
919
920 if (npath == NULL)
921 return -1;
922
923 if (ttrace->paths.max != -1) {
924 memset(npath + ttrace->paths.max + 1, 0,
925 (fd - ttrace->paths.max) * sizeof(char *));
926 } else {
927 memset(npath, 0, (fd + 1) * sizeof(char *));
928 }
929
930 ttrace->paths.table = npath;
931 ttrace->paths.max = fd;
932 }
933
934 ttrace->paths.table[fd] = strdup(pathname);
935
936 return ttrace->paths.table[fd] != NULL ? 0 : -1;
937}
938
97119f37
ACM
939static int thread__read_fd_path(struct thread *thread, int fd)
940{
941 char linkname[PATH_MAX], pathname[PATH_MAX];
942 struct stat st;
943 int ret;
944
945 if (thread->pid_ == thread->tid) {
946 scnprintf(linkname, sizeof(linkname),
947 "/proc/%d/fd/%d", thread->pid_, fd);
948 } else {
949 scnprintf(linkname, sizeof(linkname),
950 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
951 }
952
953 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
954 return -1;
955
956 ret = readlink(linkname, pathname, sizeof(pathname));
957
958 if (ret < 0 || ret > st.st_size)
959 return -1;
960
961 pathname[ret] = '\0';
962 return trace__set_fd_pathname(thread, fd, pathname);
963}
964
c522739d
ACM
965static const char *thread__fd_path(struct thread *thread, int fd,
966 struct trace *trace)
75b757ca 967{
89dceb22 968 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
969
970 if (ttrace == NULL)
971 return NULL;
972
973 if (fd < 0)
974 return NULL;
975
cdcd1e6b 976 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
977 if (!trace->live)
978 return NULL;
979 ++trace->stats.proc_getname;
cdcd1e6b 980 if (thread__read_fd_path(thread, fd))
c522739d
ACM
981 return NULL;
982 }
75b757ca
ACM
983
984 return ttrace->paths.table[fd];
985}
986
fc65eb82 987size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
75b757ca
ACM
988{
989 int fd = arg->val;
990 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 991 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
992
993 if (path)
994 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
995
996 return printed;
997}
998
0a2f7540
ACM
999size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
1000{
1001 size_t printed = scnprintf(bf, size, "%d", fd);
1002 struct thread *thread = machine__find_thread(trace->host, pid, pid);
1003
1004 if (thread) {
1005 const char *path = thread__fd_path(thread, fd, trace);
1006
1007 if (path)
1008 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1009
1010 thread__put(thread);
1011 }
1012
1013 return printed;
1014}
1015
75b757ca
ACM
1016static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1017 struct syscall_arg *arg)
1018{
1019 int fd = arg->val;
1020 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1021 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1022
04662523
ACM
1023 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1024 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1025
1026 return printed;
1027}
1028
f994592d
ACM
1029static void thread__set_filename_pos(struct thread *thread, const char *bf,
1030 unsigned long ptr)
1031{
1032 struct thread_trace *ttrace = thread__priv(thread);
1033
1034 ttrace->filename.ptr = ptr;
1035 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1036}
1037
1038static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1039 struct syscall_arg *arg)
1040{
1041 unsigned long ptr = arg->val;
1042
1043 if (!arg->trace->vfs_getname)
1044 return scnprintf(bf, size, "%#x", ptr);
1045
1046 thread__set_filename_pos(arg->thread, bf, ptr);
1047 return 0;
1048}
1049
ae9ed035
ACM
1050static bool trace__filter_duration(struct trace *trace, double t)
1051{
1052 return t < (trace->duration_filter * NSEC_PER_MSEC);
1053}
1054
fd2b2975 1055static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1056{
1057 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1058
60c907ab 1059 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1060}
1061
fd2b2975
ACM
1062/*
1063 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1064 * using ttrace->entry_time for a thread that receives a sys_exit without
1065 * first having received a sys_enter ("poll" issued before tracing session
1066 * starts, lost sys_enter exit due to ring buffer overflow).
1067 */
1068static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1069{
1070 if (tstamp > 0)
1071 return __trace__fprintf_tstamp(trace, tstamp, fp);
1072
1073 return fprintf(fp, " ? ");
1074}
1075
/*
 * Flags shared between the signal handler and the main loop. They are
 * written asynchronously from sig_handler(), hence volatile sig_atomic_t
 * rather than a plain bool.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Ask the main loop to stop and remember whether the request came from
 * SIGINT as opposed to any other signal routed to this handler.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1084
752fde44 1085static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
fd2b2975 1086 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
752fde44
ACM
1087{
1088 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
fd2b2975 1089 printed += fprintf_duration(duration, duration_calculated, fp);
752fde44 1090
50c95cbd
ACM
1091 if (trace->multiple_threads) {
1092 if (trace->show_comm)
1902efe7 1093 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1094 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1095 }
752fde44
ACM
1096
1097 return printed;
1098}
1099
c24ff998 1100static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1101 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1102{
1103 int ret = 0;
1104
1105 switch (event->header.type) {
1106 case PERF_RECORD_LOST:
c24ff998 1107 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1108 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1109 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1110 break;
752fde44 1111 default:
162f0bef 1112 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1113 break;
1114 }
1115
1116 return ret;
1117}
1118
c24ff998 1119static int trace__tool_process(struct perf_tool *tool,
752fde44 1120 union perf_event *event,
162f0bef 1121 struct perf_sample *sample,
752fde44
ACM
1122 struct machine *machine)
1123{
c24ff998 1124 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1125 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1126}
1127
caf8a0d0
ACM
1128static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1129{
1130 struct machine *machine = vmachine;
1131
1132 if (machine->kptr_restrict_warned)
1133 return NULL;
1134
1135 if (symbol_conf.kptr_restrict) {
1136 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1137 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1138 "Kernel samples will not be resolved.\n");
1139 machine->kptr_restrict_warned = true;
1140 return NULL;
1141 }
1142
1143 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1144}
1145
752fde44
ACM
1146static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1147{
0a7e6d1b 1148 int err = symbol__init(NULL);
752fde44
ACM
1149
1150 if (err)
1151 return err;
1152
8fb598e5
DA
1153 trace->host = machine__new_host();
1154 if (trace->host == NULL)
1155 return -ENOMEM;
752fde44 1156
cbd5c178
AV
1157 err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
1158 if (err < 0)
1159 goto out;
706c3da4 1160
a33fbd56 1161 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76 1162 evlist->threads, trace__tool_process, false,
340b47f5 1163 trace->opts.proc_map_timeout, 1);
cbd5c178 1164out:
752fde44
ACM
1165 if (err)
1166 symbol__exit();
1167
1168 return err;
1169}
1170
33974a41
AV
1171static void trace__symbols__exit(struct trace *trace)
1172{
1173 machine__exit(trace->host);
1174 trace->host = NULL;
1175
1176 symbol__exit();
1177}
1178
5e58fcfa 1179static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
13d4ff3e 1180{
5e58fcfa 1181 int idx;
13d4ff3e 1182
332337da
ACM
1183 if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
1184 nr_args = sc->fmt->nr_args;
1185
5e58fcfa 1186 sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
82d4a110 1187 if (sc->arg_fmt == NULL)
13d4ff3e
ACM
1188 return -1;
1189
5e58fcfa
ACM
1190 for (idx = 0; idx < nr_args; ++idx) {
1191 if (sc->fmt)
82d4a110 1192 sc->arg_fmt[idx] = sc->fmt->arg[idx];
5e58fcfa 1193 }
82d4a110 1194
5e58fcfa
ACM
1195 sc->nr_args = nr_args;
1196 return 0;
1197}
1198
1199static int syscall__set_arg_fmts(struct syscall *sc)
1200{
1201 struct format_field *field;
1202 int idx = 0, len;
1203
1204 for (field = sc->args; field; field = field->next, ++idx) {
1205 if (sc->fmt && sc->fmt->arg[idx].scnprintf)
1206 continue;
1f115cb7 1207
82d4a110 1208 if (strcmp(field->type, "const char *") == 0 &&
12f3ca4f
ACM
1209 (strcmp(field->name, "filename") == 0 ||
1210 strcmp(field->name, "path") == 0 ||
1211 strcmp(field->name, "pathname") == 0))
82d4a110 1212 sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
beccb2b5 1213 else if (field->flags & FIELD_IS_POINTER)
82d4a110 1214 sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
d1d438a3 1215 else if (strcmp(field->type, "pid_t") == 0)
82d4a110 1216 sc->arg_fmt[idx].scnprintf = SCA_PID;
ba2f22cf 1217 else if (strcmp(field->type, "umode_t") == 0)
82d4a110 1218 sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
b6565c90
ACM
1219 else if ((strcmp(field->type, "int") == 0 ||
1220 strcmp(field->type, "unsigned int") == 0 ||
1221 strcmp(field->type, "long") == 0) &&
1222 (len = strlen(field->name)) >= 2 &&
1223 strcmp(field->name + len - 2, "fd") == 0) {
1224 /*
1225 * /sys/kernel/tracing/events/syscalls/sys_enter*
1226 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1227 * 65 int
1228 * 23 unsigned int
1229 * 7 unsigned long
1230 */
82d4a110 1231 sc->arg_fmt[idx].scnprintf = SCA_FD;
b6565c90 1232 }
13d4ff3e
ACM
1233 }
1234
1235 return 0;
1236}
1237
514f1c67
ACM
1238static int trace__read_syscall_info(struct trace *trace, int id)
1239{
1240 char tp_name[128];
1241 struct syscall *sc;
fd0db102 1242 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1243
1244 if (name == NULL)
1245 return -1;
514f1c67
ACM
1246
1247 if (id > trace->syscalls.max) {
1248 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1249
1250 if (nsyscalls == NULL)
1251 return -1;
1252
1253 if (trace->syscalls.max != -1) {
1254 memset(nsyscalls + trace->syscalls.max + 1, 0,
1255 (id - trace->syscalls.max) * sizeof(*sc));
1256 } else {
1257 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1258 }
1259
1260 trace->syscalls.table = nsyscalls;
1261 trace->syscalls.max = id;
1262 }
1263
1264 sc = trace->syscalls.table + id;
3a531260 1265 sc->name = name;
2ae3a312 1266
3a531260 1267 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1268
aec1930b 1269 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1270 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1271
8dd2a131 1272 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1273 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1274 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1275 }
514f1c67 1276
5e58fcfa
ACM
1277 if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
1278 return -1;
1279
8dd2a131 1280 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1281 return -1;
1282
f208bd8d 1283 sc->args = sc->tp_format->format.fields;
c42de706
TS
1284 /*
1285 * We need to check and discard the first variable '__syscall_nr'
1286 * or 'nr' that mean the syscall number. It is needless here.
1287 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1288 */
1289 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1290 sc->args = sc->args->next;
1291 --sc->nr_args;
1292 }
1293
5089f20e
ACM
1294 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1295
13d4ff3e 1296 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1297}
1298
d0cc439b
ACM
1299static int trace__validate_ev_qualifier(struct trace *trace)
1300{
8b3ce757 1301 int err = 0, i;
27702bcf 1302 size_t nr_allocated;
d0cc439b
ACM
1303 struct str_node *pos;
1304
8b3ce757
ACM
1305 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1306 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1307 sizeof(trace->ev_qualifier_ids.entries[0]));
1308
1309 if (trace->ev_qualifier_ids.entries == NULL) {
1310 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1311 trace->output);
1312 err = -EINVAL;
1313 goto out;
1314 }
1315
27702bcf 1316 nr_allocated = trace->ev_qualifier_ids.nr;
8b3ce757
ACM
1317 i = 0;
1318
602a1f4d 1319 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1320 const char *sc = pos->s;
27702bcf 1321 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
d0cc439b 1322
8b3ce757 1323 if (id < 0) {
27702bcf
ACM
1324 id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
1325 if (id >= 0)
1326 goto matches;
1327
d0cc439b
ACM
1328 if (err == 0) {
1329 fputs("Error:\tInvalid syscall ", trace->output);
1330 err = -EINVAL;
1331 } else {
1332 fputs(", ", trace->output);
1333 }
1334
1335 fputs(sc, trace->output);
1336 }
27702bcf 1337matches:
8b3ce757 1338 trace->ev_qualifier_ids.entries[i++] = id;
27702bcf
ACM
1339 if (match_next == -1)
1340 continue;
1341
1342 while (1) {
1343 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
1344 if (id < 0)
1345 break;
1346 if (nr_allocated == trace->ev_qualifier_ids.nr) {
1347 void *entries;
1348
1349 nr_allocated += 8;
1350 entries = realloc(trace->ev_qualifier_ids.entries,
1351 nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
1352 if (entries == NULL) {
1353 err = -ENOMEM;
1354 fputs("\nError:\t Not enough memory for parsing\n", trace->output);
1355 goto out_free;
1356 }
1357 trace->ev_qualifier_ids.entries = entries;
1358 }
1359 trace->ev_qualifier_ids.nr++;
1360 trace->ev_qualifier_ids.entries[i++] = id;
1361 }
d0cc439b
ACM
1362 }
1363
1364 if (err < 0) {
1365 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1366 "\nHint:\tand: 'man syscalls'\n", trace->output);
27702bcf 1367out_free:
8b3ce757
ACM
1368 zfree(&trace->ev_qualifier_ids.entries);
1369 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1370 }
8b3ce757 1371out:
d0cc439b
ACM
1372 return err;
1373}
1374
55d43bca
DA
1375/*
1376 * args is to be interpreted as a series of longs but we need to handle
1377 * 8-byte unaligned accesses. args points to raw_data within the event
1378 * and raw_data is guaranteed to be 8-byte unaligned because it is
1379 * preceded by raw_size which is a u32. So we need to copy args to a temp
1380 * variable to read it. Most notably this avoids extended load instructions
1381 * on unaligned addresses
1382 */
325f5091 1383unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
f9f83b33
ACM
1384{
1385 unsigned long val;
325f5091 1386 unsigned char *p = arg->args + sizeof(unsigned long) * idx;
f9f83b33
ACM
1387
1388 memcpy(&val, p, sizeof(val));
1389 return val;
1390}
1391
c51bdfec
ACM
1392static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
1393 struct syscall_arg *arg)
1394{
1395 if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
1396 return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
1397
1398 return scnprintf(bf, size, "arg%d: ", arg->idx);
1399}
1400
d032d79e
ACM
1401static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
1402 struct syscall_arg *arg, unsigned long val)
1403{
1404 if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
1405 arg->val = val;
1406 if (sc->arg_fmt[arg->idx].parm)
1407 arg->parm = sc->arg_fmt[arg->idx].parm;
1408 return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
1409 }
1410 return scnprintf(bf, size, "%ld", val);
1411}
1412
752fde44 1413static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1414 unsigned char *args, struct trace *trace,
75b757ca 1415 struct thread *thread)
514f1c67 1416{
514f1c67 1417 size_t printed = 0;
55d43bca 1418 unsigned long val;
d032d79e
ACM
1419 u8 bit = 1;
1420 struct syscall_arg arg = {
1421 .args = args,
1422 .idx = 0,
1423 .mask = 0,
1424 .trace = trace,
1425 .thread = thread,
1426 };
84486caa
ACM
1427 struct thread_trace *ttrace = thread__priv(thread);
1428
1429 /*
1430 * Things like fcntl will set this in its 'cmd' formatter to pick the
1431 * right formatter for the return value (an fd? file flags?), which is
1432 * not needed for syscalls that always return a given type, say an fd.
1433 */
1434 ttrace->ret_scnprintf = NULL;
514f1c67 1435
f208bd8d 1436 if (sc->args != NULL) {
514f1c67 1437 struct format_field *field;
6e7eeb51 1438
f208bd8d 1439 for (field = sc->args; field;
01533e97
ACM
1440 field = field->next, ++arg.idx, bit <<= 1) {
1441 if (arg.mask & bit)
6e7eeb51 1442 continue;
55d43bca 1443
f9f83b33 1444 val = syscall_arg__val(&arg, arg.idx);
55d43bca 1445
4aa58232
ACM
1446 /*
1447 * Suppress this argument if its value is zero and
1448 * and we don't have a string associated in an
1449 * strarray for it.
1450 */
55d43bca 1451 if (val == 0 &&
82d4a110 1452 !(sc->arg_fmt &&
d47737d5
ACM
1453 (sc->arg_fmt[arg.idx].show_zero ||
1454 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
82d4a110
ACM
1455 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
1456 sc->arg_fmt[arg.idx].parm))
22ae5cf1
ACM
1457 continue;
1458
752fde44 1459 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1460 "%s%s: ", printed ? ", " : "", field->name);
d032d79e 1461 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
514f1c67 1462 }
4c4d6e51
ACM
1463 } else if (IS_ERR(sc->tp_format)) {
1464 /*
1465 * If we managed to read the tracepoint /format file, then we
1466 * may end up not having any args, like with gettid(), so only
1467 * print the raw args when we didn't manage to read it.
1468 */
332337da 1469 while (arg.idx < sc->nr_args) {
d032d79e
ACM
1470 if (arg.mask & bit)
1471 goto next_arg;
1472 val = syscall_arg__val(&arg, arg.idx);
c51bdfec
ACM
1473 if (printed)
1474 printed += scnprintf(bf + printed, size - printed, ", ");
1475 printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
d032d79e
ACM
1476 printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
1477next_arg:
1478 ++arg.idx;
1479 bit <<= 1;
514f1c67
ACM
1480 }
1481 }
1482
1483 return printed;
1484}
1485
/* Signature shared by the per-tracepoint sample handlers in this file */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct perf_evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
1489
1490static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1491 struct perf_evsel *evsel, int id)
ba3d7dee 1492{
ba3d7dee
ACM
1493
1494 if (id < 0) {
adaa18bf
ACM
1495
1496 /*
1497 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1498 * before that, leaving at a higher verbosity level till that is
1499 * explained. Reproduced with plain ftrace with:
1500 *
1501 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1502 * grep "NR -1 " /t/trace_pipe
1503 *
1504 * After generating some load on the machine.
1505 */
1506 if (verbose > 1) {
1507 static u64 n;
1508 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1509 id, perf_evsel__name(evsel), ++n);
1510 }
ba3d7dee
ACM
1511 return NULL;
1512 }
1513
1514 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1515 trace__read_syscall_info(trace, id))
1516 goto out_cant_read;
1517
1518 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1519 goto out_cant_read;
1520
1521 return &trace->syscalls.table[id];
1522
1523out_cant_read:
bb963e16 1524 if (verbose > 0) {
7c304ee0
ACM
1525 fprintf(trace->output, "Problems reading syscall %d", id);
1526 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1527 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1528 fputs(" information\n", trace->output);
1529 }
ba3d7dee
ACM
1530 return NULL;
1531}
1532
bf2575c1
DA
1533static void thread__update_stats(struct thread_trace *ttrace,
1534 int id, struct perf_sample *sample)
1535{
1536 struct int_node *inode;
1537 struct stats *stats;
1538 u64 duration = 0;
1539
1540 inode = intlist__findnew(ttrace->syscall_stats, id);
1541 if (inode == NULL)
1542 return;
1543
1544 stats = inode->priv;
1545 if (stats == NULL) {
1546 stats = malloc(sizeof(struct stats));
1547 if (stats == NULL)
1548 return;
1549 init_stats(stats);
1550 inode->priv = stats;
1551 }
1552
1553 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1554 duration = sample->time - ttrace->entry_time;
1555
1556 update_stats(stats, duration);
1557}
1558
e596663e
ACM
1559static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1560{
1561 struct thread_trace *ttrace;
1562 u64 duration;
1563 size_t printed;
1564
1565 if (trace->current == NULL)
1566 return 0;
1567
1568 ttrace = thread__priv(trace->current);
1569
1570 if (!ttrace->entry_pending)
1571 return 0;
1572
1573 duration = sample->time - ttrace->entry_time;
1574
fd2b2975 1575 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
e596663e
ACM
1576 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1577 ttrace->entry_pending = false;
1578
1579 return printed;
1580}
1581
591421e1
ACM
1582static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
1583 struct perf_sample *sample, struct thread *thread)
1584{
1585 int printed = 0;
1586
1587 if (trace->print_sample) {
1588 double ts = (double)sample->time / NSEC_PER_MSEC;
1589
1590 printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
1591 perf_evsel__name(evsel), ts,
1592 thread__comm_str(thread),
1593 sample->pid, sample->tid, sample->cpu);
1594 }
1595
1596 return printed;
1597}
1598
ba3d7dee 1599static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1600 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1601 struct perf_sample *sample)
1602{
752fde44 1603 char *msg;
ba3d7dee 1604 void *args;
752fde44 1605 size_t printed = 0;
2ae3a312 1606 struct thread *thread;
b91fc39f 1607 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1608 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1609 struct thread_trace *ttrace;
1610
1611 if (sc == NULL)
1612 return -1;
ba3d7dee 1613
8fb598e5 1614 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1615 ttrace = thread__trace(thread, trace->output);
2ae3a312 1616 if (ttrace == NULL)
b91fc39f 1617 goto out_put;
ba3d7dee 1618
591421e1
ACM
1619 trace__fprintf_sample(trace, evsel, sample, thread);
1620
77170988 1621 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1622
1623 if (ttrace->entry_str == NULL) {
e4d44e83 1624 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1625 if (!ttrace->entry_str)
b91fc39f 1626 goto out_put;
752fde44
ACM
1627 }
1628
5cf9c84e 1629 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1630 trace__printf_interrupted_entry(trace, sample);
e596663e 1631
752fde44
ACM
1632 ttrace->entry_time = sample->time;
1633 msg = ttrace->entry_str;
e4d44e83 1634 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1635
e4d44e83 1636 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1637 args, trace, thread);
752fde44 1638
5089f20e 1639 if (sc->is_exit) {
5cf9c84e 1640 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
fd2b2975 1641 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1642 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1643 }
7f4f8001 1644 } else {
752fde44 1645 ttrace->entry_pending = true;
7f4f8001
ACM
1646 /* See trace__vfs_getname & trace__sys_exit */
1647 ttrace->filename.pending_open = false;
1648 }
ba3d7dee 1649
f3b623b8
ACM
1650 if (trace->current != thread) {
1651 thread__put(trace->current);
1652 trace->current = thread__get(thread);
1653 }
b91fc39f
ACM
1654 err = 0;
1655out_put:
1656 thread__put(thread);
1657 return err;
ba3d7dee
ACM
1658}
1659
5cf9c84e
ACM
1660static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1661 struct perf_sample *sample,
1662 struct callchain_cursor *cursor)
202ff968
ACM
1663{
1664 struct addr_location al;
5cf9c84e
ACM
1665
1666 if (machine__resolve(trace->host, &al, sample) < 0 ||
bd3dda9a 1667 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, evsel->attr.sample_max_stack))
5cf9c84e
ACM
1668 return -1;
1669
1670 return 0;
1671}
1672
1673static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1674{
202ff968 1675 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1676 const unsigned int print_opts = EVSEL__PRINT_SYM |
1677 EVSEL__PRINT_DSO |
1678 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1679
d327e60c 1680 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1681}
1682
092bd3cd
HB
/*
 * Map errno value @err to its symbolic name for the architecture found in
 * the perf_env associated with @evsel.
 */
static const char *errno_to_name(struct perf_evsel *evsel, int err)
{
	const char *arch_name = perf_env__arch(perf_evsel__env(evsel));

	return arch_syscalls__strerrno(arch_name, err);
}
1690
ba3d7dee 1691static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1692 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1693 struct perf_sample *sample)
1694{
2c82c3ad 1695 long ret;
60c907ab 1696 u64 duration = 0;
fd2b2975 1697 bool duration_calculated = false;
2ae3a312 1698 struct thread *thread;
5cf9c84e 1699 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1700 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1701 struct thread_trace *ttrace;
1702
1703 if (sc == NULL)
1704 return -1;
ba3d7dee 1705
8fb598e5 1706 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1707 ttrace = thread__trace(thread, trace->output);
2ae3a312 1708 if (ttrace == NULL)
b91fc39f 1709 goto out_put;
ba3d7dee 1710
591421e1
ACM
1711 trace__fprintf_sample(trace, evsel, sample, thread);
1712
bf2575c1
DA
1713 if (trace->summary)
1714 thread__update_stats(ttrace, id, sample);
1715
77170988 1716 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1717
fd0db102 1718 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1719 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1720 ttrace->filename.pending_open = false;
c522739d
ACM
1721 ++trace->stats.vfs_getname;
1722 }
1723
ae9ed035 1724 if (ttrace->entry_time) {
60c907ab 1725 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1726 if (trace__filter_duration(trace, duration))
1727 goto out;
fd2b2975 1728 duration_calculated = true;
ae9ed035
ACM
1729 } else if (trace->duration_filter)
1730 goto out;
60c907ab 1731
5cf9c84e
ACM
1732 if (sample->callchain) {
1733 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1734 if (callchain_ret == 0) {
1735 if (callchain_cursor.nr < trace->min_stack)
1736 goto out;
1737 callchain_ret = 1;
1738 }
1739 }
1740
fd2eabaf
DA
1741 if (trace->summary_only)
1742 goto out;
1743
fd2b2975 1744 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1745
1746 if (ttrace->entry_pending) {
c24ff998 1747 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1748 } else {
c24ff998
ACM
1749 fprintf(trace->output, " ... [");
1750 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1751 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1752 }
1753
da3c9a44 1754 if (sc->fmt == NULL) {
1f63139c
ACM
1755 if (ret < 0)
1756 goto errno_print;
da3c9a44 1757signed_print:
6f8fe61e 1758 fprintf(trace->output, ") = %ld", ret);
1f63139c
ACM
1759 } else if (ret < 0) {
1760errno_print: {
942a91ed 1761 char bf[STRERR_BUFSIZE];
c8b5f2c9 1762 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
092bd3cd 1763 *e = errno_to_name(evsel, -ret);
ba3d7dee 1764
c24ff998 1765 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1f63139c 1766 }
da3c9a44 1767 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1768 fprintf(trace->output, ") = 0 Timeout");
84486caa
ACM
1769 else if (ttrace->ret_scnprintf) {
1770 char bf[1024];
7ee57434
ACM
1771 struct syscall_arg arg = {
1772 .val = ret,
1773 .thread = thread,
1774 .trace = trace,
1775 };
1776 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
84486caa
ACM
1777 ttrace->ret_scnprintf = NULL;
1778 fprintf(trace->output, ") = %s", bf);
1779 } else if (sc->fmt->hexret)
2c82c3ad 1780 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1781 else if (sc->fmt->errpid) {
1782 struct thread *child = machine__find_thread(trace->host, ret, ret);
1783
1784 if (child != NULL) {
1785 fprintf(trace->output, ") = %ld", ret);
1786 if (child->comm_set)
1787 fprintf(trace->output, " (%s)", thread__comm_str(child));
1788 thread__put(child);
1789 }
1790 } else
da3c9a44 1791 goto signed_print;
ba3d7dee 1792
c24ff998 1793 fputc('\n', trace->output);
566a0885 1794
5cf9c84e
ACM
1795 if (callchain_ret > 0)
1796 trace__fprintf_callchain(trace, sample);
1797 else if (callchain_ret < 0)
1798 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1799out:
752fde44 1800 ttrace->entry_pending = false;
b91fc39f
ACM
1801 err = 0;
1802out_put:
1803 thread__put(thread);
1804 return err;
ba3d7dee
ACM
1805}
1806
c522739d 1807static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1808 union perf_event *event __maybe_unused,
c522739d
ACM
1809 struct perf_sample *sample)
1810{
f994592d
ACM
1811 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1812 struct thread_trace *ttrace;
1813 size_t filename_len, entry_str_len, to_move;
1814 ssize_t remaining_space;
1815 char *pos;
7f4f8001 1816 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1817
1818 if (!thread)
1819 goto out;
1820
1821 ttrace = thread__priv(thread);
1822 if (!ttrace)
ef65e96e 1823 goto out_put;
f994592d 1824
7f4f8001 1825 filename_len = strlen(filename);
39f0e7a8 1826 if (filename_len == 0)
ef65e96e 1827 goto out_put;
7f4f8001
ACM
1828
1829 if (ttrace->filename.namelen < filename_len) {
1830 char *f = realloc(ttrace->filename.name, filename_len + 1);
1831
1832 if (f == NULL)
ef65e96e 1833 goto out_put;
7f4f8001
ACM
1834
1835 ttrace->filename.namelen = filename_len;
1836 ttrace->filename.name = f;
1837 }
1838
1839 strcpy(ttrace->filename.name, filename);
1840 ttrace->filename.pending_open = true;
1841
f994592d 1842 if (!ttrace->filename.ptr)
ef65e96e 1843 goto out_put;
f994592d
ACM
1844
1845 entry_str_len = strlen(ttrace->entry_str);
1846 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1847 if (remaining_space <= 0)
ef65e96e 1848 goto out_put;
f994592d 1849
f994592d
ACM
1850 if (filename_len > (size_t)remaining_space) {
1851 filename += filename_len - remaining_space;
1852 filename_len = remaining_space;
1853 }
1854
1855 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1856 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1857 memmove(pos + filename_len, pos, to_move);
1858 memcpy(pos, filename, filename_len);
1859
1860 ttrace->filename.ptr = 0;
1861 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1862out_put:
1863 thread__put(thread);
f994592d 1864out:
c522739d
ACM
1865 return 0;
1866}
1867
1302d88e 1868static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1869 union perf_event *event __maybe_unused,
1302d88e
ACM
1870 struct perf_sample *sample)
1871{
1872 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1873 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1874 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1875 sample->pid,
1876 sample->tid);
c24ff998 1877 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1878
1879 if (ttrace == NULL)
1880 goto out_dump;
1881
1882 ttrace->runtime_ms += runtime_ms;
1883 trace->runtime_ms += runtime_ms;
ef65e96e 1884out_put:
b91fc39f 1885 thread__put(thread);
1302d88e
ACM
1886 return 0;
1887
1888out_dump:
c24ff998 1889 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1890 evsel->name,
1891 perf_evsel__strval(evsel, sample, "comm"),
1892 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1893 runtime,
1894 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 1895 goto out_put;
1302d88e
ACM
1896}
1897
923d0c9a
ACM
1898static int bpf_output__printer(enum binary_printer_ops op,
1899 unsigned int val, void *extra __maybe_unused, FILE *fp)
1d6c9407 1900{
1d6c9407
WN
1901 unsigned char ch = (unsigned char)val;
1902
1903 switch (op) {
1904 case BINARY_PRINT_CHAR_DATA:
923d0c9a 1905 return fprintf(fp, "%c", isprint(ch) ? ch : '.');
1d6c9407
WN
1906 case BINARY_PRINT_DATA_BEGIN:
1907 case BINARY_PRINT_LINE_BEGIN:
1908 case BINARY_PRINT_ADDR:
1909 case BINARY_PRINT_NUM_DATA:
1910 case BINARY_PRINT_NUM_PAD:
1911 case BINARY_PRINT_SEP:
1912 case BINARY_PRINT_CHAR_PAD:
1913 case BINARY_PRINT_LINE_END:
1914 case BINARY_PRINT_DATA_END:
1915 default:
1916 break;
1917 }
923d0c9a
ACM
1918
1919 return 0;
1d6c9407
WN
1920}
1921
1922static void bpf_output__fprintf(struct trace *trace,
1923 struct perf_sample *sample)
1924{
923d0c9a
ACM
1925 binary__fprintf(sample->raw_data, sample->raw_size, 8,
1926 bpf_output__printer, NULL, trace->output);
1d6c9407
WN
1927}
1928
14a052df
ACM
1929static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1930 union perf_event *event __maybe_unused,
1931 struct perf_sample *sample)
1932{
7ad35615
ACM
1933 int callchain_ret = 0;
1934
1935 if (sample->callchain) {
1936 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1937 if (callchain_ret == 0) {
1938 if (callchain_cursor.nr < trace->min_stack)
1939 goto out;
1940 callchain_ret = 1;
1941 }
1942 }
1943
14a052df
ACM
1944 trace__printf_interrupted_entry(trace, sample);
1945 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1946
1947 if (trace->trace_syscalls)
1948 fprintf(trace->output, "( ): ");
1949
1950 fprintf(trace->output, "%s:", evsel->name);
14a052df 1951
1d6c9407
WN
1952 if (perf_evsel__is_bpf_output(evsel)) {
1953 bpf_output__fprintf(trace, sample);
1954 } else if (evsel->tp_format) {
14a052df
ACM
1955 event_format__fprintf(evsel->tp_format, sample->cpu,
1956 sample->raw_data, sample->raw_size,
1957 trace->output);
1958 }
1959
1960 fprintf(trace->output, ")\n");
202ff968 1961
7ad35615
ACM
1962 if (callchain_ret > 0)
1963 trace__fprintf_callchain(trace, sample);
1964 else if (callchain_ret < 0)
1965 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1966out:
14a052df
ACM
1967 return 0;
1968}
1969
598d02c5
SF
1970static void print_location(FILE *f, struct perf_sample *sample,
1971 struct addr_location *al,
1972 bool print_dso, bool print_sym)
1973{
1974
bb963e16 1975 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
1976 fprintf(f, "%s@", al->map->dso->long_name);
1977
bb963e16 1978 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 1979 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1980 al->addr - al->sym->start);
1981 else if (al->map)
4414a3c5 1982 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1983 else
4414a3c5 1984 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1985}
1986
1987static int trace__pgfault(struct trace *trace,
1988 struct perf_evsel *evsel,
473398a2 1989 union perf_event *event __maybe_unused,
598d02c5
SF
1990 struct perf_sample *sample)
1991{
1992 struct thread *thread;
598d02c5
SF
1993 struct addr_location al;
1994 char map_type = 'd';
a2ea67d7 1995 struct thread_trace *ttrace;
b91fc39f 1996 int err = -1;
1df54290 1997 int callchain_ret = 0;
598d02c5
SF
1998
1999 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
2000
2001 if (sample->callchain) {
2002 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
2003 if (callchain_ret == 0) {
2004 if (callchain_cursor.nr < trace->min_stack)
2005 goto out_put;
2006 callchain_ret = 1;
2007 }
2008 }
2009
a2ea67d7
SF
2010 ttrace = thread__trace(thread, trace->output);
2011 if (ttrace == NULL)
b91fc39f 2012 goto out_put;
a2ea67d7
SF
2013
2014 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
2015 ttrace->pfmaj++;
2016 else
2017 ttrace->pfmin++;
2018
2019 if (trace->summary_only)
b91fc39f 2020 goto out;
598d02c5 2021
473398a2 2022 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
2023 sample->ip, &al);
2024
fd2b2975 2025 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
2026
2027 fprintf(trace->output, "%sfault [",
2028 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
2029 "maj" : "min");
2030
2031 print_location(trace->output, sample, &al, false, true);
2032
2033 fprintf(trace->output, "] => ");
2034
473398a2 2035 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
2036 sample->addr, &al);
2037
2038 if (!al.map) {
473398a2 2039 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
2040 MAP__FUNCTION, sample->addr, &al);
2041
2042 if (al.map)
2043 map_type = 'x';
2044 else
2045 map_type = '?';
2046 }
2047
2048 print_location(trace->output, sample, &al, true, false);
2049
2050 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 2051
1df54290
ACM
2052 if (callchain_ret > 0)
2053 trace__fprintf_callchain(trace, sample);
2054 else if (callchain_ret < 0)
2055 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
2056out:
2057 err = 0;
2058out_put:
2059 thread__put(thread);
2060 return err;
598d02c5
SF
2061}
2062
e6001980 2063static void trace__set_base_time(struct trace *trace,
8a07a809 2064 struct perf_evsel *evsel,
e6001980
ACM
2065 struct perf_sample *sample)
2066{
8a07a809
ACM
2067 /*
2068 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
2069 * and don't use sample->time unconditionally, we may end up having
2070 * some other event in the future without PERF_SAMPLE_TIME for good
2071 * reason, i.e. we may not be interested in its timestamps, just in
2072 * it taking place, picking some piece of information when it
2073 * appears in our event stream (vfs_getname comes to mind).
2074 */
2075 if (trace->base_time == 0 && !trace->full_time &&
2076 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
2077 trace->base_time = sample->time;
2078}
2079
6810fc91 2080static int trace__process_sample(struct perf_tool *tool,
0c82adcf 2081 union perf_event *event,
6810fc91
DA
2082 struct perf_sample *sample,
2083 struct perf_evsel *evsel,
2084 struct machine *machine __maybe_unused)
2085{
2086 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 2087 struct thread *thread;
6810fc91
DA
2088 int err = 0;
2089
744a9719 2090 tracepoint_handler handler = evsel->handler;
6810fc91 2091
aa07df6e
DA
2092 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
2093 if (thread && thread__is_filtered(thread))
ef65e96e 2094 goto out;
bdc89661 2095
e6001980 2096 trace__set_base_time(trace, evsel, sample);
6810fc91 2097
3160565f
DA
2098 if (handler) {
2099 ++trace->nr_events;
0c82adcf 2100 handler(trace, evsel, event, sample);
3160565f 2101 }
ef65e96e
ACM
2102out:
2103 thread__put(thread);
6810fc91
DA
2104 return err;
2105}
2106
1e28fe0a 2107static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
2108{
2109 unsigned int rec_argc, i, j;
2110 const char **rec_argv;
2111 const char * const record_args[] = {
2112 "record",
2113 "-R",
2114 "-m", "1024",
2115 "-c", "1",
5e2485b1
DA
2116 };
2117
1e28fe0a
SF
2118 const char * const sc_args[] = { "-e", };
2119 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2120 const char * const majpf_args[] = { "-e", "major-faults" };
2121 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2122 const char * const minpf_args[] = { "-e", "minor-faults" };
2123 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2124
9aca7f17 2125 /* +1 is for the event string below */
1e28fe0a
SF
2126 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2127 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
2128 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2129
2130 if (rec_argv == NULL)
2131 return -ENOMEM;
2132
1e28fe0a 2133 j = 0;
5e2485b1 2134 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
2135 rec_argv[j++] = record_args[i];
2136
e281a960
SF
2137 if (trace->trace_syscalls) {
2138 for (i = 0; i < sc_args_nr; i++)
2139 rec_argv[j++] = sc_args[i];
2140
2141 /* event string may be different for older kernels - e.g., RHEL6 */
2142 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2143 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2144 else if (is_valid_tracepoint("syscalls:sys_enter"))
2145 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2146 else {
2147 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
c896f85a 2148 free(rec_argv);
e281a960
SF
2149 return -1;
2150 }
9aca7f17 2151 }
9aca7f17 2152
1e28fe0a
SF
2153 if (trace->trace_pgfaults & TRACE_PFMAJ)
2154 for (i = 0; i < majpf_args_nr; i++)
2155 rec_argv[j++] = majpf_args[i];
2156
2157 if (trace->trace_pgfaults & TRACE_PFMIN)
2158 for (i = 0; i < minpf_args_nr; i++)
2159 rec_argv[j++] = minpf_args[i];
2160
2161 for (i = 0; i < (unsigned int)argc; i++)
2162 rec_argv[j++] = argv[i];
5e2485b1 2163
b0ad8ea6 2164 return cmd_record(j, rec_argv);
5e2485b1
DA
2165}
2166
bf2575c1
DA
2167static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2168
08c98776 2169static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2170{
ef503831 2171 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2172
2173 if (IS_ERR(evsel))
08c98776 2174 return false;
c522739d
ACM
2175
2176 if (perf_evsel__field(evsel, "pathname") == NULL) {
2177 perf_evsel__delete(evsel);
08c98776 2178 return false;
c522739d
ACM
2179 }
2180
744a9719 2181 evsel->handler = trace__vfs_getname;
c522739d 2182 perf_evlist__add(evlist, evsel);
08c98776 2183 return true;
c522739d
ACM
2184}
2185
0ae537cb 2186static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2187{
2188 struct perf_evsel *evsel;
2189 struct perf_event_attr attr = {
2190 .type = PERF_TYPE_SOFTWARE,
2191 .mmap_data = 1,
598d02c5
SF
2192 };
2193
2194 attr.config = config;
0524798c 2195 attr.sample_period = 1;
598d02c5
SF
2196
2197 event_attr_init(&attr);
2198
2199 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2200 if (evsel)
2201 evsel->handler = trace__pgfault;
598d02c5 2202
0ae537cb 2203 return evsel;
598d02c5
SF
2204}
2205
ddbb1b13
ACM
2206static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2207{
2208 const u32 type = event->header.type;
2209 struct perf_evsel *evsel;
2210
ddbb1b13
ACM
2211 if (type != PERF_RECORD_SAMPLE) {
2212 trace__process_event(trace, trace->host, event, sample);
2213 return;
2214 }
2215
2216 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2217 if (evsel == NULL) {
2218 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2219 return;
2220 }
2221
e6001980
ACM
2222 trace__set_base_time(trace, evsel, sample);
2223
ddbb1b13
ACM
2224 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2225 sample->raw_data == NULL) {
2226 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2227 perf_evsel__name(evsel), sample->tid,
2228 sample->cpu, sample->raw_size);
2229 } else {
2230 tracepoint_handler handler = evsel->handler;
2231 handler(trace, evsel, event, sample);
2232 }
2233}
2234
c27366f0
ACM
2235static int trace__add_syscall_newtp(struct trace *trace)
2236{
2237 int ret = -1;
2238 struct perf_evlist *evlist = trace->evlist;
2239 struct perf_evsel *sys_enter, *sys_exit;
2240
2241 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2242 if (sys_enter == NULL)
2243 goto out;
2244
2245 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2246 goto out_delete_sys_enter;
2247
2248 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2249 if (sys_exit == NULL)
2250 goto out_delete_sys_enter;
2251
2252 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2253 goto out_delete_sys_exit;
2254
08e26396
ACM
2255 perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
2256 perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
2257
c27366f0
ACM
2258 perf_evlist__add(evlist, sys_enter);
2259 perf_evlist__add(evlist, sys_exit);
2260
2ddd5c04 2261 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2262 /*
2263 * We're interested only in the user space callchain
2264 * leading to the syscall, allow overriding that for
2265 * debugging reasons using --kernel_syscall_callchains
2266 */
2267 sys_exit->attr.exclude_callchain_kernel = 1;
2268 }
2269
8b3ce757
ACM
2270 trace->syscalls.events.sys_enter = sys_enter;
2271 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2272
2273 ret = 0;
2274out:
2275 return ret;
2276
2277out_delete_sys_exit:
2278 perf_evsel__delete_priv(sys_exit);
2279out_delete_sys_enter:
2280 perf_evsel__delete_priv(sys_enter);
2281 goto out;
2282}
2283
19867b61
ACM
2284static int trace__set_ev_qualifier_filter(struct trace *trace)
2285{
2286 int err = -1;
b15d0a4c 2287 struct perf_evsel *sys_exit;
19867b61
ACM
2288 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2289 trace->ev_qualifier_ids.nr,
2290 trace->ev_qualifier_ids.entries);
2291
2292 if (filter == NULL)
2293 goto out_enomem;
2294
3541c034
MP
2295 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2296 filter)) {
b15d0a4c 2297 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2298 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2299 }
19867b61
ACM
2300
2301 free(filter);
2302out:
2303 return err;
2304out_enomem:
2305 errno = ENOMEM;
2306 goto out;
2307}
c27366f0 2308
dd1a5037
ACM
2309static int trace__set_filter_loop_pids(struct trace *trace)
2310{
082ab9a1 2311 unsigned int nr = 1;
dd1a5037
ACM
2312 pid_t pids[32] = {
2313 getpid(),
2314 };
082ab9a1
ACM
2315 struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
2316
2317 while (thread && nr < ARRAY_SIZE(pids)) {
2318 struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
2319
2320 if (parent == NULL)
2321 break;
2322
2323 if (!strcmp(thread__comm_str(parent), "sshd")) {
2324 pids[nr++] = parent->tid;
2325 break;
2326 }
2327 thread = parent;
2328 }
dd1a5037
ACM
2329
2330 return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
2331}
2332
f15eb531 2333static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2334{
14a052df 2335 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2336 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2337 int err = -1, i;
2338 unsigned long before;
f15eb531 2339 const bool forks = argc > 0;
46fb3c21 2340 bool draining = false;
514f1c67 2341
75b757ca
ACM
2342 trace->live = true;
2343
c27366f0 2344 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2345 goto out_error_raw_syscalls;
514f1c67 2346
e281a960 2347 if (trace->trace_syscalls)
08c98776 2348 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2349
0ae537cb
ACM
2350 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2351 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2352 if (pgfault_maj == NULL)
2353 goto out_error_mem;
08e26396 2354 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
0ae537cb 2355 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2356 }
598d02c5 2357
0ae537cb
ACM
2358 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2359 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2360 if (pgfault_min == NULL)
2361 goto out_error_mem;
08e26396 2362 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
0ae537cb
ACM
2363 perf_evlist__add(evlist, pgfault_min);
2364 }
598d02c5 2365
1302d88e 2366 if (trace->sched &&
2cc990ba
ACM
2367 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2368 trace__sched_stat_runtime))
2369 goto out_error_sched_stat_runtime;
1302d88e 2370
514f1c67
ACM
2371 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2372 if (err < 0) {
c24ff998 2373 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2374 goto out_delete_evlist;
2375 }
2376
752fde44
ACM
2377 err = trace__symbols_init(trace, evlist);
2378 if (err < 0) {
c24ff998 2379 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2380 goto out_delete_evlist;
752fde44
ACM
2381 }
2382
75d50117 2383 perf_evlist__config(evlist, &trace->opts, &callchain_param);
fde54b78 2384
f15eb531
NK
2385 signal(SIGCHLD, sig_handler);
2386 signal(SIGINT, sig_handler);
2387
2388 if (forks) {
6ef73ec4 2389 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2390 argv, false, NULL);
f15eb531 2391 if (err < 0) {
c24ff998 2392 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2393 goto out_delete_evlist;
f15eb531
NK
2394 }
2395 }
2396
514f1c67 2397 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2398 if (err < 0)
2399 goto out_error_open;
514f1c67 2400
ba504235
WN
2401 err = bpf__apply_obj_config();
2402 if (err) {
2403 char errbuf[BUFSIZ];
2404
2405 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2406 pr_err("ERROR: Apply config to BPF failed: %s\n",
2407 errbuf);
2408 goto out_error_open;
2409 }
2410
241b057c
ACM
2411 /*
2412 * Better not use !target__has_task() here because we need to cover the
2413 * case where no threads were specified in the command line, but a
2414 * workload was, and in that case we will fill in the thread_map when
2415 * we fork the workload in perf_evlist__prepare_workload.
2416 */
f078c385
ACM
2417 if (trace->filter_pids.nr > 0)
2418 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2419 else if (thread_map__pid(evlist->threads, 0) == -1)
dd1a5037 2420 err = trace__set_filter_loop_pids(trace);
f078c385 2421
94ad89bc
ACM
2422 if (err < 0)
2423 goto out_error_mem;
2424
19867b61
ACM
2425 if (trace->ev_qualifier_ids.nr > 0) {
2426 err = trace__set_ev_qualifier_filter(trace);
2427 if (err < 0)
2428 goto out_errno;
19867b61 2429
2e5e5f87
ACM
2430 pr_debug("event qualifier tracepoint filter: %s\n",
2431 trace->syscalls.events.sys_exit->filter);
2432 }
19867b61 2433
94ad89bc
ACM
2434 err = perf_evlist__apply_filters(evlist, &evsel);
2435 if (err < 0)
2436 goto out_error_apply_filters;
241b057c 2437
f74b9d3a 2438 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
e09b18d4
ACM
2439 if (err < 0)
2440 goto out_error_mmap;
514f1c67 2441
e36b7821 2442 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2443 perf_evlist__enable(evlist);
2444
f15eb531
NK
2445 if (forks)
2446 perf_evlist__start_workload(evlist);
2447
e36b7821
AB
2448 if (trace->opts.initial_delay) {
2449 usleep(trace->opts.initial_delay * 1000);
2450 perf_evlist__enable(evlist);
2451 }
2452
e13798c7 2453 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2454 evlist->threads->nr > 1 ||
2455 perf_evlist__first(evlist)->attr.inherit;
bd3dda9a
ACM
2456
2457 /*
2458 * Now that we already used evsel->attr to ask the kernel to setup the
2459 * events, lets reuse evsel->attr.sample_max_stack as the limit in
2460 * trace__resolve_callchain(), allowing per-event max-stack settings
2461 * to override an explicitely set --max-stack global setting.
2462 */
2463 evlist__for_each_entry(evlist, evsel) {
2464 if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) &&
2465 evsel->attr.sample_max_stack == 0)
2466 evsel->attr.sample_max_stack = trace->max_stack;
2467 }
514f1c67 2468again:
efd5745e 2469 before = trace->nr_events;
514f1c67
ACM
2470
2471 for (i = 0; i < evlist->nr_mmaps; i++) {
2472 union perf_event *event;
2473
2474 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2475 struct perf_sample sample;
514f1c67 2476
efd5745e 2477 ++trace->nr_events;
514f1c67 2478
514f1c67
ACM
2479 err = perf_evlist__parse_sample(evlist, event, &sample);
2480 if (err) {
c24ff998 2481 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2482 goto next_event;
514f1c67
ACM
2483 }
2484
ddbb1b13 2485 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2486next_event:
2487 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2488
ba209f85
ACM
2489 if (interrupted)
2490 goto out_disable;
02ac5421
ACM
2491
2492 if (done && !draining) {
2493 perf_evlist__disable(evlist);
2494 draining = true;
2495 }
514f1c67
ACM
2496 }
2497 }
2498
efd5745e 2499 if (trace->nr_events == before) {
ba209f85 2500 int timeout = done ? 100 : -1;
f15eb531 2501
46fb3c21
ACM
2502 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2503 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2504 draining = true;
2505
ba209f85 2506 goto again;
46fb3c21 2507 }
ba209f85
ACM
2508 } else {
2509 goto again;
f15eb531
NK
2510 }
2511
ba209f85 2512out_disable:
f3b623b8
ACM
2513 thread__zput(trace->current);
2514
ba209f85 2515 perf_evlist__disable(evlist);
514f1c67 2516
c522739d
ACM
2517 if (!err) {
2518 if (trace->summary)
2519 trace__fprintf_thread_summary(trace, trace->output);
2520
2521 if (trace->show_tool_stats) {
2522 fprintf(trace->output, "Stats:\n "
2523 " vfs_getname : %" PRIu64 "\n"
2524 " proc_getname: %" PRIu64 "\n",
2525 trace->stats.vfs_getname,
2526 trace->stats.proc_getname);
2527 }
2528 }
bf2575c1 2529
514f1c67 2530out_delete_evlist:
33974a41
AV
2531 trace__symbols__exit(trace);
2532
514f1c67 2533 perf_evlist__delete(evlist);
14a052df 2534 trace->evlist = NULL;
75b757ca 2535 trace->live = false;
514f1c67 2536 return err;
6ef068cb
ACM
2537{
2538 char errbuf[BUFSIZ];
a8f23d8f 2539
2cc990ba 2540out_error_sched_stat_runtime:
988bdb31 2541 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2542 goto out_error;
2543
801c67b0 2544out_error_raw_syscalls:
988bdb31 2545 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2546 goto out_error;
2547
e09b18d4
ACM
2548out_error_mmap:
2549 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2550 goto out_error;
2551
a8f23d8f
ACM
2552out_error_open:
2553 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2554
2555out_error:
6ef068cb 2556 fprintf(trace->output, "%s\n", errbuf);
87f91868 2557 goto out_delete_evlist;
94ad89bc
ACM
2558
2559out_error_apply_filters:
2560 fprintf(trace->output,
2561 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2562 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2563 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2564 goto out_delete_evlist;
514f1c67 2565}
5ed08dae
ACM
2566out_error_mem:
2567 fprintf(trace->output, "Not enough memory to run!\n");
2568 goto out_delete_evlist;
19867b61
ACM
2569
2570out_errno:
2571 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2572 goto out_delete_evlist;
a8f23d8f 2573}
514f1c67 2574
6810fc91
DA
2575static int trace__replay(struct trace *trace)
2576{
2577 const struct perf_evsel_str_handler handlers[] = {
c522739d 2578 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2579 };
8ceb41d7 2580 struct perf_data data = {
eae8ad80
JO
2581 .file = {
2582 .path = input_name,
2583 },
2584 .mode = PERF_DATA_MODE_READ,
2585 .force = trace->force,
f5fc1412 2586 };
6810fc91 2587 struct perf_session *session;
003824e8 2588 struct perf_evsel *evsel;
6810fc91
DA
2589 int err = -1;
2590
2591 trace->tool.sample = trace__process_sample;
2592 trace->tool.mmap = perf_event__process_mmap;
384c671e 2593 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2594 trace->tool.comm = perf_event__process_comm;
2595 trace->tool.exit = perf_event__process_exit;
2596 trace->tool.fork = perf_event__process_fork;
2597 trace->tool.attr = perf_event__process_attr;
f3b3614a 2598 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2599 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2600 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2601
0a8cb85c 2602 trace->tool.ordered_events = true;
6810fc91
DA
2603 trace->tool.ordering_requires_timestamps = true;
2604
2605 /* add tid to output */
2606 trace->multiple_threads = true;
2607
8ceb41d7 2608 session = perf_session__new(&data, false, &trace->tool);
6810fc91 2609 if (session == NULL)
52e02834 2610 return -1;
6810fc91 2611
aa07df6e
DA
2612 if (trace->opts.target.pid)
2613 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2614
2615 if (trace->opts.target.tid)
2616 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2617
0a7e6d1b 2618 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2619 goto out;
2620
8fb598e5
DA
2621 trace->host = &session->machines.host;
2622
6810fc91
DA
2623 err = perf_session__set_tracepoints_handlers(session, handlers);
2624 if (err)
2625 goto out;
2626
003824e8
NK
2627 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2628 "raw_syscalls:sys_enter");
9aca7f17
DA
2629 /* older kernels have syscalls tp versus raw_syscalls */
2630 if (evsel == NULL)
2631 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2632 "syscalls:sys_enter");
003824e8 2633
e281a960
SF
2634 if (evsel &&
2635 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2636 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2637 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2638 goto out;
2639 }
2640
2641 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2642 "raw_syscalls:sys_exit");
9aca7f17
DA
2643 if (evsel == NULL)
2644 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2645 "syscalls:sys_exit");
e281a960
SF
2646 if (evsel &&
2647 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2648 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2649 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2650 goto out;
2651 }
2652
e5cadb93 2653 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2654 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2655 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2656 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2657 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2658 evsel->handler = trace__pgfault;
2659 }
2660
6810fc91
DA
2661 setup_pager();
2662
b7b61cbe 2663 err = perf_session__process_events(session);
6810fc91
DA
2664 if (err)
2665 pr_err("Failed to process events, error %d", err);
2666
bf2575c1
DA
2667 else if (trace->summary)
2668 trace__fprintf_thread_summary(trace, trace->output);
2669
6810fc91
DA
2670out:
2671 perf_session__delete(session);
2672
2673 return err;
2674}
2675
1302d88e
ACM
/* Print the heading of the per-thread summary; returns the fprintf() result. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t nprinted = fprintf(fp, "\n Summary of events:\n\n");

	return nprinted;
}
2684
b535d523
ACM
2685DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2686 struct stats *stats;
2687 double msecs;
2688 int syscall;
2689)
2690{
2691 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2692 struct stats *stats = source->priv;
2693
2694 entry->syscall = source->i;
2695 entry->stats = stats;
2696 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2697}
2698
bf2575c1
DA
2699static size_t thread__dump_stats(struct thread_trace *ttrace,
2700 struct trace *trace, FILE *fp)
2701{
bf2575c1
DA
2702 size_t printed = 0;
2703 struct syscall *sc;
b535d523
ACM
2704 struct rb_node *nd;
2705 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2706
b535d523 2707 if (syscall_stats == NULL)
bf2575c1
DA
2708 return 0;
2709
2710 printed += fprintf(fp, "\n");
2711
834fd46d
MW
2712 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2713 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2714 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2715
98a91837 2716 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2717 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2718 if (stats) {
2719 double min = (double)(stats->min) / NSEC_PER_MSEC;
2720 double max = (double)(stats->max) / NSEC_PER_MSEC;
2721 double avg = avg_stats(stats);
2722 double pct;
2723 u64 n = (u64) stats->n;
2724
2725 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2726 avg /= NSEC_PER_MSEC;
2727
b535d523 2728 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2729 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2730 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2731 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2732 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2733 }
bf2575c1
DA
2734 }
2735
b535d523 2736 resort_rb__delete(syscall_stats);
bf2575c1 2737 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2738
2739 return printed;
2740}
2741
96c14451 2742static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2743{
96c14451 2744 size_t printed = 0;
89dceb22 2745 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2746 double ratio;
2747
2748 if (ttrace == NULL)
2749 return 0;
2750
2751 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2752
15e65c69 2753 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2754 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2755 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2756 if (ttrace->pfmaj)
2757 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2758 if (ttrace->pfmin)
2759 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2760 if (trace->sched)
2761 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2762 else if (fputc('\n', fp) != EOF)
2763 ++printed;
2764
bf2575c1 2765 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2766
96c14451
ACM
2767 return printed;
2768}
896cbb56 2769
96c14451
ACM
2770static unsigned long thread__nr_events(struct thread_trace *ttrace)
2771{
2772 return ttrace ? ttrace->nr_events : 0;
2773}
2774
2775DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2776 struct thread *thread;
2777)
2778{
2779 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2780}
2781
1302d88e
ACM
2782static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2783{
96c14451
ACM
2784 size_t printed = trace__fprintf_threads_header(fp);
2785 struct rb_node *nd;
91e467bc 2786 int i;
1302d88e 2787
91e467bc
KL
2788 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
2789 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
96c14451 2790
91e467bc
KL
2791 if (threads == NULL) {
2792 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2793 return 0;
2794 }
896cbb56 2795
91e467bc
KL
2796 resort_rb__for_each_entry(nd, threads)
2797 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
96c14451 2798
91e467bc
KL
2799 resort_rb__delete(threads);
2800 }
96c14451 2801 return printed;
1302d88e
ACM
2802}
2803
ae9ed035
ACM
2804static int trace__set_duration(const struct option *opt, const char *str,
2805 int unset __maybe_unused)
2806{
2807 struct trace *trace = opt->value;
2808
2809 trace->duration_filter = atof(str);
2810 return 0;
2811}
2812
f078c385
ACM
2813static int trace__set_filter_pids(const struct option *opt, const char *str,
2814 int unset __maybe_unused)
2815{
2816 int ret = -1;
2817 size_t i;
2818 struct trace *trace = opt->value;
2819 /*
2820 * FIXME: introduce a intarray class, plain parse csv and create a
2821 * { int nr, int entries[] } struct...
2822 */
2823 struct intlist *list = intlist__new(str);
2824
2825 if (list == NULL)
2826 return -1;
2827
2828 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2829 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2830
2831 if (trace->filter_pids.entries == NULL)
2832 goto out;
2833
2834 trace->filter_pids.entries[0] = getpid();
2835
2836 for (i = 1; i < trace->filter_pids.nr; ++i)
2837 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2838
2839 intlist__delete(list);
2840 ret = 0;
2841out:
2842 return ret;
2843}
2844
c24ff998
ACM
2845static int trace__open_output(struct trace *trace, const char *filename)
2846{
2847 struct stat st;
2848
2849 if (!stat(filename, &st) && st.st_size) {
2850 char oldname[PATH_MAX];
2851
2852 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2853 unlink(oldname);
2854 rename(filename, oldname);
2855 }
2856
2857 trace->output = fopen(filename, "w");
2858
2859 return trace->output == NULL ? -errno : 0;
2860}
2861
598d02c5
SF
2862static int parse_pagefaults(const struct option *opt, const char *str,
2863 int unset __maybe_unused)
2864{
2865 int *trace_pgfaults = opt->value;
2866
2867 if (strcmp(str, "all") == 0)
2868 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2869 else if (strcmp(str, "maj") == 0)
2870 *trace_pgfaults |= TRACE_PFMAJ;
2871 else if (strcmp(str, "min") == 0)
2872 *trace_pgfaults |= TRACE_PFMIN;
2873 else
2874 return -1;
2875
2876 return 0;
2877}
2878
14a052df
ACM
2879static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2880{
2881 struct perf_evsel *evsel;
2882
e5cadb93 2883 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2884 evsel->handler = handler;
2885}
2886
017037ff
ACM
2887/*
2888 * XXX: Hackish, just splitting the combined -e+--event (syscalls
2889 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2890 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2891 *
2892 * It'd be better to introduce a parse_options() variant that would return a
2893 * list with the terms it didn't match to an event...
2894 */
2895static int trace__parse_events_option(const struct option *opt, const char *str,
2896 int unset __maybe_unused)
2897{
2898 struct trace *trace = (struct trace *)opt->value;
2899 const char *s = str;
2900 char *sep = NULL, *lists[2] = { NULL, NULL, };
27702bcf 2901 int len = strlen(str) + 1, err = -1, list, idx;
017037ff
ACM
2902 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2903 char group_name[PATH_MAX];
2904
2905 if (strace_groups_dir == NULL)
2906 return -1;
2907
2908 if (*s == '!') {
2909 ++s;
2910 trace->not_ev_qualifier = true;
2911 }
2912
2913 while (1) {
2914 if ((sep = strchr(s, ',')) != NULL)
2915 *sep = '\0';
2916
2917 list = 0;
27702bcf
ACM
2918 if (syscalltbl__id(trace->sctbl, s) >= 0 ||
2919 syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
017037ff
ACM
2920 list = 1;
2921 } else {
2922 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2923 if (access(group_name, R_OK) == 0)
2924 list = 1;
2925 }
2926
2927 if (lists[list]) {
2928 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
2929 } else {
2930 lists[list] = malloc(len);
2931 if (lists[list] == NULL)
2932 goto out;
2933 strcpy(lists[list], s);
2934 }
2935
2936 if (!sep)
2937 break;
2938
2939 *sep = ',';
2940 s = sep + 1;
2941 }
2942
2943 if (lists[1] != NULL) {
2944 struct strlist_config slist_config = {
2945 .dirname = strace_groups_dir,
2946 };
2947
2948 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2949 if (trace->ev_qualifier == NULL) {
2950 fputs("Not enough memory to parse event qualifier", trace->output);
2951 goto out;
2952 }
2953
2954 if (trace__validate_ev_qualifier(trace))
2955 goto out;
2956 }
2957
2958 err = 0;
2959
2960 if (lists[0]) {
2961 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2962 "event selector. use 'perf list' to list available events",
2963 parse_events_option);
2964 err = parse_events_option(&o, lists[0], 0);
2965 }
2966out:
2967 if (sep)
2968 *sep = ',';
2969
2970 return err;
2971}
2972
/*
 * cmd_trace - entry point of the 'perf trace' builtin.
 *
 * Sets up a struct trace with its defaults, parses the command line
 * (optionally with a "record" subcommand) and then hands over to
 * trace__record(), to trace__replay() when an input file was given with
 * -i/--input, or to trace__run() otherwise.
 *
 * Returns the value of whichever of those ran, or the error code of the
 * setup step that failed.
 */
b0ad8ea6 2973int cmd_trace(int argc, const char **argv)
514f1c67 2974{
6fdd9cb7 2975 const char *trace_usage[] = {
f15eb531
NK
2976 "perf trace [<options>] [<command>]",
2977 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2978 "perf trace record [<options>] [<command>]",
2979 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2980 NULL
2981 };
2982 struct trace trace = {
514f1c67
ACM
2983 .syscalls = {
2984 . max = -1,
2985 },
2986 .opts = {
2987 .target = {
2988 .uid = UINT_MAX,
2989 .uses_mmap = true,
2990 },
2991 .user_freq = UINT_MAX,
2992 .user_interval = ULLONG_MAX,
509051ea 2993 .no_buffering = true,
38d5447d 2994 .mmap_pages = UINT_MAX,
9d9cad76 2995 .proc_map_timeout = 500,
514f1c67 2996 },
007d66a0 2997 .output = stderr,
50c95cbd 2998 .show_comm = true,
e281a960 2999 .trace_syscalls = true,
44621819 3000 .kernel_syscallchains = false,
05614993 3001 .max_stack = UINT_MAX,
514f1c67 3002 };
c24ff998 3003 const char *output_name = NULL;
514f1c67 3004 const struct option trace_options[] = {
017037ff
ACM
3005 OPT_CALLBACK('e', "event", &trace, "event",
3006 "event/syscall selector. use 'perf list' to list available events",
3007 trace__parse_events_option),
50c95cbd
ACM
3008 OPT_BOOLEAN(0, "comm", &trace.show_comm,
3009 "show the thread COMM next to its id"),
c522739d 3010 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
3011 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
3012 trace__parse_events_option),
c24ff998 3013 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 3014 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
3015 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
3016 "trace events on existing process id"),
ac9be8ee 3017 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 3018 "trace events on existing thread id"),
fa0e4ffe
ACM
3019 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
3020 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 3021 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 3022 "system-wide collection from all CPUs"),
ac9be8ee 3023 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 3024 "list of cpus to monitor"),
6810fc91 3025 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 3026 "child tasks do not inherit counters"),
994a1f78
JO
3027 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
3028 "number of mmap data pages",
3029 perf_evlist__parse_mmap_pages),
ac9be8ee 3030 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 3031 "user to profile"),
ae9ed035
ACM
3032 OPT_CALLBACK(0, "duration", &trace, "float",
3033 "show only events with duration > N.M ms",
3034 trace__set_duration),
1302d88e 3035 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 3036 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
3037 OPT_BOOLEAN('T', "time", &trace.full_time,
3038 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
3039 OPT_BOOLEAN('s', "summary", &trace.summary_only,
3040 "Show only syscall summary with statistics"),
3041 OPT_BOOLEAN('S', "with-summary", &trace.summary,
3042 "Show all syscalls and summary with statistics"),
598d02c5
SF
3043 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
3044 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 3045 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 3046 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
3047 OPT_CALLBACK(0, "call-graph", &trace.opts,
3048 "record_mode[,record_size]", record_callchain_help,
3049 &record_parse_callchain_opt),
44621819
ACM
3050 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
3051 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
3052 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
3053 "Set the minimum stack depth when parsing the callchain, "
3054 "anything below the specified depth will be ignored."),
c6d4a494
ACM
3055 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
3056 "Set the maximum stack depth when parsing the callchain, "
3057 "anything beyond the specified depth will be ignored. "
4cb93446 3058 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
591421e1
ACM
3059 OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
3060 "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
9d9cad76
KL
3061 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
3062 "per thread proc mmap processing timeout in ms"),
e36b7821
AB
3063 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
3064 "ms to wait before starting measurement after program "
3065 "start"),
514f1c67
ACM
3066 OPT_END()
3067 };
ccd62a89 3068 bool __maybe_unused max_stack_user_set = true;
f3e459d1 3069 bool mmap_pages_user_set = true;
6fdd9cb7 3070 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 3071 int err;
32caf0d1 3072 char bf[BUFSIZ];
514f1c67 3073
4d08cb80
ACM
	/* Route SIGSEGV and SIGFPE to sighandler_dump_stack */
3074 signal(SIGSEGV, sighandler_dump_stack);
3075 signal(SIGFPE, sighandler_dump_stack);
3076
14a052df 3077 trace.evlist = perf_evlist__new();
fd0db102 3078 trace.sctbl = syscalltbl__new();
14a052df 3079
fd0db102 3080 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 3081 pr_err("Not enough memory to run!\n");
ff8f695c 3082 err = -ENOMEM;
14a052df
ACM
3083 goto out;
3084 }
3085
6fdd9cb7
YS
3086 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
3087 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 3088
d7888573
WN
3089 err = bpf__setup_stdout(trace.evlist);
3090 if (err) {
3091 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
3092 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
3093 goto out;
3094 }
3095
59247e33
ACM
3096 err = -1;
3097
598d02c5
SF
3098 if (trace.trace_pgfaults) {
3099 trace.opts.sample_address = true;
3100 trace.opts.sample_time = true;
3101 }
3102
f3e459d1
ACM
	/* .mmap_pages starts out as UINT_MAX: still that value means -m was not given */
3103 if (trace.opts.mmap_pages == UINT_MAX)
3104 mmap_pages_user_set = false;
3105
	/* Same sentinel scheme for --max-stack; pick the default when it was not given */
05614993 3106 if (trace.max_stack == UINT_MAX) {
fe176085 3107 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
3108 max_stack_user_set = false;
3109 }
3110
3111#ifdef HAVE_DWARF_UNWIND_SUPPORT
75d50117 3112 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
05614993 3113 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
75d50117 3114 }
05614993
ACM
3115#endif
3116
2ddd5c04 3117 if (callchain_param.enabled) {
f3e459d1
ACM
3118 if (!mmap_pages_user_set && geteuid() == 0)
3119 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
3120
566a0885 3121 symbol_conf.use_callchain = true;
f3e459d1 3122 }
566a0885 3123
14a052df
ACM
3124 if (trace.evlist->nr_entries > 0)
3125 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3126
1e28fe0a
SF
	/* "perf trace record ...": pass the remaining arguments to trace__record() */
3127 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3128 return trace__record(&trace, argc-1, &argv[1]);
3129
3130 /* summary_only implies summary option, but don't overwrite summary if set */
3131 if (trace.summary_only)
3132 trace.summary = trace.summary_only;
3133
726f3234
ACM
3134 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3135 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
3136 pr_err("Please specify something to trace.\n");
3137 return -1;
3138 }
3139
017037ff 3140 if (!trace.trace_syscalls && trace.ev_qualifier) {
59247e33
ACM
3141 pr_err("The -e option can't be used with --no-syscalls.\n");
3142 goto out;
3143 }
3144
c24ff998
ACM
3145 if (output_name != NULL) {
3146 err = trace__open_output(&trace, output_name);
3147 if (err < 0) {
3148 perror("failed to create output file");
3149 goto out;
3150 }
3151 }
3152
fd0db102
ACM
3153 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3154
602ad878 3155 err = target__validate(&trace.opts.target);
32caf0d1 3156 if (err) {
602ad878 3157 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3158 fprintf(trace.output, "%s", bf);
3159 goto out_close;
32caf0d1
NK
3160 }
3161
602ad878 3162 err = target__parse_uid(&trace.opts.target);
514f1c67 3163 if (err) {
602ad878 3164 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3165 fprintf(trace.output, "%s", bf);
3166 goto out_close;
514f1c67
ACM
3167 }
3168
	/* No workload command and target__none() holds: fall back to system wide */
602ad878 3169 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3170 trace.opts.target.system_wide = true;
3171
6810fc91
DA
	/* Replay a recorded file when -i was given, otherwise trace live */
3172 if (input_name)
3173 err = trace__replay(&trace);
3174 else
3175 err = trace__run(&trace, argc, argv);
1302d88e 3176
c24ff998
ACM
3177out_close:
	/* trace.output is only ours to close when it was opened for -o */
3178 if (output_name != NULL)
3179 fclose(trace.output);
3180out:
1302d88e 3181 return err;
514f1c67 3182}