]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - tools/perf/builtin-trace.c
perf trace beauty mmap: Ignore 'fd' and 'offset' args for MAP_ANONYMOUS
[mirror_ubuntu-bionic-kernel.git] / tools / perf / builtin-trace.c
CommitLineData
a598bb5e
ACM
1/*
2 * builtin-trace.c
3 *
4 * Builtin 'trace' command:
5 *
6 * Display a continuously updated trace of any workload, CPU, specific PID,
7 * system wide, etc. Default format is loosely strace like, but any other
8 * event may be specified using --event.
9 *
10 * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
11 *
12 * Initially based on the 'trace' prototype by Thomas Gleixner:
13 *
14 * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
15 *
16 * Released under the GPL v2. (and only v2, not any later version)
17 */
18
4e319027 19#include <traceevent/event-parse.h>
988bdb31 20#include <api/fs/tracing_path.h>
514f1c67 21#include "builtin.h"
752fde44 22#include "util/color.h"
7c304ee0 23#include "util/debug.h"
5ab8c689 24#include "util/event.h"
514f1c67 25#include "util/evlist.h"
4b6ab94e 26#include <subcmd/exec-cmd.h>
752fde44 27#include "util/machine.h"
9a3993d4 28#include "util/path.h"
6810fc91 29#include "util/session.h"
752fde44 30#include "util/thread.h"
4b6ab94e 31#include <subcmd/parse-options.h>
2ae3a312 32#include "util/strlist.h"
bdc89661 33#include "util/intlist.h"
514f1c67 34#include "util/thread_map.h"
bf2575c1 35#include "util/stat.h"
fd5cead2 36#include "trace/beauty/beauty.h"
97978b3e 37#include "trace-event.h"
9aca7f17 38#include "util/parse-events.h"
ba504235 39#include "util/bpf-loader.h"
566a0885 40#include "callchain.h"
fea01392 41#include "print_binary.h"
a067558e 42#include "string2.h"
fd0db102 43#include "syscalltbl.h"
96c14451 44#include "rb_resort.h"
514f1c67 45
a43783ae 46#include <errno.h>
fd20e811 47#include <inttypes.h>
fd0db102 48#include <libaudit.h> /* FIXME: Still needed for audit_errno_to_name */
4208735d 49#include <poll.h>
9607ad3a 50#include <signal.h>
514f1c67 51#include <stdlib.h>
017037ff 52#include <string.h>
8dd2a131 53#include <linux/err.h>
997bba8c
ACM
54#include <linux/filter.h>
55#include <linux/audit.h>
877a7a11 56#include <linux/kernel.h>
39878d49 57#include <linux/random.h>
c6d4a494 58#include <linux/stringify.h>
bd48c63e 59#include <linux/time64.h>
514f1c67 60
3d689ed6
ACM
61#include "sane_ctype.h"
62
c188e7ac
ACM
63#ifndef O_CLOEXEC
64# define O_CLOEXEC 02000000
65#endif
66
83a51694
ACM
67#ifndef F_LINUX_SPECIFIC_BASE
68# define F_LINUX_SPECIFIC_BASE 1024
69#endif
70
d1d438a3
ACM
71struct trace {
72 struct perf_tool tool;
fd0db102 73 struct syscalltbl *sctbl;
d1d438a3
ACM
74 struct {
75 int max;
76 struct syscall *table;
77 struct {
78 struct perf_evsel *sys_enter,
79 *sys_exit;
80 } events;
81 } syscalls;
82 struct record_opts opts;
83 struct perf_evlist *evlist;
84 struct machine *host;
85 struct thread *current;
86 u64 base_time;
87 FILE *output;
88 unsigned long nr_events;
89 struct strlist *ev_qualifier;
90 struct {
91 size_t nr;
92 int *entries;
93 } ev_qualifier_ids;
d1d438a3
ACM
94 struct {
95 size_t nr;
96 pid_t *entries;
97 } filter_pids;
98 double duration_filter;
99 double runtime_ms;
100 struct {
101 u64 vfs_getname,
102 proc_getname;
103 } stats;
c6d4a494 104 unsigned int max_stack;
5cf9c84e 105 unsigned int min_stack;
d1d438a3
ACM
106 bool not_ev_qualifier;
107 bool live;
108 bool full_time;
109 bool sched;
110 bool multiple_threads;
111 bool summary;
112 bool summary_only;
113 bool show_comm;
114 bool show_tool_stats;
115 bool trace_syscalls;
44621819 116 bool kernel_syscallchains;
d1d438a3
ACM
117 bool force;
118 bool vfs_getname;
119 int trace_pgfaults;
fd0db102 120 int open_id;
d1d438a3 121};
a1c2552d 122
77170988
ACM
123struct tp_field {
124 int offset;
125 union {
126 u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
127 void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
128 };
129};
130
131#define TP_UINT_FIELD(bits) \
132static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
133{ \
55d43bca
DA
134 u##bits value; \
135 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
136 return value; \
77170988
ACM
137}
138
139TP_UINT_FIELD(8);
140TP_UINT_FIELD(16);
141TP_UINT_FIELD(32);
142TP_UINT_FIELD(64);
143
144#define TP_UINT_FIELD__SWAPPED(bits) \
145static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
146{ \
55d43bca
DA
147 u##bits value; \
148 memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
77170988
ACM
149 return bswap_##bits(value);\
150}
151
152TP_UINT_FIELD__SWAPPED(16);
153TP_UINT_FIELD__SWAPPED(32);
154TP_UINT_FIELD__SWAPPED(64);
155
156static int tp_field__init_uint(struct tp_field *field,
157 struct format_field *format_field,
158 bool needs_swap)
159{
160 field->offset = format_field->offset;
161
162 switch (format_field->size) {
163 case 1:
164 field->integer = tp_field__u8;
165 break;
166 case 2:
167 field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
168 break;
169 case 4:
170 field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
171 break;
172 case 8:
173 field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
174 break;
175 default:
176 return -1;
177 }
178
179 return 0;
180}
181
182static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
183{
184 return sample->raw_data + field->offset;
185}
186
187static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
188{
189 field->offset = format_field->offset;
190 field->pointer = tp_field__ptr;
191 return 0;
192}
193
194struct syscall_tp {
195 struct tp_field id;
196 union {
197 struct tp_field args, ret;
198 };
199};
200
201static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
202 struct tp_field *field,
203 const char *name)
204{
205 struct format_field *format_field = perf_evsel__field(evsel, name);
206
207 if (format_field == NULL)
208 return -1;
209
210 return tp_field__init_uint(field, format_field, evsel->needs_swap);
211}
212
/*
 * Initialise the integer accessor for member @name of the struct syscall_tp
 * stored in evsel->priv, using the tracepoint field of the same name.
 * Evaluates to the return value of perf_evsel__init_tp_uint_field().
 *
 * @evsel is parenthesised so that any pointer expression can be passed; note
 * it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_uint_field((evsel), &sc->name, #name); })
216
/*
 * Look up tracepoint field @name on @evsel and initialise @field as a
 * pointer accessor for it.
 *
 * Returns -1 if the field does not exist, otherwise the result of
 * tp_field__init_ptr().
 */
static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
					 struct tp_field *field,
					 const char *name)
{
	struct format_field *ff = perf_evsel__field(evsel, name);

	if (!ff)
		return -1;

	return tp_field__init_ptr(field, ff);
}
228
/*
 * Initialise the pointer accessor for member @name of the struct syscall_tp
 * stored in evsel->priv, using the tracepoint field of the same name.
 * Evaluates to the return value of perf_evsel__init_tp_ptr_field().
 *
 * @evsel is parenthesised so that any pointer expression can be passed; note
 * it is still evaluated twice.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = (evsel)->priv;\
	   perf_evsel__init_tp_ptr_field((evsel), &sc->name, #name); })
232
233static void perf_evsel__delete_priv(struct perf_evsel *evsel)
234{
04662523 235 zfree(&evsel->priv);
77170988
ACM
236 perf_evsel__delete(evsel);
237}
238
96695d44
NK
239static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel, void *handler)
240{
241 evsel->priv = malloc(sizeof(struct syscall_tp));
242 if (evsel->priv != NULL) {
243 if (perf_evsel__init_sc_tp_uint_field(evsel, id))
244 goto out_delete;
245
246 evsel->handler = handler;
247 return 0;
248 }
249
250 return -ENOMEM;
251
252out_delete:
04662523 253 zfree(&evsel->priv);
96695d44
NK
254 return -ENOENT;
255}
256
/*
 * Create the evsel for the raw_syscalls:<direction> tracepoint, falling back
 * to syscalls:<direction>, and initialise its syscall_tp private data with
 * @handler.
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the private data could not be initialised (the evsel is deleted then).
 */
static struct perf_evsel *perf_evsel__syscall_newtp(const char *direction, void *handler)
{
	struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);

	/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
	if (IS_ERR(evsel)) {
		evsel = perf_evsel__newtp("syscalls", direction);
		if (IS_ERR(evsel))
			return NULL;
	}

	if (perf_evsel__init_syscall_tp(evsel, handler) == 0)
		return evsel;

	perf_evsel__delete_priv(evsel);
	return NULL;
}
277
/*
 * Read member @name of evsel->priv (a struct syscall_tp) from @sample through
 * its integer reader. Arguments are parenthesised so that arbitrary pointer
 * expressions can be passed safely.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.integer(&fields->name, (sample)); })
281
/*
 * Fetch the address of member @name of evsel->priv (a struct syscall_tp)
 * inside @sample through its pointer reader. Arguments are parenthesised so
 * that arbitrary pointer expressions can be passed safely.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = (evsel)->priv; \
	   fields->name.pointer(&fields->name, (sample)); })
285
0ae79636
ACM
286size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
287{
288 int idx = val - sa->offset;
1f115cb7 289
0ae79636
ACM
290 if (idx < 0 || idx >= sa->nr_entries)
291 return scnprintf(bf, size, intfmt, val);
1f115cb7 292
0ae79636 293 return scnprintf(bf, size, "%s", sa->entries[idx]);
03e3adc9
ACM
294}
295
975b7c2f
ACM
296static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
297 const char *intfmt,
298 struct syscall_arg *arg)
1f115cb7 299{
0ae79636 300 return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
1f115cb7
ACM
301}
302
975b7c2f
ACM
/* strarray formatter that prints unnamed values in decimal ("%d"). */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	size_t printed = __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);

	return printed;
}
308
1f115cb7
ACM
309#define SCA_STRARRAY syscall_arg__scnprintf_strarray
310
83a51694
ACM
/* A list of strarray tables that are consulted one after another. */
struct strarrays {
	int		nr_entries;
	struct strarray	**entries;
};

/*
 * Define 'strarrays__<array>' wrapping @array, which must be a true array of
 * struct strarray pointers (ARRAY_SIZE() is applied to it).
 */
#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
	.entries    = array, \
	.nr_entries = ARRAY_SIZE(array), \
}
320
274e86fd
ACM
321size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
322 struct syscall_arg *arg)
83a51694
ACM
323{
324 struct strarrays *sas = arg->parm;
325 int i;
326
327 for (i = 0; i < sas->nr_entries; ++i) {
328 struct strarray *sa = sas->entries[i];
329 int idx = arg->val - sa->offset;
330
331 if (idx >= 0 && idx < sa->nr_entries) {
332 if (sa->entries[idx] == NULL)
333 break;
334 return scnprintf(bf, size, "%s", sa->entries[idx]);
335 }
336 }
337
338 return scnprintf(bf, size, "%d", arg->val);
339}
340
844ae5b4
ACM
341#if defined(__i386__) || defined(__x86_64__)
342/*
343 * FIXME: Make this available to all arches as soon as the ioctl beautifier
344 * gets rewritten to support all arches.
345 */
78645cf3
ACM
/* strarray formatter that prints unnamed values in hexadecimal ("%#x"). */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
						 struct syscall_arg *arg)
{
	size_t printed = __syscall_arg__scnprintf_strarray(bf, size, "%#x", arg);

	return printed;
}
351
352#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
844ae5b4 353#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 354
48e1f91a
ACM
355#ifndef AT_FDCWD
356#define AT_FDCWD -100
357#endif
358
75b757ca
ACM
359static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
360 struct syscall_arg *arg)
361{
362 int fd = arg->val;
363
364 if (fd == AT_FDCWD)
365 return scnprintf(bf, size, "CWD");
366
367 return syscall_arg__scnprintf_fd(bf, size, arg);
368}
369
370#define SCA_FDAT syscall_arg__scnprintf_fd_at
371
372static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
373 struct syscall_arg *arg);
374
375#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
376
2c2b1623 377size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
13d4ff3e 378{
01533e97 379 return scnprintf(bf, size, "%#lx", arg->val);
13d4ff3e
ACM
380}
381
2c2b1623 382size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
a1c2552d
ACM
383{
384 return scnprintf(bf, size, "%d", arg->val);
385}
386
5dde91ed
ACM
387size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
388{
389 return scnprintf(bf, size, "%ld", arg->val);
390}
391
729a7841
ACM
392static const char *bpf_cmd[] = {
393 "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
394 "MAP_GET_NEXT_KEY", "PROG_LOAD",
395};
396static DEFINE_STRARRAY(bpf_cmd);
397
03e3adc9
ACM
398static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
399static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
eac032c5 400
1f115cb7
ACM
401static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
402static DEFINE_STRARRAY(itimers);
403
b62bee1b
ACM
404static const char *keyctl_options[] = {
405 "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
406 "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
407 "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
408 "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
409 "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
410};
411static DEFINE_STRARRAY(keyctl_options);
412
efe6b882
ACM
413static const char *whences[] = { "SET", "CUR", "END",
414#ifdef SEEK_DATA
415"DATA",
416#endif
417#ifdef SEEK_HOLE
418"HOLE",
419#endif
420};
421static DEFINE_STRARRAY(whences);
f9da0b0c 422
80f587d5
ACM
423static const char *fcntl_cmds[] = {
424 "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
e000e5e3
ACM
425 "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
426 "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
427 "GETOWNER_UIDS",
80f587d5
ACM
428};
429static DEFINE_STRARRAY(fcntl_cmds);
430
83a51694
ACM
431static const char *fcntl_linux_specific_cmds[] = {
432 "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
433 "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
64e4561d 434 "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
83a51694
ACM
435};
436
437static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
438
439static struct strarray *fcntl_cmds_arrays[] = {
440 &strarray__fcntl_cmds,
441 &strarray__fcntl_linux_specific_cmds,
442};
443
444static DEFINE_STRARRAYS(fcntl_cmds_arrays);
445
c045bf02
ACM
446static const char *rlimit_resources[] = {
447 "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
448 "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
449 "RTTIME",
450};
451static DEFINE_STRARRAY(rlimit_resources);
452
eb5b1b14
ACM
453static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
454static DEFINE_STRARRAY(sighow);
455
4f8c1b74
DA
456static const char *clockid[] = {
457 "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
28ebb87c
ACM
458 "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
459 "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
4f8c1b74
DA
460};
461static DEFINE_STRARRAY(clockid);
462
e10bce81
ACM
463static const char *socket_families[] = {
464 "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
465 "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
466 "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
467 "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
468 "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
469 "ALG", "NFC", "VSOCK",
470};
471static DEFINE_STRARRAY(socket_families);
472
51108999
ACM
473static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
474 struct syscall_arg *arg)
475{
476 size_t printed = 0;
477 int mode = arg->val;
478
479 if (mode == F_OK) /* 0 */
480 return scnprintf(bf, size, "F");
481#define P_MODE(n) \
482 if (mode & n##_OK) { \
483 printed += scnprintf(bf + printed, size - printed, "%s", #n); \
484 mode &= ~n##_OK; \
485 }
486
487 P_MODE(R);
488 P_MODE(W);
489 P_MODE(X);
490#undef P_MODE
491
492 if (mode)
493 printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
494
495 return printed;
496}
497
498#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
499
f994592d
ACM
500static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
501 struct syscall_arg *arg);
502
503#define SCA_FILENAME syscall_arg__scnprintf_filename
504
46cce19b
ACM
505static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
506 struct syscall_arg *arg)
507{
508 int printed = 0, flags = arg->val;
509
510#define P_FLAG(n) \
511 if (flags & O_##n) { \
512 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
513 flags &= ~O_##n; \
514 }
515
516 P_FLAG(CLOEXEC);
517 P_FLAG(NONBLOCK);
518#undef P_FLAG
519
520 if (flags)
521 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
522
523 return printed;
524}
525
526#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
527
844ae5b4
ACM
528#if defined(__i386__) || defined(__x86_64__)
529/*
530 * FIXME: Make this available to all arches.
531 */
78645cf3
ACM
532#define TCGETS 0x5401
533
534static const char *tioctls[] = {
535 "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
536 "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL",
537 "TIOCSCTTY", "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI",
538 "TIOCGWINSZ", "TIOCSWINSZ", "TIOCMGET", "TIOCMBIS", "TIOCMBIC",
539 "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR", "FIONREAD", "TIOCLINUX",
540 "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT", "FIONBIO",
541 "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP", [0x27] = "TIOCSBRK",
542 "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2", "TCSETSW2", "TCSETSF2",
543 "TIOCGRS485", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
544 "TIOCGDEV||TCGETX", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG",
545 "TIOCVHANGUP", "TIOCGPKT", "TIOCGPTLCK", "TIOCGEXCL",
546 [0x50] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
547 "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
548 "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
549 "TIOCMIWAIT", "TIOCGICOUNT", [0x60] = "FIOQSIZE",
550};
551
552static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
844ae5b4 553#endif /* defined(__i386__) || defined(__x86_64__) */
78645cf3 554
a355a61e
ACM
555#ifndef GRND_NONBLOCK
556#define GRND_NONBLOCK 0x0001
557#endif
558#ifndef GRND_RANDOM
559#define GRND_RANDOM 0x0002
560#endif
561
39878d49
ACM
562static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
563 struct syscall_arg *arg)
564{
565 int printed = 0, flags = arg->val;
566
567#define P_FLAG(n) \
568 if (flags & GRND_##n) { \
569 printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
570 flags &= ~GRND_##n; \
571 }
572
573 P_FLAG(RANDOM);
574 P_FLAG(NONBLOCK);
575#undef P_FLAG
576
577 if (flags)
578 printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
579
580 return printed;
581}
582
583#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
584
82d4a110
ACM
/*
 * Initializer for a syscall_arg_fmt that prints through 'strarray__<array>'.
 * @name is not expanded; at the call sites it only labels the argument.
 */
#define STRARRAY(name, array) \
	{ .scnprintf = SCA_STRARRAY, \
	  .parm	     = &strarray__##array, }
453350dd 588
ea8dc3ce 589#include "trace/beauty/eventfd.c"
8bf382ce 590#include "trace/beauty/flock.c"
d5d71e86 591#include "trace/beauty/futex_op.c"
df4cb167 592#include "trace/beauty/mmap.c"
ba2f22cf 593#include "trace/beauty/mode_t.c"
a30e6259 594#include "trace/beauty/msg_flags.c"
8f48df69 595#include "trace/beauty/open_flags.c"
62de344e 596#include "trace/beauty/perf_event_open.c"
d5d71e86 597#include "trace/beauty/pid.c"
a3bca91f 598#include "trace/beauty/sched_policy.c"
f5cd95ea 599#include "trace/beauty/seccomp.c"
12199d8e 600#include "trace/beauty/signum.c"
bbf86c43 601#include "trace/beauty/socket_type.c"
7206b900 602#include "trace/beauty/waitid_options.c"
a3bca91f 603
82d4a110
ACM
/*
 * How to pretty-print one syscall argument: the formatter, an opaque
 * parameter handed to it via arg->parm, and a show_zero flag (presumably
 * forces printing of a zero value - confirm at the use site).
 */
struct syscall_arg_fmt {
	size_t	(*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	void	*parm;
	bool	show_zero;
};
609
514f1c67
ACM
610static struct syscall_fmt {
611 const char *name;
aec1930b 612 const char *alias;
82d4a110 613 struct syscall_arg_fmt arg[6];
11c8e39f 614 bool errpid;
514f1c67 615 bool timeout;
04b34729 616 bool hexret;
514f1c67 617} syscall_fmts[] = {
1f63139c 618 { .name = "access",
82d4a110 619 .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
1f63139c
ACM
620 { .name = "arch_prctl", .alias = "prctl", },
621 { .name = "bpf",
82d4a110 622 .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
beccb2b5 623 { .name = "brk", .hexret = true,
82d4a110 624 .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
1f63139c 625 { .name = "clock_gettime",
82d4a110 626 .arg = { [0] = STRARRAY(clk_id, clockid), }, },
11c8e39f 627 { .name = "clone", .errpid = true, },
1f63139c 628 { .name = "close",
82d4a110 629 .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
1f63139c 630 { .name = "epoll_ctl",
82d4a110 631 .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
1f63139c 632 { .name = "eventfd2",
82d4a110 633 .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
1f63139c 634 { .name = "fchmodat",
82d4a110 635 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 636 { .name = "fchownat",
82d4a110 637 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 638 { .name = "fcntl",
82d4a110 639 .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
39cc355b
ACM
640 .parm = &strarrays__fcntl_cmds_arrays,
641 .show_zero = true, },
82d4a110 642 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
1f63139c 643 { .name = "flock",
82d4a110 644 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
1f63139c
ACM
645 { .name = "fstat", .alias = "newfstat", },
646 { .name = "fstatat", .alias = "newfstatat", },
647 { .name = "futex",
82d4a110 648 .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, }, },
1f63139c 649 { .name = "futimesat",
82d4a110 650 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 651 { .name = "getitimer",
82d4a110 652 .arg = { [0] = STRARRAY(which, itimers), }, },
c65f1070 653 { .name = "getpid", .errpid = true, },
d1d438a3 654 { .name = "getpgid", .errpid = true, },
c65f1070 655 { .name = "getppid", .errpid = true, },
1f63139c 656 { .name = "getrandom",
82d4a110 657 .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
1f63139c 658 { .name = "getrlimit",
82d4a110 659 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
1f63139c 660 { .name = "ioctl",
82d4a110 661 .arg = {
844ae5b4
ACM
662#if defined(__i386__) || defined(__x86_64__)
663/*
664 * FIXME: Make this available to all arches.
665 */
82d4a110
ACM
666 [1] = { .scnprintf = SCA_STRHEXARRAY, /* cmd */
667 .parm = &strarray__tioctls, },
668 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 669#else
82d4a110 670 [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
844ae5b4 671#endif
1f63139c 672 { .name = "keyctl",
82d4a110 673 .arg = { [0] = STRARRAY(option, keyctl_options), }, },
1f63139c 674 { .name = "kill",
82d4a110 675 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 676 { .name = "linkat",
82d4a110 677 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 678 { .name = "lseek",
82d4a110 679 .arg = { [2] = STRARRAY(whence, whences), }, },
1f63139c
ACM
680 { .name = "lstat", .alias = "newlstat", },
681 { .name = "madvise",
82d4a110
ACM
682 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
683 [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
1f63139c 684 { .name = "mkdirat",
82d4a110 685 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 686 { .name = "mknodat",
82d4a110 687 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
1f63139c 688 { .name = "mlock",
82d4a110 689 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 690 { .name = "mlockall",
82d4a110 691 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
beccb2b5 692 { .name = "mmap", .hexret = true,
54265664
JO
693/* The standard mmap maps to old_mmap on s390x */
694#if defined(__s390x__)
695 .alias = "old_mmap",
696#endif
82d4a110
ACM
697 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
698 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
699 [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
1f63139c 700 { .name = "mprotect",
82d4a110
ACM
701 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
702 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
1f63139c 703 { .name = "mq_unlink",
82d4a110 704 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
ae685380 705 { .name = "mremap", .hexret = true,
82d4a110
ACM
706 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
707 [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
708 [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, },
1f63139c 709 { .name = "munlock",
82d4a110 710 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 711 { .name = "munmap",
82d4a110 712 .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
1f63139c 713 { .name = "name_to_handle_at",
82d4a110 714 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 715 { .name = "newfstatat",
82d4a110 716 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 717 { .name = "open",
82d4a110 718 .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 719 { .name = "open_by_handle_at",
82d4a110
ACM
720 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
721 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 722 { .name = "openat",
82d4a110
ACM
723 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
724 [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
1f63139c 725 { .name = "perf_event_open",
82d4a110
ACM
726 .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ },
727 [3] = { .scnprintf = SCA_FD, /* group_fd */ },
728 [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
1f63139c 729 { .name = "pipe2",
82d4a110 730 .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
1f63139c
ACM
731 { .name = "poll", .timeout = true, },
732 { .name = "ppoll", .timeout = true, },
733 { .name = "pread", .alias = "pread64", },
734 { .name = "preadv", .alias = "pread", },
735 { .name = "prlimit64",
82d4a110 736 .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
1f63139c
ACM
737 { .name = "pwrite", .alias = "pwrite64", },
738 { .name = "readlinkat",
82d4a110 739 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 740 { .name = "recvfrom",
82d4a110 741 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 742 { .name = "recvmmsg",
82d4a110 743 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 744 { .name = "recvmsg",
82d4a110 745 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 746 { .name = "renameat",
82d4a110 747 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 748 { .name = "rt_sigaction",
82d4a110 749 .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 750 { .name = "rt_sigprocmask",
82d4a110 751 .arg = { [0] = STRARRAY(how, sighow), }, },
1f63139c 752 { .name = "rt_sigqueueinfo",
82d4a110 753 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 754 { .name = "rt_tgsigqueueinfo",
82d4a110 755 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 756 { .name = "sched_setscheduler",
82d4a110 757 .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
1f63139c 758 { .name = "seccomp",
82d4a110
ACM
759 .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
760 [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
1f63139c
ACM
761 { .name = "select", .timeout = true, },
762 { .name = "sendmmsg",
82d4a110 763 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 764 { .name = "sendmsg",
82d4a110 765 .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
1f63139c 766 { .name = "sendto",
82d4a110 767 .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
c65f1070 768 { .name = "set_tid_address", .errpid = true, },
1f63139c 769 { .name = "setitimer",
82d4a110 770 .arg = { [0] = STRARRAY(which, itimers), }, },
1f63139c 771 { .name = "setrlimit",
82d4a110 772 .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
1f63139c 773 { .name = "socket",
82d4a110
ACM
774 .arg = { [0] = STRARRAY(family, socket_families),
775 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
1f63139c 776 { .name = "socketpair",
82d4a110
ACM
777 .arg = { [0] = STRARRAY(family, socket_families),
778 [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, }, },
1f63139c
ACM
779 { .name = "stat", .alias = "newstat", },
780 { .name = "statx",
82d4a110
ACM
781 .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ },
782 [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } ,
783 [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, },
1f63139c 784 { .name = "swapoff",
82d4a110 785 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 786 { .name = "swapon",
82d4a110 787 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
1f63139c 788 { .name = "symlinkat",
82d4a110 789 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 790 { .name = "tgkill",
82d4a110 791 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c 792 { .name = "tkill",
82d4a110 793 .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
1f63139c
ACM
794 { .name = "uname", .alias = "newuname", },
795 { .name = "unlinkat",
82d4a110 796 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
1f63139c 797 { .name = "utimensat",
82d4a110 798 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
11c8e39f 799 { .name = "wait4", .errpid = true,
82d4a110 800 .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
11c8e39f 801 { .name = "waitid", .errpid = true,
82d4a110 802 .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
514f1c67
ACM
803};
804
805static int syscall_fmt__cmp(const void *name, const void *fmtp)
806{
807 const struct syscall_fmt *fmt = fmtp;
808 return strcmp(name, fmt->name);
809}
810
811static struct syscall_fmt *syscall_fmt__find(const char *name)
812{
813 const int nmemb = ARRAY_SIZE(syscall_fmts);
814 return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
815}
816
/*
 * Description of one syscall: its tracepoint format and argument fields, its
 * name, and the optional formatting overrides (fmt / arg_fmt).
 */
struct syscall {
	struct event_format	*tp_format;
	int			nr_args;
	struct format_field	*args;
	const char		*name;
	bool			is_exit;
	struct syscall_fmt	*fmt;
	struct syscall_arg_fmt	*arg_fmt;
};
826
fd2b2975
ACM
827/*
828 * We need to have this 'calculated' boolean because in some cases we really
829 * don't know what is the duration of a syscall, for instance, when we start
830 * a session and some threads are waiting for a syscall to finish, say 'poll',
831 * in which case all we can do is to print "( ? ) for duration and for the
832 * start timestamp.
833 */
834static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
60c907ab
ACM
835{
836 double duration = (double)t / NSEC_PER_MSEC;
837 size_t printed = fprintf(fp, "(");
838
fd2b2975
ACM
839 if (!calculated)
840 printed += fprintf(fp, " ? ");
841 else if (duration >= 1.0)
60c907ab
ACM
842 printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
843 else if (duration >= 0.01)
844 printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
845 else
846 printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
c24ff998 847 return printed + fprintf(fp, "): ");
60c907ab
ACM
848}
849
f994592d
ACM
850/**
851 * filename.ptr: The filename char pointer that will be vfs_getname'd
852 * filename.entry_str_pos: Where to insert the string translated from
853 * filename.ptr by the vfs_getname tracepoint/kprobe.
84486caa
ACM
854 * ret_scnprintf: syscall args may set this to a different syscall return
855 * formatter, for instance, fcntl may return fds, file flags, etc.
f994592d 856 */
752fde44
ACM
857struct thread_trace {
858 u64 entry_time;
752fde44 859 bool entry_pending;
efd5745e 860 unsigned long nr_events;
a2ea67d7 861 unsigned long pfmaj, pfmin;
752fde44 862 char *entry_str;
1302d88e 863 double runtime_ms;
7ee57434 864 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
f994592d
ACM
865 struct {
866 unsigned long ptr;
7f4f8001
ACM
867 short int entry_str_pos;
868 bool pending_open;
869 unsigned int namelen;
870 char *name;
f994592d 871 } filename;
75b757ca
ACM
872 struct {
873 int max;
874 char **table;
875 } paths;
bf2575c1
DA
876
877 struct intlist *syscall_stats;
752fde44
ACM
878};
879
880static struct thread_trace *thread_trace__new(void)
881{
75b757ca
ACM
882 struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
883
884 if (ttrace)
885 ttrace->paths.max = -1;
886
bf2575c1
DA
887 ttrace->syscall_stats = intlist__new(NULL);
888
75b757ca 889 return ttrace;
752fde44
ACM
890}
891
c24ff998 892static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
752fde44 893{
efd5745e
ACM
894 struct thread_trace *ttrace;
895
752fde44
ACM
896 if (thread == NULL)
897 goto fail;
898
89dceb22
NK
899 if (thread__priv(thread) == NULL)
900 thread__set_priv(thread, thread_trace__new());
48000a1a 901
89dceb22 902 if (thread__priv(thread) == NULL)
752fde44
ACM
903 goto fail;
904
89dceb22 905 ttrace = thread__priv(thread);
efd5745e
ACM
906 ++ttrace->nr_events;
907
908 return ttrace;
752fde44 909fail:
c24ff998 910 color_fprintf(fp, PERF_COLOR_RED,
752fde44
ACM
911 "WARNING: not enough memory, dropping samples!\n");
912 return NULL;
913}
914
84486caa
ACM
915
916void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
7ee57434 917 size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
84486caa
ACM
918{
919 struct thread_trace *ttrace = thread__priv(arg->thread);
920
921 ttrace->ret_scnprintf = ret_scnprintf;
922}
923
598d02c5
SF
924#define TRACE_PFMAJ (1 << 0)
925#define TRACE_PFMIN (1 << 1)
926
e4d44e83
ACM
927static const size_t trace__entry_str_size = 2048;
928
97119f37 929static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
75b757ca 930{
89dceb22 931 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
932
933 if (fd > ttrace->paths.max) {
934 char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
935
936 if (npath == NULL)
937 return -1;
938
939 if (ttrace->paths.max != -1) {
940 memset(npath + ttrace->paths.max + 1, 0,
941 (fd - ttrace->paths.max) * sizeof(char *));
942 } else {
943 memset(npath, 0, (fd + 1) * sizeof(char *));
944 }
945
946 ttrace->paths.table = npath;
947 ttrace->paths.max = fd;
948 }
949
950 ttrace->paths.table[fd] = strdup(pathname);
951
952 return ttrace->paths.table[fd] != NULL ? 0 : -1;
953}
954
97119f37
ACM
955static int thread__read_fd_path(struct thread *thread, int fd)
956{
957 char linkname[PATH_MAX], pathname[PATH_MAX];
958 struct stat st;
959 int ret;
960
961 if (thread->pid_ == thread->tid) {
962 scnprintf(linkname, sizeof(linkname),
963 "/proc/%d/fd/%d", thread->pid_, fd);
964 } else {
965 scnprintf(linkname, sizeof(linkname),
966 "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
967 }
968
969 if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
970 return -1;
971
972 ret = readlink(linkname, pathname, sizeof(pathname));
973
974 if (ret < 0 || ret > st.st_size)
975 return -1;
976
977 pathname[ret] = '\0';
978 return trace__set_fd_pathname(thread, fd, pathname);
979}
980
c522739d
ACM
981static const char *thread__fd_path(struct thread *thread, int fd,
982 struct trace *trace)
75b757ca 983{
89dceb22 984 struct thread_trace *ttrace = thread__priv(thread);
75b757ca
ACM
985
986 if (ttrace == NULL)
987 return NULL;
988
989 if (fd < 0)
990 return NULL;
991
cdcd1e6b 992 if ((fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL)) {
c522739d
ACM
993 if (!trace->live)
994 return NULL;
995 ++trace->stats.proc_getname;
cdcd1e6b 996 if (thread__read_fd_path(thread, fd))
c522739d
ACM
997 return NULL;
998 }
75b757ca
ACM
999
1000 return ttrace->paths.table[fd];
1001}
1002
fc65eb82 1003size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
75b757ca
ACM
1004{
1005 int fd = arg->val;
1006 size_t printed = scnprintf(bf, size, "%d", fd);
c522739d 1007 const char *path = thread__fd_path(arg->thread, fd, arg->trace);
75b757ca
ACM
1008
1009 if (path)
1010 printed += scnprintf(bf + printed, size - printed, "<%s>", path);
1011
1012 return printed;
1013}
1014
1015static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
1016 struct syscall_arg *arg)
1017{
1018 int fd = arg->val;
1019 size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
89dceb22 1020 struct thread_trace *ttrace = thread__priv(arg->thread);
75b757ca 1021
04662523
ACM
1022 if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
1023 zfree(&ttrace->paths.table[fd]);
75b757ca
ACM
1024
1025 return printed;
1026}
1027
f994592d
ACM
1028static void thread__set_filename_pos(struct thread *thread, const char *bf,
1029 unsigned long ptr)
1030{
1031 struct thread_trace *ttrace = thread__priv(thread);
1032
1033 ttrace->filename.ptr = ptr;
1034 ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
1035}
1036
1037static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
1038 struct syscall_arg *arg)
1039{
1040 unsigned long ptr = arg->val;
1041
1042 if (!arg->trace->vfs_getname)
1043 return scnprintf(bf, size, "%#x", ptr);
1044
1045 thread__set_filename_pos(arg->thread, bf, ptr);
1046 return 0;
1047}
1048
ae9ed035
ACM
1049static bool trace__filter_duration(struct trace *trace, double t)
1050{
1051 return t < (trace->duration_filter * NSEC_PER_MSEC);
1052}
1053
fd2b2975 1054static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
752fde44
ACM
1055{
1056 double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
1057
60c907ab 1058 return fprintf(fp, "%10.3f ", ts);
752fde44
ACM
1059}
1060
fd2b2975
ACM
1061/*
1062 * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
1063 * using ttrace->entry_time for a thread that receives a sys_exit without
1064 * first having received a sys_enter ("poll" issued before tracing session
1065 * starts, lost sys_enter exit due to ring buffer overflow).
1066 */
1067static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
1068{
1069 if (tstamp > 0)
1070 return __trace__fprintf_tstamp(trace, tstamp, fp);
1071
1072 return fprintf(fp, " ? ");
1073}
1074
/*
 * Flags set by sig_handler(). They are written from signal context, so they
 * use volatile sig_atomic_t, the type the C standard guarantees can be
 * safely stored to from a signal handler; a plain bool gives no such
 * guarantee.
 */
static volatile sig_atomic_t done = false;
static volatile sig_atomic_t interrupted = false;

/*
 * Record that a signal arrived, and whether that signal was SIGINT.
 */
static void sig_handler(int sig)
{
	done = true;
	interrupted = sig == SIGINT;
}
1083
752fde44 1084static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
fd2b2975 1085 u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
752fde44
ACM
1086{
1087 size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
fd2b2975 1088 printed += fprintf_duration(duration, duration_calculated, fp);
752fde44 1089
50c95cbd
ACM
1090 if (trace->multiple_threads) {
1091 if (trace->show_comm)
1902efe7 1092 printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
38051234 1093 printed += fprintf(fp, "%d ", thread->tid);
50c95cbd 1094 }
752fde44
ACM
1095
1096 return printed;
1097}
1098
c24ff998 1099static int trace__process_event(struct trace *trace, struct machine *machine,
162f0bef 1100 union perf_event *event, struct perf_sample *sample)
752fde44
ACM
1101{
1102 int ret = 0;
1103
1104 switch (event->header.type) {
1105 case PERF_RECORD_LOST:
c24ff998 1106 color_fprintf(trace->output, PERF_COLOR_RED,
752fde44 1107 "LOST %" PRIu64 " events!\n", event->lost.lost);
162f0bef 1108 ret = machine__process_lost_event(machine, event, sample);
3ed5ca2e 1109 break;
752fde44 1110 default:
162f0bef 1111 ret = machine__process_event(machine, event, sample);
752fde44
ACM
1112 break;
1113 }
1114
1115 return ret;
1116}
1117
c24ff998 1118static int trace__tool_process(struct perf_tool *tool,
752fde44 1119 union perf_event *event,
162f0bef 1120 struct perf_sample *sample,
752fde44
ACM
1121 struct machine *machine)
1122{
c24ff998 1123 struct trace *trace = container_of(tool, struct trace, tool);
162f0bef 1124 return trace__process_event(trace, machine, event, sample);
752fde44
ACM
1125}
1126
caf8a0d0
ACM
1127static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
1128{
1129 struct machine *machine = vmachine;
1130
1131 if (machine->kptr_restrict_warned)
1132 return NULL;
1133
1134 if (symbol_conf.kptr_restrict) {
1135 pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
1136 "Check /proc/sys/kernel/kptr_restrict.\n\n"
1137 "Kernel samples will not be resolved.\n");
1138 machine->kptr_restrict_warned = true;
1139 return NULL;
1140 }
1141
1142 return machine__resolve_kernel_addr(vmachine, addrp, modp);
1143}
1144
752fde44
ACM
1145static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
1146{
0a7e6d1b 1147 int err = symbol__init(NULL);
752fde44
ACM
1148
1149 if (err)
1150 return err;
1151
8fb598e5
DA
1152 trace->host = machine__new_host();
1153 if (trace->host == NULL)
1154 return -ENOMEM;
752fde44 1155
caf8a0d0 1156 if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0)
706c3da4
ACM
1157 return -errno;
1158
a33fbd56 1159 err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
9d9cad76
KL
1160 evlist->threads, trace__tool_process, false,
1161 trace->opts.proc_map_timeout);
752fde44
ACM
1162 if (err)
1163 symbol__exit();
1164
1165 return err;
1166}
1167
13d4ff3e
ACM
1168static int syscall__set_arg_fmts(struct syscall *sc)
1169{
1170 struct format_field *field;
b6565c90 1171 int idx = 0, len;
13d4ff3e 1172
82d4a110
ACM
1173 sc->arg_fmt = calloc(sc->nr_args, sizeof(*sc->arg_fmt));
1174 if (sc->arg_fmt == NULL)
13d4ff3e
ACM
1175 return -1;
1176
82d4a110
ACM
1177 for (field = sc->args; field; field = field->next, ++idx) {
1178 if (sc->fmt) {
1179 sc->arg_fmt[idx] = sc->fmt->arg[idx];
1180
1181 if (sc->fmt->arg[idx].scnprintf)
1182 continue;
1183 }
1f115cb7 1184
82d4a110 1185 if (strcmp(field->type, "const char *") == 0 &&
12f3ca4f
ACM
1186 (strcmp(field->name, "filename") == 0 ||
1187 strcmp(field->name, "path") == 0 ||
1188 strcmp(field->name, "pathname") == 0))
82d4a110 1189 sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
beccb2b5 1190 else if (field->flags & FIELD_IS_POINTER)
82d4a110 1191 sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
d1d438a3 1192 else if (strcmp(field->type, "pid_t") == 0)
82d4a110 1193 sc->arg_fmt[idx].scnprintf = SCA_PID;
ba2f22cf 1194 else if (strcmp(field->type, "umode_t") == 0)
82d4a110 1195 sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
b6565c90
ACM
1196 else if ((strcmp(field->type, "int") == 0 ||
1197 strcmp(field->type, "unsigned int") == 0 ||
1198 strcmp(field->type, "long") == 0) &&
1199 (len = strlen(field->name)) >= 2 &&
1200 strcmp(field->name + len - 2, "fd") == 0) {
1201 /*
1202 * /sys/kernel/tracing/events/syscalls/sys_enter*
1203 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
1204 * 65 int
1205 * 23 unsigned int
1206 * 7 unsigned long
1207 */
82d4a110 1208 sc->arg_fmt[idx].scnprintf = SCA_FD;
b6565c90 1209 }
13d4ff3e
ACM
1210 }
1211
1212 return 0;
1213}
1214
514f1c67
ACM
1215static int trace__read_syscall_info(struct trace *trace, int id)
1216{
1217 char tp_name[128];
1218 struct syscall *sc;
fd0db102 1219 const char *name = syscalltbl__name(trace->sctbl, id);
3a531260
ACM
1220
1221 if (name == NULL)
1222 return -1;
514f1c67
ACM
1223
1224 if (id > trace->syscalls.max) {
1225 struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
1226
1227 if (nsyscalls == NULL)
1228 return -1;
1229
1230 if (trace->syscalls.max != -1) {
1231 memset(nsyscalls + trace->syscalls.max + 1, 0,
1232 (id - trace->syscalls.max) * sizeof(*sc));
1233 } else {
1234 memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
1235 }
1236
1237 trace->syscalls.table = nsyscalls;
1238 trace->syscalls.max = id;
1239 }
1240
1241 sc = trace->syscalls.table + id;
3a531260 1242 sc->name = name;
2ae3a312 1243
3a531260 1244 sc->fmt = syscall_fmt__find(sc->name);
514f1c67 1245
aec1930b 1246 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
97978b3e 1247 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1248
8dd2a131 1249 if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
aec1930b 1250 snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
97978b3e 1251 sc->tp_format = trace_event__tp_format("syscalls", tp_name);
aec1930b 1252 }
514f1c67 1253
8dd2a131 1254 if (IS_ERR(sc->tp_format))
13d4ff3e
ACM
1255 return -1;
1256
f208bd8d
ACM
1257 sc->args = sc->tp_format->format.fields;
1258 sc->nr_args = sc->tp_format->format.nr_fields;
c42de706
TS
1259 /*
1260 * We need to check and discard the first variable '__syscall_nr'
1261 * or 'nr' that mean the syscall number. It is needless here.
1262 * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels.
1263 */
1264 if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
f208bd8d
ACM
1265 sc->args = sc->args->next;
1266 --sc->nr_args;
1267 }
1268
5089f20e
ACM
1269 sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
1270
13d4ff3e 1271 return syscall__set_arg_fmts(sc);
514f1c67
ACM
1272}
1273
d0cc439b
ACM
1274static int trace__validate_ev_qualifier(struct trace *trace)
1275{
8b3ce757 1276 int err = 0, i;
d0cc439b
ACM
1277 struct str_node *pos;
1278
8b3ce757
ACM
1279 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
1280 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1281 sizeof(trace->ev_qualifier_ids.entries[0]));
1282
1283 if (trace->ev_qualifier_ids.entries == NULL) {
1284 fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
1285 trace->output);
1286 err = -EINVAL;
1287 goto out;
1288 }
1289
1290 i = 0;
1291
602a1f4d 1292 strlist__for_each_entry(pos, trace->ev_qualifier) {
d0cc439b 1293 const char *sc = pos->s;
fd0db102 1294 int id = syscalltbl__id(trace->sctbl, sc);
d0cc439b 1295
8b3ce757 1296 if (id < 0) {
d0cc439b
ACM
1297 if (err == 0) {
1298 fputs("Error:\tInvalid syscall ", trace->output);
1299 err = -EINVAL;
1300 } else {
1301 fputs(", ", trace->output);
1302 }
1303
1304 fputs(sc, trace->output);
1305 }
8b3ce757
ACM
1306
1307 trace->ev_qualifier_ids.entries[i++] = id;
d0cc439b
ACM
1308 }
1309
1310 if (err < 0) {
1311 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1312 "\nHint:\tand: 'man syscalls'\n", trace->output);
8b3ce757
ACM
1313 zfree(&trace->ev_qualifier_ids.entries);
1314 trace->ev_qualifier_ids.nr = 0;
d0cc439b 1315 }
8b3ce757 1316out:
d0cc439b
ACM
1317 return err;
1318}
1319
55d43bca
DA
1320/*
1321 * args is to be interpreted as a series of longs but we need to handle
1322 * 8-byte unaligned accesses. args points to raw_data within the event
1323 * and raw_data is guaranteed to be 8-byte unaligned because it is
1324 * preceded by raw_size which is a u32. So we need to copy args to a temp
1325 * variable to read it. Most notably this avoids extended load instructions
1326 * on unaligned addresses
1327 */
f9f83b33
ACM
1328static unsigned long __syscall_arg__val(unsigned char *args, u8 idx)
1329{
1330 unsigned long val;
1331 unsigned char *p = args + sizeof(unsigned long) * idx;
1332
1333 memcpy(&val, p, sizeof(val));
1334 return val;
1335}
1336
1337unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
1338{
1339 return __syscall_arg__val(arg->args, idx);
1340}
55d43bca 1341
752fde44 1342static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
55d43bca 1343 unsigned char *args, struct trace *trace,
75b757ca 1344 struct thread *thread)
514f1c67 1345{
514f1c67 1346 size_t printed = 0;
55d43bca 1347 unsigned long val;
84486caa
ACM
1348 struct thread_trace *ttrace = thread__priv(thread);
1349
1350 /*
1351 * Things like fcntl will set this in its 'cmd' formatter to pick the
1352 * right formatter for the return value (an fd? file flags?), which is
1353 * not needed for syscalls that always return a given type, say an fd.
1354 */
1355 ttrace->ret_scnprintf = NULL;
514f1c67 1356
f208bd8d 1357 if (sc->args != NULL) {
514f1c67 1358 struct format_field *field;
01533e97
ACM
1359 u8 bit = 1;
1360 struct syscall_arg arg = {
f9f83b33 1361 .args = args,
75b757ca
ACM
1362 .idx = 0,
1363 .mask = 0,
1364 .trace = trace,
1365 .thread = thread,
01533e97 1366 };
6e7eeb51 1367
f208bd8d 1368 for (field = sc->args; field;
01533e97
ACM
1369 field = field->next, ++arg.idx, bit <<= 1) {
1370 if (arg.mask & bit)
6e7eeb51 1371 continue;
55d43bca 1372
f9f83b33 1373 val = syscall_arg__val(&arg, arg.idx);
55d43bca 1374
4aa58232
ACM
1375 /*
1376 * Suppress this argument if its value is zero and
1377 * and we don't have a string associated in an
1378 * strarray for it.
1379 */
55d43bca 1380 if (val == 0 &&
82d4a110 1381 !(sc->arg_fmt &&
d47737d5
ACM
1382 (sc->arg_fmt[arg.idx].show_zero ||
1383 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
82d4a110
ACM
1384 sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
1385 sc->arg_fmt[arg.idx].parm))
22ae5cf1
ACM
1386 continue;
1387
752fde44 1388 printed += scnprintf(bf + printed, size - printed,
13d4ff3e 1389 "%s%s: ", printed ? ", " : "", field->name);
82d4a110 1390 if (sc->arg_fmt && sc->arg_fmt[arg.idx].scnprintf) {
55d43bca 1391 arg.val = val;
82d4a110
ACM
1392 if (sc->arg_fmt[arg.idx].parm)
1393 arg.parm = sc->arg_fmt[arg.idx].parm;
1394 printed += sc->arg_fmt[arg.idx].scnprintf(bf + printed, size - printed, &arg);
6e7eeb51 1395 } else {
13d4ff3e 1396 printed += scnprintf(bf + printed, size - printed,
55d43bca 1397 "%ld", val);
6e7eeb51 1398 }
514f1c67 1399 }
4c4d6e51
ACM
1400 } else if (IS_ERR(sc->tp_format)) {
1401 /*
1402 * If we managed to read the tracepoint /format file, then we
1403 * may end up not having any args, like with gettid(), so only
1404 * print the raw args when we didn't manage to read it.
1405 */
01533e97
ACM
1406 int i = 0;
1407
514f1c67 1408 while (i < 6) {
f9f83b33 1409 val = __syscall_arg__val(args, i);
752fde44
ACM
1410 printed += scnprintf(bf + printed, size - printed,
1411 "%sarg%d: %ld",
55d43bca 1412 printed ? ", " : "", i, val);
514f1c67
ACM
1413 ++i;
1414 }
1415 }
1416
1417 return printed;
1418}
1419
ba3d7dee 1420typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1421 union perf_event *event,
ba3d7dee
ACM
1422 struct perf_sample *sample);
1423
1424static struct syscall *trace__syscall_info(struct trace *trace,
bf2575c1 1425 struct perf_evsel *evsel, int id)
ba3d7dee 1426{
ba3d7dee
ACM
1427
1428 if (id < 0) {
adaa18bf
ACM
1429
1430 /*
1431 * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
1432 * before that, leaving at a higher verbosity level till that is
1433 * explained. Reproduced with plain ftrace with:
1434 *
1435 * echo 1 > /t/events/raw_syscalls/sys_exit/enable
1436 * grep "NR -1 " /t/trace_pipe
1437 *
1438 * After generating some load on the machine.
1439 */
1440 if (verbose > 1) {
1441 static u64 n;
1442 fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
1443 id, perf_evsel__name(evsel), ++n);
1444 }
ba3d7dee
ACM
1445 return NULL;
1446 }
1447
1448 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
1449 trace__read_syscall_info(trace, id))
1450 goto out_cant_read;
1451
1452 if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
1453 goto out_cant_read;
1454
1455 return &trace->syscalls.table[id];
1456
1457out_cant_read:
bb963e16 1458 if (verbose > 0) {
7c304ee0
ACM
1459 fprintf(trace->output, "Problems reading syscall %d", id);
1460 if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
1461 fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
1462 fputs(" information\n", trace->output);
1463 }
ba3d7dee
ACM
1464 return NULL;
1465}
1466
bf2575c1
DA
1467static void thread__update_stats(struct thread_trace *ttrace,
1468 int id, struct perf_sample *sample)
1469{
1470 struct int_node *inode;
1471 struct stats *stats;
1472 u64 duration = 0;
1473
1474 inode = intlist__findnew(ttrace->syscall_stats, id);
1475 if (inode == NULL)
1476 return;
1477
1478 stats = inode->priv;
1479 if (stats == NULL) {
1480 stats = malloc(sizeof(struct stats));
1481 if (stats == NULL)
1482 return;
1483 init_stats(stats);
1484 inode->priv = stats;
1485 }
1486
1487 if (ttrace->entry_time && sample->time > ttrace->entry_time)
1488 duration = sample->time - ttrace->entry_time;
1489
1490 update_stats(stats, duration);
1491}
1492
e596663e
ACM
1493static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample)
1494{
1495 struct thread_trace *ttrace;
1496 u64 duration;
1497 size_t printed;
1498
1499 if (trace->current == NULL)
1500 return 0;
1501
1502 ttrace = thread__priv(trace->current);
1503
1504 if (!ttrace->entry_pending)
1505 return 0;
1506
1507 duration = sample->time - ttrace->entry_time;
1508
fd2b2975 1509 printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output);
e596663e
ACM
1510 printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
1511 ttrace->entry_pending = false;
1512
1513 return printed;
1514}
1515
ba3d7dee 1516static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1517 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1518 struct perf_sample *sample)
1519{
752fde44 1520 char *msg;
ba3d7dee 1521 void *args;
752fde44 1522 size_t printed = 0;
2ae3a312 1523 struct thread *thread;
b91fc39f 1524 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
bf2575c1 1525 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1526 struct thread_trace *ttrace;
1527
1528 if (sc == NULL)
1529 return -1;
ba3d7dee 1530
8fb598e5 1531 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1532 ttrace = thread__trace(thread, trace->output);
2ae3a312 1533 if (ttrace == NULL)
b91fc39f 1534 goto out_put;
ba3d7dee 1535
77170988 1536 args = perf_evsel__sc_tp_ptr(evsel, args, sample);
752fde44
ACM
1537
1538 if (ttrace->entry_str == NULL) {
e4d44e83 1539 ttrace->entry_str = malloc(trace__entry_str_size);
752fde44 1540 if (!ttrace->entry_str)
b91fc39f 1541 goto out_put;
752fde44
ACM
1542 }
1543
5cf9c84e 1544 if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
6ebad5c1 1545 trace__printf_interrupted_entry(trace, sample);
e596663e 1546
752fde44
ACM
1547 ttrace->entry_time = sample->time;
1548 msg = ttrace->entry_str;
e4d44e83 1549 printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
752fde44 1550
e4d44e83 1551 printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
75b757ca 1552 args, trace, thread);
752fde44 1553
5089f20e 1554 if (sc->is_exit) {
5cf9c84e 1555 if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) {
fd2b2975 1556 trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
c008f78f 1557 fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
ae9ed035 1558 }
7f4f8001 1559 } else {
752fde44 1560 ttrace->entry_pending = true;
7f4f8001
ACM
1561 /* See trace__vfs_getname & trace__sys_exit */
1562 ttrace->filename.pending_open = false;
1563 }
ba3d7dee 1564
f3b623b8
ACM
1565 if (trace->current != thread) {
1566 thread__put(trace->current);
1567 trace->current = thread__get(thread);
1568 }
b91fc39f
ACM
1569 err = 0;
1570out_put:
1571 thread__put(thread);
1572 return err;
ba3d7dee
ACM
1573}
1574
5cf9c84e
ACM
1575static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
1576 struct perf_sample *sample,
1577 struct callchain_cursor *cursor)
202ff968
ACM
1578{
1579 struct addr_location al;
5cf9c84e
ACM
1580
1581 if (machine__resolve(trace->host, &al, sample) < 0 ||
1582 thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack))
1583 return -1;
1584
1585 return 0;
1586}
1587
1588static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
1589{
202ff968 1590 /* TODO: user-configurable print_opts */
e20ab86e
ACM
1591 const unsigned int print_opts = EVSEL__PRINT_SYM |
1592 EVSEL__PRINT_DSO |
1593 EVSEL__PRINT_UNKNOWN_AS_ADDR;
202ff968 1594
d327e60c 1595 return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
202ff968
ACM
1596}
1597
ba3d7dee 1598static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1599 union perf_event *event __maybe_unused,
ba3d7dee
ACM
1600 struct perf_sample *sample)
1601{
2c82c3ad 1602 long ret;
60c907ab 1603 u64 duration = 0;
fd2b2975 1604 bool duration_calculated = false;
2ae3a312 1605 struct thread *thread;
5cf9c84e 1606 int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
bf2575c1 1607 struct syscall *sc = trace__syscall_info(trace, evsel, id);
2ae3a312
ACM
1608 struct thread_trace *ttrace;
1609
1610 if (sc == NULL)
1611 return -1;
ba3d7dee 1612
8fb598e5 1613 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
c24ff998 1614 ttrace = thread__trace(thread, trace->output);
2ae3a312 1615 if (ttrace == NULL)
b91fc39f 1616 goto out_put;
ba3d7dee 1617
bf2575c1
DA
1618 if (trace->summary)
1619 thread__update_stats(ttrace, id, sample);
1620
77170988 1621 ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
ba3d7dee 1622
fd0db102 1623 if (id == trace->open_id && ret >= 0 && ttrace->filename.pending_open) {
7f4f8001
ACM
1624 trace__set_fd_pathname(thread, ret, ttrace->filename.name);
1625 ttrace->filename.pending_open = false;
c522739d
ACM
1626 ++trace->stats.vfs_getname;
1627 }
1628
ae9ed035 1629 if (ttrace->entry_time) {
60c907ab 1630 duration = sample->time - ttrace->entry_time;
ae9ed035
ACM
1631 if (trace__filter_duration(trace, duration))
1632 goto out;
fd2b2975 1633 duration_calculated = true;
ae9ed035
ACM
1634 } else if (trace->duration_filter)
1635 goto out;
60c907ab 1636
5cf9c84e
ACM
1637 if (sample->callchain) {
1638 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1639 if (callchain_ret == 0) {
1640 if (callchain_cursor.nr < trace->min_stack)
1641 goto out;
1642 callchain_ret = 1;
1643 }
1644 }
1645
fd2eabaf
DA
1646 if (trace->summary_only)
1647 goto out;
1648
fd2b2975 1649 trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
752fde44
ACM
1650
1651 if (ttrace->entry_pending) {
c24ff998 1652 fprintf(trace->output, "%-70s", ttrace->entry_str);
752fde44 1653 } else {
c24ff998
ACM
1654 fprintf(trace->output, " ... [");
1655 color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
1656 fprintf(trace->output, "]: %s()", sc->name);
752fde44
ACM
1657 }
1658
da3c9a44 1659 if (sc->fmt == NULL) {
1f63139c
ACM
1660 if (ret < 0)
1661 goto errno_print;
da3c9a44 1662signed_print:
6f8fe61e 1663 fprintf(trace->output, ") = %ld", ret);
1f63139c
ACM
1664 } else if (ret < 0) {
1665errno_print: {
942a91ed 1666 char bf[STRERR_BUFSIZE];
c8b5f2c9 1667 const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
ba3d7dee
ACM
1668 *e = audit_errno_to_name(-ret);
1669
c24ff998 1670 fprintf(trace->output, ") = -1 %s %s", e, emsg);
1f63139c 1671 }
da3c9a44 1672 } else if (ret == 0 && sc->fmt->timeout)
c24ff998 1673 fprintf(trace->output, ") = 0 Timeout");
84486caa
ACM
1674 else if (ttrace->ret_scnprintf) {
1675 char bf[1024];
7ee57434
ACM
1676 struct syscall_arg arg = {
1677 .val = ret,
1678 .thread = thread,
1679 .trace = trace,
1680 };
1681 ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
84486caa
ACM
1682 ttrace->ret_scnprintf = NULL;
1683 fprintf(trace->output, ") = %s", bf);
1684 } else if (sc->fmt->hexret)
2c82c3ad 1685 fprintf(trace->output, ") = %#lx", ret);
11c8e39f
ACM
1686 else if (sc->fmt->errpid) {
1687 struct thread *child = machine__find_thread(trace->host, ret, ret);
1688
1689 if (child != NULL) {
1690 fprintf(trace->output, ") = %ld", ret);
1691 if (child->comm_set)
1692 fprintf(trace->output, " (%s)", thread__comm_str(child));
1693 thread__put(child);
1694 }
1695 } else
da3c9a44 1696 goto signed_print;
ba3d7dee 1697
c24ff998 1698 fputc('\n', trace->output);
566a0885 1699
5cf9c84e
ACM
1700 if (callchain_ret > 0)
1701 trace__fprintf_callchain(trace, sample);
1702 else if (callchain_ret < 0)
1703 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
ae9ed035 1704out:
752fde44 1705 ttrace->entry_pending = false;
b91fc39f
ACM
1706 err = 0;
1707out_put:
1708 thread__put(thread);
1709 return err;
ba3d7dee
ACM
1710}
1711
c522739d 1712static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1713 union perf_event *event __maybe_unused,
c522739d
ACM
1714 struct perf_sample *sample)
1715{
f994592d
ACM
1716 struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1717 struct thread_trace *ttrace;
1718 size_t filename_len, entry_str_len, to_move;
1719 ssize_t remaining_space;
1720 char *pos;
7f4f8001 1721 const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
f994592d
ACM
1722
1723 if (!thread)
1724 goto out;
1725
1726 ttrace = thread__priv(thread);
1727 if (!ttrace)
ef65e96e 1728 goto out_put;
f994592d 1729
7f4f8001 1730 filename_len = strlen(filename);
39f0e7a8 1731 if (filename_len == 0)
ef65e96e 1732 goto out_put;
7f4f8001
ACM
1733
1734 if (ttrace->filename.namelen < filename_len) {
1735 char *f = realloc(ttrace->filename.name, filename_len + 1);
1736
1737 if (f == NULL)
ef65e96e 1738 goto out_put;
7f4f8001
ACM
1739
1740 ttrace->filename.namelen = filename_len;
1741 ttrace->filename.name = f;
1742 }
1743
1744 strcpy(ttrace->filename.name, filename);
1745 ttrace->filename.pending_open = true;
1746
f994592d 1747 if (!ttrace->filename.ptr)
ef65e96e 1748 goto out_put;
f994592d
ACM
1749
1750 entry_str_len = strlen(ttrace->entry_str);
1751 remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
1752 if (remaining_space <= 0)
ef65e96e 1753 goto out_put;
f994592d 1754
f994592d
ACM
1755 if (filename_len > (size_t)remaining_space) {
1756 filename += filename_len - remaining_space;
1757 filename_len = remaining_space;
1758 }
1759
1760 to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
1761 pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
1762 memmove(pos + filename_len, pos, to_move);
1763 memcpy(pos, filename, filename_len);
1764
1765 ttrace->filename.ptr = 0;
1766 ttrace->filename.entry_str_pos = 0;
ef65e96e
ACM
1767out_put:
1768 thread__put(thread);
f994592d 1769out:
c522739d
ACM
1770 return 0;
1771}
1772
1302d88e 1773static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
0c82adcf 1774 union perf_event *event __maybe_unused,
1302d88e
ACM
1775 struct perf_sample *sample)
1776{
1777 u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
1778 double runtime_ms = (double)runtime / NSEC_PER_MSEC;
8fb598e5 1779 struct thread *thread = machine__findnew_thread(trace->host,
314add6b
AH
1780 sample->pid,
1781 sample->tid);
c24ff998 1782 struct thread_trace *ttrace = thread__trace(thread, trace->output);
1302d88e
ACM
1783
1784 if (ttrace == NULL)
1785 goto out_dump;
1786
1787 ttrace->runtime_ms += runtime_ms;
1788 trace->runtime_ms += runtime_ms;
ef65e96e 1789out_put:
b91fc39f 1790 thread__put(thread);
1302d88e
ACM
1791 return 0;
1792
1793out_dump:
c24ff998 1794 fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
1302d88e
ACM
1795 evsel->name,
1796 perf_evsel__strval(evsel, sample, "comm"),
1797 (pid_t)perf_evsel__intval(evsel, sample, "pid"),
1798 runtime,
1799 perf_evsel__intval(evsel, sample, "vruntime"));
ef65e96e 1800 goto out_put;
1302d88e
ACM
1801}
1802
1d6c9407
WN
1803static void bpf_output__printer(enum binary_printer_ops op,
1804 unsigned int val, void *extra)
1805{
1806 FILE *output = extra;
1807 unsigned char ch = (unsigned char)val;
1808
1809 switch (op) {
1810 case BINARY_PRINT_CHAR_DATA:
1811 fprintf(output, "%c", isprint(ch) ? ch : '.');
1812 break;
1813 case BINARY_PRINT_DATA_BEGIN:
1814 case BINARY_PRINT_LINE_BEGIN:
1815 case BINARY_PRINT_ADDR:
1816 case BINARY_PRINT_NUM_DATA:
1817 case BINARY_PRINT_NUM_PAD:
1818 case BINARY_PRINT_SEP:
1819 case BINARY_PRINT_CHAR_PAD:
1820 case BINARY_PRINT_LINE_END:
1821 case BINARY_PRINT_DATA_END:
1822 default:
1823 break;
1824 }
1825}
1826
1827static void bpf_output__fprintf(struct trace *trace,
1828 struct perf_sample *sample)
1829{
1830 print_binary(sample->raw_data, sample->raw_size, 8,
1831 bpf_output__printer, trace->output);
1832}
1833
14a052df
ACM
1834static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
1835 union perf_event *event __maybe_unused,
1836 struct perf_sample *sample)
1837{
7ad35615
ACM
1838 int callchain_ret = 0;
1839
1840 if (sample->callchain) {
1841 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1842 if (callchain_ret == 0) {
1843 if (callchain_cursor.nr < trace->min_stack)
1844 goto out;
1845 callchain_ret = 1;
1846 }
1847 }
1848
14a052df
ACM
1849 trace__printf_interrupted_entry(trace, sample);
1850 trace__fprintf_tstamp(trace, sample->time, trace->output);
0808921a
ACM
1851
1852 if (trace->trace_syscalls)
1853 fprintf(trace->output, "( ): ");
1854
1855 fprintf(trace->output, "%s:", evsel->name);
14a052df 1856
1d6c9407
WN
1857 if (perf_evsel__is_bpf_output(evsel)) {
1858 bpf_output__fprintf(trace, sample);
1859 } else if (evsel->tp_format) {
14a052df
ACM
1860 event_format__fprintf(evsel->tp_format, sample->cpu,
1861 sample->raw_data, sample->raw_size,
1862 trace->output);
1863 }
1864
1865 fprintf(trace->output, ")\n");
202ff968 1866
7ad35615
ACM
1867 if (callchain_ret > 0)
1868 trace__fprintf_callchain(trace, sample);
1869 else if (callchain_ret < 0)
1870 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
1871out:
14a052df
ACM
1872 return 0;
1873}
1874
598d02c5
SF
1875static void print_location(FILE *f, struct perf_sample *sample,
1876 struct addr_location *al,
1877 bool print_dso, bool print_sym)
1878{
1879
bb963e16 1880 if ((verbose > 0 || print_dso) && al->map)
598d02c5
SF
1881 fprintf(f, "%s@", al->map->dso->long_name);
1882
bb963e16 1883 if ((verbose > 0 || print_sym) && al->sym)
4414a3c5 1884 fprintf(f, "%s+0x%" PRIx64, al->sym->name,
598d02c5
SF
1885 al->addr - al->sym->start);
1886 else if (al->map)
4414a3c5 1887 fprintf(f, "0x%" PRIx64, al->addr);
598d02c5 1888 else
4414a3c5 1889 fprintf(f, "0x%" PRIx64, sample->addr);
598d02c5
SF
1890}
1891
1892static int trace__pgfault(struct trace *trace,
1893 struct perf_evsel *evsel,
473398a2 1894 union perf_event *event __maybe_unused,
598d02c5
SF
1895 struct perf_sample *sample)
1896{
1897 struct thread *thread;
598d02c5
SF
1898 struct addr_location al;
1899 char map_type = 'd';
a2ea67d7 1900 struct thread_trace *ttrace;
b91fc39f 1901 int err = -1;
1df54290 1902 int callchain_ret = 0;
598d02c5
SF
1903
1904 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1df54290
ACM
1905
1906 if (sample->callchain) {
1907 callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
1908 if (callchain_ret == 0) {
1909 if (callchain_cursor.nr < trace->min_stack)
1910 goto out_put;
1911 callchain_ret = 1;
1912 }
1913 }
1914
a2ea67d7
SF
1915 ttrace = thread__trace(thread, trace->output);
1916 if (ttrace == NULL)
b91fc39f 1917 goto out_put;
a2ea67d7
SF
1918
1919 if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
1920 ttrace->pfmaj++;
1921 else
1922 ttrace->pfmin++;
1923
1924 if (trace->summary_only)
b91fc39f 1925 goto out;
598d02c5 1926
473398a2 1927 thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
598d02c5
SF
1928 sample->ip, &al);
1929
fd2b2975 1930 trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
598d02c5
SF
1931
1932 fprintf(trace->output, "%sfault [",
1933 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
1934 "maj" : "min");
1935
1936 print_location(trace->output, sample, &al, false, true);
1937
1938 fprintf(trace->output, "] => ");
1939
473398a2 1940 thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
598d02c5
SF
1941 sample->addr, &al);
1942
1943 if (!al.map) {
473398a2 1944 thread__find_addr_location(thread, sample->cpumode,
598d02c5
SF
1945 MAP__FUNCTION, sample->addr, &al);
1946
1947 if (al.map)
1948 map_type = 'x';
1949 else
1950 map_type = '?';
1951 }
1952
1953 print_location(trace->output, sample, &al, true, false);
1954
1955 fprintf(trace->output, " (%c%c)\n", map_type, al.level);
0c3a6ef4 1956
1df54290
ACM
1957 if (callchain_ret > 0)
1958 trace__fprintf_callchain(trace, sample);
1959 else if (callchain_ret < 0)
1960 pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
b91fc39f
ACM
1961out:
1962 err = 0;
1963out_put:
1964 thread__put(thread);
1965 return err;
598d02c5
SF
1966}
1967
e6001980 1968static void trace__set_base_time(struct trace *trace,
8a07a809 1969 struct perf_evsel *evsel,
e6001980
ACM
1970 struct perf_sample *sample)
1971{
8a07a809
ACM
1972 /*
1973 * BPF events were not setting PERF_SAMPLE_TIME, so be more robust
1974 * and don't use sample->time unconditionally, we may end up having
1975 * some other event in the future without PERF_SAMPLE_TIME for good
1976 * reason, i.e. we may not be interested in its timestamps, just in
1977 * it taking place, picking some piece of information when it
1978 * appears in our event stream (vfs_getname comes to mind).
1979 */
1980 if (trace->base_time == 0 && !trace->full_time &&
1981 (evsel->attr.sample_type & PERF_SAMPLE_TIME))
e6001980
ACM
1982 trace->base_time = sample->time;
1983}
1984
6810fc91 1985static int trace__process_sample(struct perf_tool *tool,
0c82adcf 1986 union perf_event *event,
6810fc91
DA
1987 struct perf_sample *sample,
1988 struct perf_evsel *evsel,
1989 struct machine *machine __maybe_unused)
1990{
1991 struct trace *trace = container_of(tool, struct trace, tool);
aa07df6e 1992 struct thread *thread;
6810fc91
DA
1993 int err = 0;
1994
744a9719 1995 tracepoint_handler handler = evsel->handler;
6810fc91 1996
aa07df6e
DA
1997 thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
1998 if (thread && thread__is_filtered(thread))
ef65e96e 1999 goto out;
bdc89661 2000
e6001980 2001 trace__set_base_time(trace, evsel, sample);
6810fc91 2002
3160565f
DA
2003 if (handler) {
2004 ++trace->nr_events;
0c82adcf 2005 handler(trace, evsel, event, sample);
3160565f 2006 }
ef65e96e
ACM
2007out:
2008 thread__put(thread);
6810fc91
DA
2009 return err;
2010}
2011
1e28fe0a 2012static int trace__record(struct trace *trace, int argc, const char **argv)
5e2485b1
DA
2013{
2014 unsigned int rec_argc, i, j;
2015 const char **rec_argv;
2016 const char * const record_args[] = {
2017 "record",
2018 "-R",
2019 "-m", "1024",
2020 "-c", "1",
5e2485b1
DA
2021 };
2022
1e28fe0a
SF
2023 const char * const sc_args[] = { "-e", };
2024 unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
2025 const char * const majpf_args[] = { "-e", "major-faults" };
2026 unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
2027 const char * const minpf_args[] = { "-e", "minor-faults" };
2028 unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
2029
9aca7f17 2030 /* +1 is for the event string below */
1e28fe0a
SF
2031 rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
2032 majpf_args_nr + minpf_args_nr + argc;
5e2485b1
DA
2033 rec_argv = calloc(rec_argc + 1, sizeof(char *));
2034
2035 if (rec_argv == NULL)
2036 return -ENOMEM;
2037
1e28fe0a 2038 j = 0;
5e2485b1 2039 for (i = 0; i < ARRAY_SIZE(record_args); i++)
1e28fe0a
SF
2040 rec_argv[j++] = record_args[i];
2041
e281a960
SF
2042 if (trace->trace_syscalls) {
2043 for (i = 0; i < sc_args_nr; i++)
2044 rec_argv[j++] = sc_args[i];
2045
2046 /* event string may be different for older kernels - e.g., RHEL6 */
2047 if (is_valid_tracepoint("raw_syscalls:sys_enter"))
2048 rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
2049 else if (is_valid_tracepoint("syscalls:sys_enter"))
2050 rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
2051 else {
2052 pr_err("Neither raw_syscalls nor syscalls events exist.\n");
2053 return -1;
2054 }
9aca7f17 2055 }
9aca7f17 2056
1e28fe0a
SF
2057 if (trace->trace_pgfaults & TRACE_PFMAJ)
2058 for (i = 0; i < majpf_args_nr; i++)
2059 rec_argv[j++] = majpf_args[i];
2060
2061 if (trace->trace_pgfaults & TRACE_PFMIN)
2062 for (i = 0; i < minpf_args_nr; i++)
2063 rec_argv[j++] = minpf_args[i];
2064
2065 for (i = 0; i < (unsigned int)argc; i++)
2066 rec_argv[j++] = argv[i];
5e2485b1 2067
b0ad8ea6 2068 return cmd_record(j, rec_argv);
5e2485b1
DA
2069}
2070
bf2575c1
DA
2071static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
2072
08c98776 2073static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
c522739d 2074{
ef503831 2075 struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
8dd2a131
JO
2076
2077 if (IS_ERR(evsel))
08c98776 2078 return false;
c522739d
ACM
2079
2080 if (perf_evsel__field(evsel, "pathname") == NULL) {
2081 perf_evsel__delete(evsel);
08c98776 2082 return false;
c522739d
ACM
2083 }
2084
744a9719 2085 evsel->handler = trace__vfs_getname;
c522739d 2086 perf_evlist__add(evlist, evsel);
08c98776 2087 return true;
c522739d
ACM
2088}
2089
0ae537cb 2090static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
598d02c5
SF
2091{
2092 struct perf_evsel *evsel;
2093 struct perf_event_attr attr = {
2094 .type = PERF_TYPE_SOFTWARE,
2095 .mmap_data = 1,
598d02c5
SF
2096 };
2097
2098 attr.config = config;
0524798c 2099 attr.sample_period = 1;
598d02c5
SF
2100
2101 event_attr_init(&attr);
2102
2103 evsel = perf_evsel__new(&attr);
0ae537cb
ACM
2104 if (evsel)
2105 evsel->handler = trace__pgfault;
598d02c5 2106
0ae537cb 2107 return evsel;
598d02c5
SF
2108}
2109
ddbb1b13
ACM
2110static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
2111{
2112 const u32 type = event->header.type;
2113 struct perf_evsel *evsel;
2114
ddbb1b13
ACM
2115 if (type != PERF_RECORD_SAMPLE) {
2116 trace__process_event(trace, trace->host, event, sample);
2117 return;
2118 }
2119
2120 evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
2121 if (evsel == NULL) {
2122 fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
2123 return;
2124 }
2125
e6001980
ACM
2126 trace__set_base_time(trace, evsel, sample);
2127
ddbb1b13
ACM
2128 if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
2129 sample->raw_data == NULL) {
2130 fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
2131 perf_evsel__name(evsel), sample->tid,
2132 sample->cpu, sample->raw_size);
2133 } else {
2134 tracepoint_handler handler = evsel->handler;
2135 handler(trace, evsel, event, sample);
2136 }
2137}
2138
c27366f0
ACM
2139static int trace__add_syscall_newtp(struct trace *trace)
2140{
2141 int ret = -1;
2142 struct perf_evlist *evlist = trace->evlist;
2143 struct perf_evsel *sys_enter, *sys_exit;
2144
2145 sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter);
2146 if (sys_enter == NULL)
2147 goto out;
2148
2149 if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
2150 goto out_delete_sys_enter;
2151
2152 sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit);
2153 if (sys_exit == NULL)
2154 goto out_delete_sys_enter;
2155
2156 if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
2157 goto out_delete_sys_exit;
2158
2159 perf_evlist__add(evlist, sys_enter);
2160 perf_evlist__add(evlist, sys_exit);
2161
2ddd5c04 2162 if (callchain_param.enabled && !trace->kernel_syscallchains) {
44621819
ACM
2163 /*
2164 * We're interested only in the user space callchain
2165 * leading to the syscall, allow overriding that for
2166 * debugging reasons using --kernel_syscall_callchains
2167 */
2168 sys_exit->attr.exclude_callchain_kernel = 1;
2169 }
2170
8b3ce757
ACM
2171 trace->syscalls.events.sys_enter = sys_enter;
2172 trace->syscalls.events.sys_exit = sys_exit;
c27366f0
ACM
2173
2174 ret = 0;
2175out:
2176 return ret;
2177
2178out_delete_sys_exit:
2179 perf_evsel__delete_priv(sys_exit);
2180out_delete_sys_enter:
2181 perf_evsel__delete_priv(sys_enter);
2182 goto out;
2183}
2184
19867b61
ACM
2185static int trace__set_ev_qualifier_filter(struct trace *trace)
2186{
2187 int err = -1;
b15d0a4c 2188 struct perf_evsel *sys_exit;
19867b61
ACM
2189 char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
2190 trace->ev_qualifier_ids.nr,
2191 trace->ev_qualifier_ids.entries);
2192
2193 if (filter == NULL)
2194 goto out_enomem;
2195
3541c034
MP
2196 if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
2197 filter)) {
b15d0a4c 2198 sys_exit = trace->syscalls.events.sys_exit;
3541c034 2199 err = perf_evsel__append_tp_filter(sys_exit, filter);
b15d0a4c 2200 }
19867b61
ACM
2201
2202 free(filter);
2203out:
2204 return err;
2205out_enomem:
2206 errno = ENOMEM;
2207 goto out;
2208}
c27366f0 2209
f15eb531 2210static int trace__run(struct trace *trace, int argc, const char **argv)
514f1c67 2211{
14a052df 2212 struct perf_evlist *evlist = trace->evlist;
0ae537cb 2213 struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
efd5745e
ACM
2214 int err = -1, i;
2215 unsigned long before;
f15eb531 2216 const bool forks = argc > 0;
46fb3c21 2217 bool draining = false;
514f1c67 2218
75b757ca
ACM
2219 trace->live = true;
2220
c27366f0 2221 if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
801c67b0 2222 goto out_error_raw_syscalls;
514f1c67 2223
e281a960 2224 if (trace->trace_syscalls)
08c98776 2225 trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
c522739d 2226
0ae537cb
ACM
2227 if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
2228 pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
2229 if (pgfault_maj == NULL)
2230 goto out_error_mem;
2231 perf_evlist__add(evlist, pgfault_maj);
e2726d99 2232 }
598d02c5 2233
0ae537cb
ACM
2234 if ((trace->trace_pgfaults & TRACE_PFMIN)) {
2235 pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
2236 if (pgfault_min == NULL)
2237 goto out_error_mem;
2238 perf_evlist__add(evlist, pgfault_min);
2239 }
598d02c5 2240
1302d88e 2241 if (trace->sched &&
2cc990ba
ACM
2242 perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
2243 trace__sched_stat_runtime))
2244 goto out_error_sched_stat_runtime;
1302d88e 2245
514f1c67
ACM
2246 err = perf_evlist__create_maps(evlist, &trace->opts.target);
2247 if (err < 0) {
c24ff998 2248 fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
514f1c67
ACM
2249 goto out_delete_evlist;
2250 }
2251
752fde44
ACM
2252 err = trace__symbols_init(trace, evlist);
2253 if (err < 0) {
c24ff998 2254 fprintf(trace->output, "Problems initializing symbol libraries!\n");
03ad9747 2255 goto out_delete_evlist;
752fde44
ACM
2256 }
2257
fde54b78
ACM
2258 perf_evlist__config(evlist, &trace->opts, NULL);
2259
0c3a6ef4
ACM
2260 if (callchain_param.enabled) {
2261 bool use_identifier = false;
2262
2263 if (trace->syscalls.events.sys_exit) {
2264 perf_evsel__config_callchain(trace->syscalls.events.sys_exit,
2265 &trace->opts, &callchain_param);
2266 use_identifier = true;
2267 }
2268
2269 if (pgfault_maj) {
2270 perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
2271 use_identifier = true;
2272 }
2273
2274 if (pgfault_min) {
2275 perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
2276 use_identifier = true;
2277 }
2278
2279 if (use_identifier) {
2280 /*
2281 * Now we have evsels with different sample_ids, use
2282 * PERF_SAMPLE_IDENTIFIER to map from sample to evsel
2283 * from a fixed position in each ring buffer record.
2284 *
2285 * As of this the changeset introducing this comment, this
2286 * isn't strictly needed, as the fields that can come before
2287 * PERF_SAMPLE_ID are all used, but we'll probably disable
2288 * some of those for things like copying the payload of
2289 * pointer syscall arguments, and for vfs_getname we don't
2290 * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this
2291 * here as a warning we need to use PERF_SAMPLE_IDENTIFIER.
2292 */
2293 perf_evlist__set_sample_bit(evlist, IDENTIFIER);
2294 perf_evlist__reset_sample_bit(evlist, ID);
2295 }
fde54b78 2296 }
514f1c67 2297
f15eb531
NK
2298 signal(SIGCHLD, sig_handler);
2299 signal(SIGINT, sig_handler);
2300
2301 if (forks) {
6ef73ec4 2302 err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
735f7e0b 2303 argv, false, NULL);
f15eb531 2304 if (err < 0) {
c24ff998 2305 fprintf(trace->output, "Couldn't run the workload!\n");
03ad9747 2306 goto out_delete_evlist;
f15eb531
NK
2307 }
2308 }
2309
514f1c67 2310 err = perf_evlist__open(evlist);
a8f23d8f
ACM
2311 if (err < 0)
2312 goto out_error_open;
514f1c67 2313
ba504235
WN
2314 err = bpf__apply_obj_config();
2315 if (err) {
2316 char errbuf[BUFSIZ];
2317
2318 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
2319 pr_err("ERROR: Apply config to BPF failed: %s\n",
2320 errbuf);
2321 goto out_error_open;
2322 }
2323
241b057c
ACM
2324 /*
2325 * Better not use !target__has_task() here because we need to cover the
2326 * case where no threads were specified in the command line, but a
2327 * workload was, and in that case we will fill in the thread_map when
2328 * we fork the workload in perf_evlist__prepare_workload.
2329 */
f078c385
ACM
2330 if (trace->filter_pids.nr > 0)
2331 err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
e13798c7 2332 else if (thread_map__pid(evlist->threads, 0) == -1)
f078c385
ACM
2333 err = perf_evlist__set_filter_pid(evlist, getpid());
2334
94ad89bc
ACM
2335 if (err < 0)
2336 goto out_error_mem;
2337
19867b61
ACM
2338 if (trace->ev_qualifier_ids.nr > 0) {
2339 err = trace__set_ev_qualifier_filter(trace);
2340 if (err < 0)
2341 goto out_errno;
19867b61 2342
2e5e5f87
ACM
2343 pr_debug("event qualifier tracepoint filter: %s\n",
2344 trace->syscalls.events.sys_exit->filter);
2345 }
19867b61 2346
94ad89bc
ACM
2347 err = perf_evlist__apply_filters(evlist, &evsel);
2348 if (err < 0)
2349 goto out_error_apply_filters;
241b057c 2350
f885037e 2351 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
e09b18d4
ACM
2352 if (err < 0)
2353 goto out_error_mmap;
514f1c67 2354
e36b7821 2355 if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
cb24d01d
ACM
2356 perf_evlist__enable(evlist);
2357
f15eb531
NK
2358 if (forks)
2359 perf_evlist__start_workload(evlist);
2360
e36b7821
AB
2361 if (trace->opts.initial_delay) {
2362 usleep(trace->opts.initial_delay * 1000);
2363 perf_evlist__enable(evlist);
2364 }
2365
e13798c7 2366 trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
42052bea
ACM
2367 evlist->threads->nr > 1 ||
2368 perf_evlist__first(evlist)->attr.inherit;
514f1c67 2369again:
efd5745e 2370 before = trace->nr_events;
514f1c67
ACM
2371
2372 for (i = 0; i < evlist->nr_mmaps; i++) {
2373 union perf_event *event;
2374
2375 while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) {
514f1c67 2376 struct perf_sample sample;
514f1c67 2377
efd5745e 2378 ++trace->nr_events;
514f1c67 2379
514f1c67
ACM
2380 err = perf_evlist__parse_sample(evlist, event, &sample);
2381 if (err) {
c24ff998 2382 fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
8e50d384 2383 goto next_event;
514f1c67
ACM
2384 }
2385
ddbb1b13 2386 trace__handle_event(trace, event, &sample);
8e50d384
ZZ
2387next_event:
2388 perf_evlist__mmap_consume(evlist, i);
20c5f10e 2389
ba209f85
ACM
2390 if (interrupted)
2391 goto out_disable;
02ac5421
ACM
2392
2393 if (done && !draining) {
2394 perf_evlist__disable(evlist);
2395 draining = true;
2396 }
514f1c67
ACM
2397 }
2398 }
2399
efd5745e 2400 if (trace->nr_events == before) {
ba209f85 2401 int timeout = done ? 100 : -1;
f15eb531 2402
46fb3c21
ACM
2403 if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
2404 if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
2405 draining = true;
2406
ba209f85 2407 goto again;
46fb3c21 2408 }
ba209f85
ACM
2409 } else {
2410 goto again;
f15eb531
NK
2411 }
2412
ba209f85 2413out_disable:
f3b623b8
ACM
2414 thread__zput(trace->current);
2415
ba209f85 2416 perf_evlist__disable(evlist);
514f1c67 2417
c522739d
ACM
2418 if (!err) {
2419 if (trace->summary)
2420 trace__fprintf_thread_summary(trace, trace->output);
2421
2422 if (trace->show_tool_stats) {
2423 fprintf(trace->output, "Stats:\n "
2424 " vfs_getname : %" PRIu64 "\n"
2425 " proc_getname: %" PRIu64 "\n",
2426 trace->stats.vfs_getname,
2427 trace->stats.proc_getname);
2428 }
2429 }
bf2575c1 2430
514f1c67
ACM
2431out_delete_evlist:
2432 perf_evlist__delete(evlist);
14a052df 2433 trace->evlist = NULL;
75b757ca 2434 trace->live = false;
514f1c67 2435 return err;
6ef068cb
ACM
2436{
2437 char errbuf[BUFSIZ];
a8f23d8f 2438
2cc990ba 2439out_error_sched_stat_runtime:
988bdb31 2440 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
2cc990ba
ACM
2441 goto out_error;
2442
801c67b0 2443out_error_raw_syscalls:
988bdb31 2444 tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
a8f23d8f
ACM
2445 goto out_error;
2446
e09b18d4
ACM
2447out_error_mmap:
2448 perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
2449 goto out_error;
2450
a8f23d8f
ACM
2451out_error_open:
2452 perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
2453
2454out_error:
6ef068cb 2455 fprintf(trace->output, "%s\n", errbuf);
87f91868 2456 goto out_delete_evlist;
94ad89bc
ACM
2457
2458out_error_apply_filters:
2459 fprintf(trace->output,
2460 "Failed to set filter \"%s\" on event %s with %d (%s)\n",
2461 evsel->filter, perf_evsel__name(evsel), errno,
c8b5f2c9 2462 str_error_r(errno, errbuf, sizeof(errbuf)));
94ad89bc 2463 goto out_delete_evlist;
514f1c67 2464}
5ed08dae
ACM
2465out_error_mem:
2466 fprintf(trace->output, "Not enough memory to run!\n");
2467 goto out_delete_evlist;
19867b61
ACM
2468
2469out_errno:
2470 fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
2471 goto out_delete_evlist;
a8f23d8f 2472}
514f1c67 2473
6810fc91
DA
2474static int trace__replay(struct trace *trace)
2475{
2476 const struct perf_evsel_str_handler handlers[] = {
c522739d 2477 { "probe:vfs_getname", trace__vfs_getname, },
6810fc91 2478 };
f5fc1412
JO
2479 struct perf_data_file file = {
2480 .path = input_name,
2481 .mode = PERF_DATA_MODE_READ,
e366a6d8 2482 .force = trace->force,
f5fc1412 2483 };
6810fc91 2484 struct perf_session *session;
003824e8 2485 struct perf_evsel *evsel;
6810fc91
DA
2486 int err = -1;
2487
2488 trace->tool.sample = trace__process_sample;
2489 trace->tool.mmap = perf_event__process_mmap;
384c671e 2490 trace->tool.mmap2 = perf_event__process_mmap2;
6810fc91
DA
2491 trace->tool.comm = perf_event__process_comm;
2492 trace->tool.exit = perf_event__process_exit;
2493 trace->tool.fork = perf_event__process_fork;
2494 trace->tool.attr = perf_event__process_attr;
f3b3614a 2495 trace->tool.tracing_data = perf_event__process_tracing_data;
6810fc91 2496 trace->tool.build_id = perf_event__process_build_id;
f3b3614a 2497 trace->tool.namespaces = perf_event__process_namespaces;
6810fc91 2498
0a8cb85c 2499 trace->tool.ordered_events = true;
6810fc91
DA
2500 trace->tool.ordering_requires_timestamps = true;
2501
2502 /* add tid to output */
2503 trace->multiple_threads = true;
2504
f5fc1412 2505 session = perf_session__new(&file, false, &trace->tool);
6810fc91 2506 if (session == NULL)
52e02834 2507 return -1;
6810fc91 2508
aa07df6e
DA
2509 if (trace->opts.target.pid)
2510 symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
2511
2512 if (trace->opts.target.tid)
2513 symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
2514
0a7e6d1b 2515 if (symbol__init(&session->header.env) < 0)
cb2ffae2
NK
2516 goto out;
2517
8fb598e5
DA
2518 trace->host = &session->machines.host;
2519
6810fc91
DA
2520 err = perf_session__set_tracepoints_handlers(session, handlers);
2521 if (err)
2522 goto out;
2523
003824e8
NK
2524 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2525 "raw_syscalls:sys_enter");
9aca7f17
DA
2526 /* older kernels have syscalls tp versus raw_syscalls */
2527 if (evsel == NULL)
2528 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2529 "syscalls:sys_enter");
003824e8 2530
e281a960
SF
2531 if (evsel &&
2532 (perf_evsel__init_syscall_tp(evsel, trace__sys_enter) < 0 ||
2533 perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
003824e8
NK
2534 pr_err("Error during initialize raw_syscalls:sys_enter event\n");
2535 goto out;
2536 }
2537
2538 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2539 "raw_syscalls:sys_exit");
9aca7f17
DA
2540 if (evsel == NULL)
2541 evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
2542 "syscalls:sys_exit");
e281a960
SF
2543 if (evsel &&
2544 (perf_evsel__init_syscall_tp(evsel, trace__sys_exit) < 0 ||
2545 perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
003824e8 2546 pr_err("Error during initialize raw_syscalls:sys_exit event\n");
6810fc91
DA
2547 goto out;
2548 }
2549
e5cadb93 2550 evlist__for_each_entry(session->evlist, evsel) {
1e28fe0a
SF
2551 if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
2552 (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
2553 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
2554 evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
2555 evsel->handler = trace__pgfault;
2556 }
2557
6810fc91
DA
2558 setup_pager();
2559
b7b61cbe 2560 err = perf_session__process_events(session);
6810fc91
DA
2561 if (err)
2562 pr_err("Failed to process events, error %d", err);
2563
bf2575c1
DA
2564 else if (trace->summary)
2565 trace__fprintf_thread_summary(trace, trace->output);
2566
6810fc91
DA
2567out:
2568 perf_session__delete(session);
2569
2570 return err;
2571}
2572
1302d88e
ACM
/* Print the heading of the per-thread summary; returns what fprintf() did. */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	return fprintf(fp, "\n Summary of events:\n\n");
}
2581
b535d523
ACM
2582DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
2583 struct stats *stats;
2584 double msecs;
2585 int syscall;
2586)
2587{
2588 struct int_node *source = rb_entry(nd, struct int_node, rb_node);
2589 struct stats *stats = source->priv;
2590
2591 entry->syscall = source->i;
2592 entry->stats = stats;
2593 entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
2594}
2595
bf2575c1
DA
2596static size_t thread__dump_stats(struct thread_trace *ttrace,
2597 struct trace *trace, FILE *fp)
2598{
bf2575c1
DA
2599 size_t printed = 0;
2600 struct syscall *sc;
b535d523
ACM
2601 struct rb_node *nd;
2602 DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
bf2575c1 2603
b535d523 2604 if (syscall_stats == NULL)
bf2575c1
DA
2605 return 0;
2606
2607 printed += fprintf(fp, "\n");
2608
834fd46d
MW
2609 printed += fprintf(fp, " syscall calls total min avg max stddev\n");
2610 printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
2611 printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
99ff7150 2612
98a91837 2613 resort_rb__for_each_entry(nd, syscall_stats) {
b535d523 2614 struct stats *stats = syscall_stats_entry->stats;
bf2575c1
DA
2615 if (stats) {
2616 double min = (double)(stats->min) / NSEC_PER_MSEC;
2617 double max = (double)(stats->max) / NSEC_PER_MSEC;
2618 double avg = avg_stats(stats);
2619 double pct;
2620 u64 n = (u64) stats->n;
2621
2622 pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
2623 avg /= NSEC_PER_MSEC;
2624
b535d523 2625 sc = &trace->syscalls.table[syscall_stats_entry->syscall];
99ff7150 2626 printed += fprintf(fp, " %-15s", sc->name);
834fd46d 2627 printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
b535d523 2628 n, syscall_stats_entry->msecs, min, avg);
27a778b5 2629 printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
bf2575c1 2630 }
bf2575c1
DA
2631 }
2632
b535d523 2633 resort_rb__delete(syscall_stats);
bf2575c1 2634 printed += fprintf(fp, "\n\n");
1302d88e
ACM
2635
2636 return printed;
2637}
2638
96c14451 2639static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
896cbb56 2640{
96c14451 2641 size_t printed = 0;
89dceb22 2642 struct thread_trace *ttrace = thread__priv(thread);
896cbb56
DA
2643 double ratio;
2644
2645 if (ttrace == NULL)
2646 return 0;
2647
2648 ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
2649
15e65c69 2650 printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
99ff7150 2651 printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
15e65c69 2652 printed += fprintf(fp, "%.1f%%", ratio);
a2ea67d7
SF
2653 if (ttrace->pfmaj)
2654 printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
2655 if (ttrace->pfmin)
2656 printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
03548ebf
ACM
2657 if (trace->sched)
2658 printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
2659 else if (fputc('\n', fp) != EOF)
2660 ++printed;
2661
bf2575c1 2662 printed += thread__dump_stats(ttrace, trace, fp);
896cbb56 2663
96c14451
ACM
2664 return printed;
2665}
896cbb56 2666
96c14451
ACM
2667static unsigned long thread__nr_events(struct thread_trace *ttrace)
2668{
2669 return ttrace ? ttrace->nr_events : 0;
2670}
2671
2672DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
2673 struct thread *thread;
2674)
2675{
2676 entry->thread = rb_entry(nd, struct thread, rb_node);
896cbb56
DA
2677}
2678
1302d88e
ACM
2679static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
2680{
96c14451
ACM
2681 DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
2682 size_t printed = trace__fprintf_threads_header(fp);
2683 struct rb_node *nd;
1302d88e 2684
96c14451
ACM
2685 if (threads == NULL) {
2686 fprintf(fp, "%s", "Error sorting output by nr_events!\n");
2687 return 0;
2688 }
2689
98a91837 2690 resort_rb__for_each_entry(nd, threads)
96c14451 2691 printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
896cbb56 2692
96c14451
ACM
2693 resort_rb__delete(threads);
2694
2695 return printed;
1302d88e
ACM
2696}
2697
ae9ed035
ACM
2698static int trace__set_duration(const struct option *opt, const char *str,
2699 int unset __maybe_unused)
2700{
2701 struct trace *trace = opt->value;
2702
2703 trace->duration_filter = atof(str);
2704 return 0;
2705}
2706
f078c385
ACM
2707static int trace__set_filter_pids(const struct option *opt, const char *str,
2708 int unset __maybe_unused)
2709{
2710 int ret = -1;
2711 size_t i;
2712 struct trace *trace = opt->value;
2713 /*
2714 * FIXME: introduce a intarray class, plain parse csv and create a
2715 * { int nr, int entries[] } struct...
2716 */
2717 struct intlist *list = intlist__new(str);
2718
2719 if (list == NULL)
2720 return -1;
2721
2722 i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
2723 trace->filter_pids.entries = calloc(i, sizeof(pid_t));
2724
2725 if (trace->filter_pids.entries == NULL)
2726 goto out;
2727
2728 trace->filter_pids.entries[0] = getpid();
2729
2730 for (i = 1; i < trace->filter_pids.nr; ++i)
2731 trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
2732
2733 intlist__delete(list);
2734 ret = 0;
2735out:
2736 return ret;
2737}
2738
c24ff998
ACM
2739static int trace__open_output(struct trace *trace, const char *filename)
2740{
2741 struct stat st;
2742
2743 if (!stat(filename, &st) && st.st_size) {
2744 char oldname[PATH_MAX];
2745
2746 scnprintf(oldname, sizeof(oldname), "%s.old", filename);
2747 unlink(oldname);
2748 rename(filename, oldname);
2749 }
2750
2751 trace->output = fopen(filename, "w");
2752
2753 return trace->output == NULL ? -errno : 0;
2754}
2755
598d02c5
SF
2756static int parse_pagefaults(const struct option *opt, const char *str,
2757 int unset __maybe_unused)
2758{
2759 int *trace_pgfaults = opt->value;
2760
2761 if (strcmp(str, "all") == 0)
2762 *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
2763 else if (strcmp(str, "maj") == 0)
2764 *trace_pgfaults |= TRACE_PFMAJ;
2765 else if (strcmp(str, "min") == 0)
2766 *trace_pgfaults |= TRACE_PFMIN;
2767 else
2768 return -1;
2769
2770 return 0;
2771}
2772
14a052df
ACM
2773static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
2774{
2775 struct perf_evsel *evsel;
2776
e5cadb93 2777 evlist__for_each_entry(evlist, evsel)
14a052df
ACM
2778 evsel->handler = handler;
2779}
2780
017037ff
ACM
2781/*
2782 * XXX: Hackish, just splitting the combined -e+--event (syscalls
2783 * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use
2784 * existing facilities unchanged (trace->ev_qualifier + parse_options()).
2785 *
2786 * It'd be better to introduce a parse_options() variant that would return a
2787 * list with the terms it didn't match to an event...
2788 */
2789static int trace__parse_events_option(const struct option *opt, const char *str,
2790 int unset __maybe_unused)
2791{
2792 struct trace *trace = (struct trace *)opt->value;
2793 const char *s = str;
2794 char *sep = NULL, *lists[2] = { NULL, NULL, };
2795 int len = strlen(str), err = -1, list;
2796 char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
2797 char group_name[PATH_MAX];
2798
2799 if (strace_groups_dir == NULL)
2800 return -1;
2801
2802 if (*s == '!') {
2803 ++s;
2804 trace->not_ev_qualifier = true;
2805 }
2806
2807 while (1) {
2808 if ((sep = strchr(s, ',')) != NULL)
2809 *sep = '\0';
2810
2811 list = 0;
2812 if (syscalltbl__id(trace->sctbl, s) >= 0) {
2813 list = 1;
2814 } else {
2815 path__join(group_name, sizeof(group_name), strace_groups_dir, s);
2816 if (access(group_name, R_OK) == 0)
2817 list = 1;
2818 }
2819
2820 if (lists[list]) {
2821 sprintf(lists[list] + strlen(lists[list]), ",%s", s);
2822 } else {
2823 lists[list] = malloc(len);
2824 if (lists[list] == NULL)
2825 goto out;
2826 strcpy(lists[list], s);
2827 }
2828
2829 if (!sep)
2830 break;
2831
2832 *sep = ',';
2833 s = sep + 1;
2834 }
2835
2836 if (lists[1] != NULL) {
2837 struct strlist_config slist_config = {
2838 .dirname = strace_groups_dir,
2839 };
2840
2841 trace->ev_qualifier = strlist__new(lists[1], &slist_config);
2842 if (trace->ev_qualifier == NULL) {
2843 fputs("Not enough memory to parse event qualifier", trace->output);
2844 goto out;
2845 }
2846
2847 if (trace__validate_ev_qualifier(trace))
2848 goto out;
2849 }
2850
2851 err = 0;
2852
2853 if (lists[0]) {
2854 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
2855 "event selector. use 'perf list' to list available events",
2856 parse_events_option);
2857 err = parse_events_option(&o, lists[0], 0);
2858 }
2859out:
2860 if (sep)
2861 *sep = ',';
2862
2863 return err;
2864}
2865
b0ad8ea6 2866int cmd_trace(int argc, const char **argv)
514f1c67 2867{
6fdd9cb7 2868 const char *trace_usage[] = {
f15eb531
NK
2869 "perf trace [<options>] [<command>]",
2870 "perf trace [<options>] -- <command> [<options>]",
5e2485b1
DA
2871 "perf trace record [<options>] [<command>]",
2872 "perf trace record [<options>] -- <command> [<options>]",
514f1c67
ACM
2873 NULL
2874 };
2875 struct trace trace = {
514f1c67
ACM
2876 .syscalls = {
2877 . max = -1,
2878 },
2879 .opts = {
2880 .target = {
2881 .uid = UINT_MAX,
2882 .uses_mmap = true,
2883 },
2884 .user_freq = UINT_MAX,
2885 .user_interval = ULLONG_MAX,
509051ea 2886 .no_buffering = true,
38d5447d 2887 .mmap_pages = UINT_MAX,
9d9cad76 2888 .proc_map_timeout = 500,
514f1c67 2889 },
007d66a0 2890 .output = stderr,
50c95cbd 2891 .show_comm = true,
e281a960 2892 .trace_syscalls = true,
44621819 2893 .kernel_syscallchains = false,
05614993 2894 .max_stack = UINT_MAX,
514f1c67 2895 };
c24ff998 2896 const char *output_name = NULL;
514f1c67 2897 const struct option trace_options[] = {
017037ff
ACM
2898 OPT_CALLBACK('e', "event", &trace, "event",
2899 "event/syscall selector. use 'perf list' to list available events",
2900 trace__parse_events_option),
50c95cbd
ACM
2901 OPT_BOOLEAN(0, "comm", &trace.show_comm,
2902 "show the thread COMM next to its id"),
c522739d 2903 OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
017037ff
ACM
2904 OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
2905 trace__parse_events_option),
c24ff998 2906 OPT_STRING('o', "output", &output_name, "file", "output file name"),
6810fc91 2907 OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
514f1c67
ACM
2908 OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
2909 "trace events on existing process id"),
ac9be8ee 2910 OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
514f1c67 2911 "trace events on existing thread id"),
fa0e4ffe
ACM
2912 OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
2913 "pids to filter (by the kernel)", trace__set_filter_pids),
ac9be8ee 2914 OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
514f1c67 2915 "system-wide collection from all CPUs"),
ac9be8ee 2916 OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
514f1c67 2917 "list of cpus to monitor"),
6810fc91 2918 OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
514f1c67 2919 "child tasks do not inherit counters"),
994a1f78
JO
2920 OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
2921 "number of mmap data pages",
2922 perf_evlist__parse_mmap_pages),
ac9be8ee 2923 OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
514f1c67 2924 "user to profile"),
ae9ed035
ACM
2925 OPT_CALLBACK(0, "duration", &trace, "float",
2926 "show only events with duration > N.M ms",
2927 trace__set_duration),
1302d88e 2928 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
7c304ee0 2929 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
4bb09192
DA
2930 OPT_BOOLEAN('T', "time", &trace.full_time,
2931 "Show full timestamp, not time relative to first start"),
fd2eabaf
DA
2932 OPT_BOOLEAN('s', "summary", &trace.summary_only,
2933 "Show only syscall summary with statistics"),
2934 OPT_BOOLEAN('S', "with-summary", &trace.summary,
2935 "Show all syscalls and summary with statistics"),
598d02c5
SF
2936 OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
2937 "Trace pagefaults", parse_pagefaults, "maj"),
e281a960 2938 OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
e366a6d8 2939 OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
566a0885
MW
2940 OPT_CALLBACK(0, "call-graph", &trace.opts,
2941 "record_mode[,record_size]", record_callchain_help,
2942 &record_parse_callchain_opt),
44621819
ACM
2943 OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
2944 "Show the kernel callchains on the syscall exit path"),
5cf9c84e
ACM
2945 OPT_UINTEGER(0, "min-stack", &trace.min_stack,
2946 "Set the minimum stack depth when parsing the callchain, "
2947 "anything below the specified depth will be ignored."),
c6d4a494
ACM
2948 OPT_UINTEGER(0, "max-stack", &trace.max_stack,
2949 "Set the maximum stack depth when parsing the callchain, "
2950 "anything beyond the specified depth will be ignored. "
4cb93446 2951 "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
9d9cad76
KL
2952 OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
2953 "per thread proc mmap processing timeout in ms"),
e36b7821
AB
2954 OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
2955 "ms to wait before starting measurement after program "
2956 "start"),
514f1c67
ACM
2957 OPT_END()
2958 };
ccd62a89 2959 bool __maybe_unused max_stack_user_set = true;
f3e459d1 2960 bool mmap_pages_user_set = true;
6fdd9cb7 2961 const char * const trace_subcommands[] = { "record", NULL };
514f1c67 2962 int err;
32caf0d1 2963 char bf[BUFSIZ];
514f1c67 2964
4d08cb80
ACM
2965 signal(SIGSEGV, sighandler_dump_stack);
2966 signal(SIGFPE, sighandler_dump_stack);
2967
14a052df 2968 trace.evlist = perf_evlist__new();
fd0db102 2969 trace.sctbl = syscalltbl__new();
14a052df 2970
fd0db102 2971 if (trace.evlist == NULL || trace.sctbl == NULL) {
14a052df 2972 pr_err("Not enough memory to run!\n");
ff8f695c 2973 err = -ENOMEM;
14a052df
ACM
2974 goto out;
2975 }
2976
6fdd9cb7
YS
2977 argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
2978 trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
fd2eabaf 2979
d7888573
WN
2980 err = bpf__setup_stdout(trace.evlist);
2981 if (err) {
2982 bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
2983 pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
2984 goto out;
2985 }
2986
59247e33
ACM
2987 err = -1;
2988
598d02c5
SF
2989 if (trace.trace_pgfaults) {
2990 trace.opts.sample_address = true;
2991 trace.opts.sample_time = true;
2992 }
2993
f3e459d1
ACM
2994 if (trace.opts.mmap_pages == UINT_MAX)
2995 mmap_pages_user_set = false;
2996
05614993 2997 if (trace.max_stack == UINT_MAX) {
fe176085 2998 trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl_perf_event_max_stack;
05614993
ACM
2999 max_stack_user_set = false;
3000 }
3001
3002#ifdef HAVE_DWARF_UNWIND_SUPPORT
caa36ed7 3003 if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls)
05614993
ACM
3004 record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
3005#endif
3006
2ddd5c04 3007 if (callchain_param.enabled) {
f3e459d1
ACM
3008 if (!mmap_pages_user_set && geteuid() == 0)
3009 trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
3010
566a0885 3011 symbol_conf.use_callchain = true;
f3e459d1 3012 }
566a0885 3013
14a052df
ACM
3014 if (trace.evlist->nr_entries > 0)
3015 evlist__set_evsel_handler(trace.evlist, trace__event_handler);
3016
1e28fe0a
SF
3017 if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
3018 return trace__record(&trace, argc-1, &argv[1]);
3019
3020 /* summary_only implies summary option, but don't overwrite summary if set */
3021 if (trace.summary_only)
3022 trace.summary = trace.summary_only;
3023
726f3234
ACM
3024 if (!trace.trace_syscalls && !trace.trace_pgfaults &&
3025 trace.evlist->nr_entries == 0 /* Was --events used? */) {
e281a960
SF
3026 pr_err("Please specify something to trace.\n");
3027 return -1;
3028 }
3029
017037ff 3030 if (!trace.trace_syscalls && trace.ev_qualifier) {
59247e33
ACM
3031 pr_err("The -e option can't be used with --no-syscalls.\n");
3032 goto out;
3033 }
3034
c24ff998
ACM
3035 if (output_name != NULL) {
3036 err = trace__open_output(&trace, output_name);
3037 if (err < 0) {
3038 perror("failed to create output file");
3039 goto out;
3040 }
3041 }
3042
fd0db102
ACM
3043 trace.open_id = syscalltbl__id(trace.sctbl, "open");
3044
602ad878 3045 err = target__validate(&trace.opts.target);
32caf0d1 3046 if (err) {
602ad878 3047 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3048 fprintf(trace.output, "%s", bf);
3049 goto out_close;
32caf0d1
NK
3050 }
3051
602ad878 3052 err = target__parse_uid(&trace.opts.target);
514f1c67 3053 if (err) {
602ad878 3054 target__strerror(&trace.opts.target, err, bf, sizeof(bf));
c24ff998
ACM
3055 fprintf(trace.output, "%s", bf);
3056 goto out_close;
514f1c67
ACM
3057 }
3058
602ad878 3059 if (!argc && target__none(&trace.opts.target))
ee76120e
NK
3060 trace.opts.target.system_wide = true;
3061
6810fc91
DA
3062 if (input_name)
3063 err = trace__replay(&trace);
3064 else
3065 err = trace__run(&trace, argc, argv);
1302d88e 3066
c24ff998
ACM
3067out_close:
3068 if (output_name != NULL)
3069 fclose(trace.output);
3070out:
1302d88e 3071 return err;
514f1c67 3072}